In [23]:
import numpy as np
import numpy.ma as ma
from netCDF4 import Dataset, num2date
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
# Quick look at the file before processing: open it, then print the dataset
# summary, the names of its variables, and the description of 'time'.
dataset = Dataset('jare.nc')
print(dataset)
_variables = dataset.variables
print(_variables.keys())
print(_variables['time'])

def _format_timestamps(dates):
    """Render decoded times as 'DD/MM/YYYY HH:MM (UTC)' strings.

    Entries that are masked, or that cannot be converted, become None so the
    returned list always has the same length as *dates*.
    """
    labels = []
    for d in dates:
        if np.ma.is_masked(d):
            labels.append(None)
            continue
        try:
            labels.append(
                pd.Timestamp(d.isoformat()).strftime("%d/%m/%Y %H:%M (UTC)")
            )
        except Exception:
            # Best effort: one unconvertible timestamp must not abort the
            # whole export. (Unlike a bare ``except``, this still lets
            # KeyboardInterrupt / SystemExit through.)
            labels.append(None)
    return labels


def _peak_period(psd_raw, psd_masked, fbin):
    """Return 1 / (frequency of the largest PSD bin) for every record.

    The result has the shape of the leading two axes of *psd_masked* and is
    NaN wherever no peak is computed: records whose first raw PSD bin is
    masked, and records whose spectrum is entirely masked.
    """
    bin_missing = np.ma.getmaskarray(psd_masked)
    first_bin_missing = np.ma.getmaskarray(psd_raw)[:, :, 0]
    has_spectrum = ~bin_missing.all(axis=2) & ~first_bin_missing

    # MaskedArray.argmax skips masked bins but still returns an index into the
    # full-length spectrum, so it lines up with ``fbin`` even when only part
    # of a spectrum is masked (indexing ``fbin`` with an index taken from a
    # compressed spectrum would select the wrong frequency).
    peak_idx = psd_masked.argmax(axis=2)

    # A masked frequency bin yields NaN for the period.
    freqs = np.ma.filled(np.ma.asarray(fbin).astype(float), np.nan)

    Tp = np.full(has_spectrum.shape, np.nan)
    Tp[has_spectrum] = 1.0 / freqs[peak_idx[has_spectrum]]
    return Tp


def process_wave_data(file_path="jare.nc"):
    """Export per-sensor wave statistics from a netCDF file to CSV and plots.

    Reads the variables 'lat', 'lon', 'psd', 'bin', 'deployed',
    'moments_processed' and 'time'. Records where ``lat == 0`` or
    ``deployed != 1`` are masked. For each record the function computes

    * significant wave height ``Hs = 4 * sqrt(moments_processed[..., 2])``
    * peak period ``Tp = 1 / bin[argmax(psd)]``

    and sets both to NaN where ``Hs < 0.1``. For every sensor (first axis of
    'lat') the rows with a valid position are written to
    ``JARE<n>_data.csv`` (n counted from 1) and, when there is at least one
    row, passed to ``create_plots``.

    Args:
        file_path: Path of the netCDF file to read.

    Returns:
        True when processing finished, False when any exception occurred
        (the error and its traceback are printed rather than raised).
    """
    try:
        # Read everything needed into memory; the context manager guarantees
        # the file is closed even if a variable is missing or a read fails.
        with Dataset(file_path, 'r') as a:
            lat = a.variables['lat'][:]
            lon = a.variables['lon'][:]
            psd = a.variables['psd'][:]
            fbin = a.variables['bin'][:]
            deployed = a.variables['deployed'][:]
            moment = a.variables['moments_processed'][:]
            time = a.variables['time'][:]

        # NOTE(review): units/calendar are hard-coded here rather than taken
        # from the file's 'time' variable -- confirm they match the file.
        time_units = "seconds since 2019-01-01 00:00:00 UTC"
        time_calendar = 'gregorian'

        # A record is unusable when it has no position fix (lat == 0) or the
        # sensor is not flagged as deployed.
        lat_mask = (lat == 0) | (deployed != 1)

        lat_masked = np.ma.masked_array(lat, mask=lat_mask)
        lon_masked = np.ma.masked_array(lon, mask=lat_mask)

        # Extend the per-record mask along the trailing axis of the 3-D
        # variables.
        lat_mask_expanded = np.expand_dims(lat_mask, axis=2)
        psd_masked = np.ma.masked_array(
            psd, mask=np.broadcast_to(lat_mask_expanded, psd.shape)
        )
        moment_masked = np.ma.masked_array(
            moment, mask=np.broadcast_to(lat_mask_expanded, moment.shape)
        )

        # Timestamps as display strings, one per time step.
        dates = num2date(time, units=time_units, calendar=time_calendar)
        date_strings = _format_timestamps(dates)

        # Significant wave height
        Hs = 4 * np.sqrt(moment_masked[:, :, 2])

        # Peak period
        Tp = _peak_period(psd, psd_masked, fbin)

        # Discard records whose wave height is below 0.1; masked heights are
        # left untouched.
        too_small = np.ma.filled(Hs < 0.1, False)
        Hs[too_small] = np.nan
        Tp[too_small] = np.nan

        # One CSV (and plot) per sensor; the sensor count comes from the data
        # instead of being hard-coded.
        for sensor_idx in range(lat.shape[0]):
            data = {
                'DD/MM/YYYY HH:MM (UTC)': date_strings,
                'Latitude (decimal degrees)': lat_masked[sensor_idx, :],
                'Longitude (decimal degrees)': lon_masked[sensor_idx, :],
                'Significant Wave Height (m)': Hs[sensor_idx, :],
                'Peak Period (s)': Tp[sensor_idx, :]
            }
            df = pd.DataFrame(data)

            # Keep only rows that have a position.
            df_filtered = df.dropna(
                subset=['Latitude (decimal degrees)', 'Longitude (decimal degrees)']
            )

            csv_filename = f'JARE{sensor_idx+1}_data.csv'
            df_filtered.to_csv(csv_filename, index=False)
            print(f"Saved {csv_filename} with {len(df_filtered)} rows")

            # A plotting failure must not stop the remaining sensors.
            try:
                if len(df_filtered) > 0:
                    create_plots(df_filtered, sensor_idx+1)
            except Exception as e:
                print(f"Error creating plot for sensor {sensor_idx+1}: {str(e)}")

        print("Data processing completed.")
        return True

    except Exception as e:
        print(f"Error processing data: {str(e)}")
        import traceback
        traceback.print_exc()
        return False

def create_plots(df, sensor_id):
    """Save a scatter plot of significant wave height over time for one sensor.

    Rows with a missing timestamp, wave height or peak period are dropped
    first. If nothing remains a message is printed and no file is written;
    otherwise the figure is saved as ``JARE<sensor_id>_plot.png``.

    Args:
        df: DataFrame containing the columns 'DD/MM/YYYY HH:MM (UTC)',
            'Significant Wave Height (m)' and 'Peak Period (s)'.
        sensor_id: Sensor number used in the title, messages and file name.

    Returns:
        None. Any exception is caught, reported with its traceback, and not
        re-raised.
    """
    fig = None
    try:
        # Work on a copy so adding the 'datetime' column never touches the
        # caller's frame (and avoids SettingWithCopyWarning).
        plot_df = df[['DD/MM/YYYY HH:MM (UTC)', 'Significant Wave Height (m)', 'Peak Period (s)']].copy()
        plot_df = plot_df.dropna()

        if len(plot_df) == 0:
            print(f"No valid data to plot for sensor {sensor_id}")
            return

        # Strip the literal " (UTC)" suffix before parsing. regex=False is
        # required: with regex matching (the default before pandas 2.0) the
        # parentheses form a group, the suffix is not removed and parsing
        # with the explicit format below fails.
        plot_df['datetime'] = pd.to_datetime(
            plot_df['DD/MM/YYYY HH:MM (UTC)'].str.replace(' (UTC)', '', regex=False),
            format='%d/%m/%Y %H:%M'
        )

        # Sort by date for chronological plotting
        plot_df = plot_df.sort_values('datetime')

        first_stamp = plot_df['datetime'].min()
        time_span = plot_df['datetime'].max() - first_stamp

        fig, ax1 = plt.subplots(figsize=(14, 7))

        # Wave height as unconnected markers
        color1 = 'tab:blue'
        ax1.set_ylabel('Significant Wave Height (m)', color=color1, fontsize=12)
        ax1.plot(plot_df['datetime'], plot_df['Significant Wave Height (m)'],
                 color=color1, label='Significant Wave Height',
                 linestyle='None', marker='o', markersize=4)
        ax1.tick_params(axis='y', labelcolor=color1)

        # Upper limit: 10% headroom above the largest value, capped at 20
        ax1.set_ylim(0, min(20, plot_df['Significant Wave Height (m)'].max() * 1.1))

        # Choose tick spacing and label format from the covered time span;
        # whatever the tick labels omit (year, month, day) goes in the axis
        # label instead.
        if time_span.days > 180:
            date_format = mdates.DateFormatter('%b')
            ax1.xaxis.set_major_locator(mdates.MonthLocator())
            xlabel = f'Date/Time (UTC) - {first_stamp.strftime("%Y")}'
        elif time_span.days > 30:
            date_format = mdates.DateFormatter('%d %b')
            ax1.xaxis.set_major_locator(mdates.DayLocator(interval=7))
            xlabel = f'Date/Time (UTC) - {first_stamp.strftime("%Y")}'
        elif time_span.days > 7:
            date_format = mdates.DateFormatter('%d %b')
            ax1.xaxis.set_major_locator(mdates.DayLocator())
            xlabel = f'Date/Time (UTC) - {first_stamp.strftime("%Y")}'
        elif time_span.days > 1:
            date_format = mdates.DateFormatter('%d %H:%M')
            ax1.xaxis.set_major_locator(mdates.HourLocator(interval=6))
            xlabel = f'Date/Time (UTC) - {first_stamp.strftime("%b %Y")}'
        else:
            date_format = mdates.DateFormatter('%H:%M')
            ax1.xaxis.set_major_locator(mdates.HourLocator())
            xlabel = f'Date/Time (UTC) - {first_stamp.strftime("%d %b %Y")}'

        ax1.xaxis.set_major_formatter(date_format)

        # Rotate and right-align tick labels so they do not overlap
        plt.setp(ax1.get_xticklabels(), rotation=45, ha='right')

        ax1.set_title(f'Wave Data - Sensor {sensor_id}', fontsize=14)
        ax1.legend(loc='upper right', fontsize=11)
        ax1.grid(True, alpha=0.3)
        ax1.set_xlabel(xlabel, fontsize=12)

        fig.tight_layout()
        fig.savefig(f'JARE{sensor_id}_plot.png', dpi=300, bbox_inches='tight')

        print(f"Plot saved for sensor {sensor_id} with {len(plot_df)} data points")

    except Exception as e:
        print(f"Error creating plot for sensor {sensor_id}: {str(e)}")
        import traceback
        traceback.print_exc()
    finally:
        # Close the figure on every path so a failure part-way through does
        # not leave it open.
        if fig is not None:
            plt.close(fig)
# Script entry point: run the processing pipeline and report how it went
if __name__ == "__main__":
    success = process_wave_data()
    print(
        "Script completed successfully."
        if success
        else "Script failed. Check error messages above."
    )

<class 'netCDF4.Dataset'>
root group (NETCDF3_CLASSIC data model, file format NETCDF3):
    title: Waves-in-ice data collected during JARE61
    lat and lon units: Decimal degrees
    institution: University of Tasmania and New Zealand's National Institute for Wat
    creator_name: Alison Kohout
    creator_email: alison.kohout@niwa.co.nz
    project: Australian Research Council Discovery Project DP170103774 'Advancing Wave-Ice Models with Autonomous Observations
    contributor_name: Guy Williams, Alison Kohout, Pat Wongpan, Bill Penrose, Scott Penrose
    contributor_role: GW: Lead Chief Investigator, AK: Chief Investigator, PW: Field Leader, BP: Engineer, SP: Software Engineer
    summary: The aim of this project was to observe wave propagation in the marginal ice zone (MIZ). Each sensor performed on-board spectral analysis and data quality control. The data was returned via Iridium data packets. The instrumentation development, construction and deployment was funded through the Aus