# In [30]:
#!pip install netCDF4
#from netCDF4 import Dataset

#dataset = Dataset('sipexII.nc')
#print(dataset)  # Prints summary info
#print(dataset.variables.keys())  # Prints variable names
import numpy as np
import numpy.ma as ma
from netCDF4 import Dataset, num2date
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime

def _format_timestamps(dates):
    """Format decoded times as 'DD/MM/YYYY HH:MM (UTC)' strings.

    Entries that are masked, or that cannot be converted, become None so the
    returned list always has the same length as *dates*.
    """
    labels = []
    for stamp in dates:
        if np.ma.is_masked(stamp):
            labels.append(None)
            continue
        try:
            labels.append(
                pd.Timestamp(stamp.isoformat()).strftime("%d/%m/%Y %H:%M (UTC)")
            )
        except Exception:
            # Best effort: one unconvertible time must not abort the export.
            labels.append(None)
    return labels


def _peak_period(psd, psd_masked, fbin):
    """Return 1/frequency of the strongest spectral bin for every record.

    Args:
        psd: Spectra as read from the file, indexed [sensor, time, bin].
        psd_masked: The same spectra with the position mask applied.
        fbin: Value associated with each bin (indexed by the last psd axis).

    Returns:
        A float64 ndarray of shape ``psd.shape[:2]``. A record is NaN when the
        first bin of the raw spectrum is masked or when every bin of the
        masked spectrum is masked.
    """
    # Masked bins are replaced by -inf so they can never win the argmax while
    # the returned position still indexes the *full* bin axis (and thus fbin).
    # Assumes psd has a floating-point dtype -- TODO confirm.
    peak_bin = np.argmax(psd_masked.filled(-np.inf), axis=2)

    frequencies = np.asarray(ma.filled(fbin, np.nan))
    with np.errstate(divide='ignore', invalid='ignore'):
        period = 1 / frequencies[peak_bin]

    first_bin_present = ~ma.getmaskarray(psd)[:, :, 0]
    any_bin_present = ~ma.getmaskarray(psd_masked).all(axis=2)
    usable = first_bin_present & any_bin_present

    # float64 container, as before, so CSV formatting of the values is unchanged.
    Tp = np.full(peak_bin.shape, np.nan)
    Tp[usable] = period[usable]
    return Tp


def process_wave_data(file_path="PIPERS.nc"):
    """Export per-sensor wave statistics from a netCDF file to CSV and plots.

    Reads the variables 'lat', 'lon', 'psd', 'bin', 'moments' and 'time',
    masks every record whose latitude is exactly 0, derives
    ``Hs = 4 * sqrt(moments[:, :, 2])`` and the peak period (1 / bin value at
    the spectral maximum), blanks both where Hs < 0.1, and writes one
    ``PIPERS<n>_data.csv`` per sensor (rows without latitude/longitude are
    dropped). ``create_plots`` is called for every sensor that has rows.

    Times are decoded as "seconds since 2017-01-01 00:00:00 UTC" on the
    gregorian calendar; the units stored in the file are not consulted.

    Args:
        file_path: Path of the netCDF file to read.

    Returns:
        True when processing finished, False when any exception occurred (the
        error and its traceback are printed).
    """
    try:
        # The context manager closes the file even when reading fails.
        with Dataset(file_path, 'r') as nc:
            lat = nc.variables['lat'][:]
            lon = nc.variables['lon'][:]
            psd = nc.variables['psd'][:]
            fbin = nc.variables['bin'][:]
            moment = nc.variables['moments'][:]
            time = nc.variables['time'][:]

        time_units = "seconds since 2017-01-01 00:00:00 UTC"
        time_calendar = 'gregorian'

        # A latitude of exactly 0 marks a record to be masked; entries that
        # are already masked in the file keep their own mask.
        lat_mask = np.asarray(ma.filled(lat == 0, False), dtype=bool)
        lat_mask_3d = lat_mask[:, :, np.newaxis]

        lat_masked = np.ma.masked_array(lat, mask=lat_mask)
        lon_masked = np.ma.masked_array(lon, mask=lat_mask)
        psd_masked = np.ma.masked_array(
            psd, mask=np.broadcast_to(lat_mask_3d, psd.shape))
        moment_masked = np.ma.masked_array(
            moment, mask=np.broadcast_to(lat_mask_3d, moment.shape))

        dates = num2date(time, units=time_units, calendar=time_calendar)
        date_strings = _format_timestamps(dates)

        # Significant wave height from index 2 of the moments axis
        # (presumably the zeroth spectral moment -- TODO confirm).
        Hs = 4 * np.sqrt(moment_masked[:, :, 2])
        Tp = _peak_period(psd, psd_masked, fbin)

        # Discard records whose wave height is below 0.1.
        with np.errstate(invalid='ignore'):
            too_small = ~ma.getmaskarray(Hs) & (ma.getdata(Hs) < 0.1)
        Hs[too_small] = np.nan
        Tp[too_small] = np.nan

        # Sensor count comes from the data instead of a hard-coded 14.
        for sensor_idx in range(Hs.shape[0]):
            sensor_number = sensor_idx + 1
            df = pd.DataFrame({
                'DD/MM/YYYY HH:MM (UTC)': date_strings,
                'Latitude (decimal degrees)': lat_masked[sensor_idx, :],
                'Longitude (decimal degrees)': lon_masked[sensor_idx, :],
                'Significant Wave Height (m)': Hs[sensor_idx, :],
                'Peak Period (s)': Tp[sensor_idx, :],
            })

            # Masked positions arrive in the frame as NaN; drop those rows.
            df_filtered = df.dropna(
                subset=['Latitude (decimal degrees)', 'Longitude (decimal degrees)'])

            csv_filename = f'PIPERS{sensor_number}_data.csv'
            df_filtered.to_csv(csv_filename, index=False)
            print(f"Saved {csv_filename} with {len(df_filtered)} rows")

            try:
                if len(df_filtered) > 0:
                    create_plots(df_filtered, sensor_number)
            except Exception as e:
                print(f"Error creating plot for sensor {sensor_number}: {str(e)}")

        print("Data processing completed.")
        return True

    except Exception as e:
        print(f"Error processing data: {str(e)}")
        import traceback
        traceback.print_exc()
        return False

def create_plots(df, sensor_id):
    """Save a twin-axis scatter plot of wave height and peak period.

    Rows with a missing timestamp, wave height or peak period are dropped.
    Wave height is drawn on the left axis (blue circles, upper limit capped
    at 20) and peak period on the right axis (red crosses). Tick spacing and
    label format of the time axis depend on the time span of the data. The
    figure is written to ``PIPERS<sensor_id>_plot.png`` at 300 dpi.

    Args:
        df: DataFrame with the columns 'DD/MM/YYYY HH:MM (UTC)',
            'Significant Wave Height (m)' and 'Peak Period (s)'.
        sensor_id: Sensor number used in the title, messages and file name.

    Returns:
        None. Errors are printed together with a traceback, never raised.
    """
    fig = None
    try:
        import matplotlib.dates as mdates

        # Work on an independent copy so adding a column cannot touch `df`.
        plot_df = df[['DD/MM/YYYY HH:MM (UTC)',
                      'Significant Wave Height (m)',
                      'Peak Period (s)']].dropna().copy()

        if len(plot_df) == 0:
            print(f"No valid dates for sensor {int(sensor_id)}")
            return

        plot_df['datetime'] = pd.to_datetime(plot_df['DD/MM/YYYY HH:MM (UTC)'],
                                             format='%d/%m/%Y %H:%M (UTC)')
        span_days = (plot_df['datetime'].max() - plot_df['datetime'].min()).days

        fig, ax1 = plt.subplots(figsize=(14, 7))

        # Left axis: significant wave height.
        color1 = 'tab:blue'
        ax1.set_ylabel('Significant Wave Height (m)', color=color1, fontsize=12)
        line1 = ax1.plot(plot_df['datetime'], plot_df['Significant Wave Height (m)'],
                         color=color1, label='Significant Wave Height',
                         linestyle='None', marker='o', markersize=4)
        ax1.tick_params(axis='y', labelcolor=color1)
        ax1.set_ylim(0, min(20, plot_df['Significant Wave Height (m)'].max() * 1.1))

        # Right axis: peak period.
        ax2 = ax1.twinx()
        color2 = 'tab:red'
        ax2.set_ylabel('Peak Period (s)', color=color2, fontsize=12)
        line2 = ax2.plot(plot_df['datetime'], plot_df['Peak Period (s)'],
                         color=color2, label='Peak Period',
                         marker='x', linestyle='None')
        ax2.tick_params(axis='y', labelcolor=color2)

        # Coarser ticks and labels for longer records.
        if span_days > 180:
            label_format, locator = '%b %Y', mdates.MonthLocator()
        elif span_days > 30:
            label_format, locator = '%d %b', mdates.DayLocator(interval=7)
        elif span_days > 7:
            label_format, locator = '%d %b', mdates.DayLocator()
        elif span_days > 1:
            label_format, locator = '%d %H:%M', mdates.HourLocator(interval=6)
        else:
            label_format, locator = '%H:%M', mdates.HourLocator()
        ax1.xaxis.set_major_locator(locator)
        ax1.xaxis.set_major_formatter(mdates.DateFormatter(label_format))

        plt.setp(ax1.get_xticklabels(), rotation=45, ha='right')

        ax1.set_title(f'Wave Data - Sensor {int(sensor_id)}', fontsize=14)
        ax1.legend(line1 + line2, ['Significant Wave Height', 'Peak Period'],
                   loc='upper right', fontsize=11)

        ax1.grid(True, alpha=0.3)
        ax1.set_xlabel('Date/Time (UTC)', fontsize=12)
        fig.tight_layout()

        # Save through the figure object rather than pyplot's "current figure".
        fig.savefig(f'PIPERS{int(sensor_id)}_plot.png', dpi=300)

        print(f"Plot saved for sensor {int(sensor_id)} with {len(plot_df)} data points")

    except Exception as e:
        print(f"Error creating plot: {str(e)}")
        import traceback
        traceback.print_exc()
    finally:
        # Release the figure on every path; otherwise a failure leaves one
        # open figure behind for each sensor that errors.
        if fig is not None:
            plt.close(fig)


# Script entry point: run the export and report the overall outcome.
if __name__ == "__main__":
    success = process_wave_data()
    print(
        "Script completed successfully."
        if success
        else "Script failed. Check error messages above."
    )

# Output captured from a run of this cell (the file names shown here differ
# from the ones the current code writes, e.g. PIPERS_1_data.csv vs PIPERS1_data.csv):
# Saved PIPERS_1_data.csv with 2316 rows
# Plot saved for sensor 1 with 979 data points
# Saved PIPERS_2_data.csv with 4225 rows
# Plot saved for sensor 2 with 1065 data points
# Saved PIPERS_3_data.csv with 378 rows
# Plot saved for sensor 3 with 357 data points
# Saved PIPERS_4_data.csv with 526 rows
# Plot saved for sensor 4 with 103 data points
# Saved PIPERS_5_data.csv with 2665 rows
# Plot saved for sensor 5 with 1733 data points
# Saved PIPERS_6_data.csv with 2335 rows
# Plot saved for sensor 6 with 1897 data points
# Saved PIPERS_7_data.csv with 3041 rows
# Plot saved for sensor 7 with 2475 data points
# Saved PIPERS_8_data.csv with 421 rows
# Plot saved for sensor 8 with 293 data points
# Saved PIPERS_9_data.csv with 1890 rows
# Plot saved for sensor 9 with 1863 data points
# Saved PIPERS_10_data.csv with 3133 rows
# Plot saved for sensor 10 with 2925 data points
# Saved PIPERS_11_data.csv with 378 rows
# Plot saved for sensor 11 with 305 data points
# Saved PIPERS_12_data.csv with 0 rows
# Saved PIPERS_13_data.csv with 289