In [None]:
import xarray as xr
import os

# Directory that receives the cleaned files. It is created up front so the
# to_netcdf() calls below always have an existing parent directory.
output_directory = 'data/clean/'
os.makedirs(output_directory, exist_ok=True)

# Process one input file per day of July: CAPE_Jul_01.nc ... CAPE_Jul_31.nc.
for day in range(1, 32):
    # Zero-pad the day so it matches the file naming scheme (01, 02, ..., 31).
    day_str = f"{day:02d}"
    filename = f'CAPE_Jul_{day_str}.nc'
    # os.path.join keeps the path correct whether or not output_directory
    # ends with a separator.
    cleaned_filename = os.path.join(output_directory, f'CAPE_Jul_{day_str}_cleaned.nc')

    try:
        # Open the file in a context manager so its handle is released after
        # every iteration, including when a later step raises. Everything
        # that reads from the dataset (and the final write) stays inside the
        # block because the handle is closed on exit.
        with xr.open_dataset(filename) as raw_ds:
            # Wrap longitudes into the half-open range [-180, 180), then sort
            # both horizontal axes so the coordinates are ascending.
            raw_ds.coords['longitude'] = (raw_ds.coords['longitude'] + 180) % 360 - 180
            day_ds = raw_ds.sortby('latitude').sortby('longitude')

            # Replace missing CAPE values with the mean over the whole array.
            day_ds['cape'] = day_ds['cape'].fillna(day_ds['cape'].mean())

            # Tukey fences: anything further than 1.5 * IQR outside the
            # quartiles is treated as an outlier.
            Q1 = day_ds['cape'].quantile(0.25)
            Q3 = day_ds['cape'].quantile(0.75)
            IQR = Q3 - Q1
            lower_bound = Q1 - 1.5 * IQR
            upper_bound = Q3 + 1.5 * IQR

            # where() is applied to the whole dataset, so cells whose CAPE is
            # outside the fences are masked in every data variable;
            # drop=True additionally removes coordinate labels for which no
            # cell satisfies the condition.
            day_ds_cleaned = day_ds.where(
                (day_ds['cape'] >= lower_bound) & (day_ds['cape'] <= upper_bound),
                drop=True,
            )

            # Write the result while the source file is still open.
            day_ds_cleaned.to_netcdf(cleaned_filename)

        print(f'Successfully cleaned and saved: {cleaned_filename}')

    except FileNotFoundError:
        # A missing day is expected for incomplete downloads; keep going.
        print(f'File not found: {filename}. Skipping this day.')
    except Exception as e:
        # Batch boundary: report the failure and continue with the next day
        # rather than aborting the whole run.
        print(f'Error processing {filename}: {e}')