In [6]:
# Import libraries
import os
import xarray as xr
import pandas as pd
import numpy as np

In [2]:
# Root directory that contains the MLGEO2024_SeaIcePrediction checkout.
# Set the MLGEO_HOME_PATH environment variable to point this notebook at your
# own machine without editing the code; when it is unset, the original
# hard-coded default below is used.
home_path = os.environ.get('MLGEO_HOME_PATH', '/Users/skylargale/Documents/VSCode/')

In [3]:
# Step 1: Combine the .nc files

# Define the directory where the raw data is stored
raw_data_dir = os.path.join(home_path, 'MLGEO2024_SeaIcePrediction/data/raw/seaice_conc')

# os.listdir() returns entries in arbitrary, platform-dependent order. The
# files are matched to timestamps purely by position below, so the list must
# be sorted to make that pairing deterministic.
nc_files = sorted(
    os.path.join(raw_data_dir, filename)
    for filename in os.listdir(raw_data_dir)
    if filename.endswith('.nc')
)

# Prepare an empty list to store the modified datasets
datasets = []

# One month-start timestamp per file, beginning at 1978-11-01.
# NOTE(review): this assumes the sorted file names are in chronological order
# and that no month is missing from the directory -- confirm against the raw
# data, otherwise later files are labelled with the wrong month.
time_points = pd.date_range(start='1978-11-01', periods=len(nc_files), freq='MS')

# Loop through each file, open it, and assign a time coordinate
for i, nc_file in enumerate(nc_files):
    # Open the dataset
    ds = xr.open_dataset(nc_file)

    # Attach this file's timestamp as a scalar 'time' coordinate ...
    ds = ds.assign_coords(time=time_points[i])
    # ... then promote it to a length-1 'time' dimension so the datasets can
    # later be concatenated along 'time'
    ds = ds.expand_dims(dim='time')

    # Append the modified dataset to the list
    datasets.append(ds)

In [4]:
# Step 2: Concatenate and clean up the data

# Join the per-file datasets into a single dataset along 'time'
combined_ds = xr.concat(datasets, dim='time')

# Squeeze out the 'tdim' dimension from the combined dataset
combined_squeezed = combined_ds.squeeze(dim='tdim')

In [7]:
# Step 3: Regrid the data

# Rename 'x' to 'lon' and 'y' to 'lat'
combined_renamed = combined_squeezed.rename({'x': 'lon', 'y': 'lat'})

# Remove the 'xgrid' and 'ygrid' variables; errors='ignore' means nothing is
# raised if either of them is absent
combined_dropped = combined_renamed.drop_vars(['xgrid', 'ygrid'], errors='ignore')

# Evenly spaced coordinate values: 304 points for lon, 448 points for lat.
# NOTE(review): this only relabels the axes with evenly spaced values; no
# interpolation or reprojection of the data is performed here -- confirm that
# these values match the true positions of the source grid cells.
lon_values = np.linspace(-180, 180, 304)
lat_values = np.linspace(31.35, 90, 448)
combined_gridded = combined_dropped.assign_coords(lon=lon_values, lat=lat_values)

In [8]:
# Step 4: Save the combined dataset to a new .nc file for ai-ready data
output_filename = 'clean_nh_monthly_sic_19782024.nc'
clean_data_dir = os.path.join(home_path, 'MLGEO2024_SeaIcePrediction/data/clean')
output_path = os.path.join(clean_data_dir, output_filename)

# Make sure the destination directory exists before writing into it
os.makedirs(clean_data_dir, exist_ok=True)

# Write to the clean-data directory (not the current working directory) so the
# file ends up where the message below says it is
combined_gridded.to_netcdf(output_path)
print(f'Saved combined dataset to {output_path}')

Saved combined dataset to /Users/skylargale/Documents/VSCode/MLGEO2024_SeaIcePrediction/data/clean/clean_nh_monthly_sic_19782024.nc
