# Preprocessing RIO Fields
This script outputs ensemble- and time-averaged RIO fields for a specific month.

The processing runs in the for loop below, which iterates over the months July, September and November and over the time slices 2020-2040 and 2050-2070.

In [5]:
import xarray as xr
import numpy as np
import os
import pandas as pd
import scipy
import pandas as pd
from scipy import interpolate

### Functions for reading GCM data

In [2]:
def yearcheck(file, year):
    """Report whether the dataset in ``file`` has a timestamp in ``year``.

    Parameters
    ----------
    file
        Path passed to ``xr.open_dataset``.
    year
        Calendar year to look for.

    Returns
    -------
    bool
        True when at least one entry of the ``time`` coordinate has a
        ``.year`` equal to ``year``; False otherwise, including when the
        time coordinate is empty.
    """
    with xr.open_dataset(file) as ds:
        time = ds.coords['time'].values[:]

    # An empty time axis cannot contain the year, and has no first element
    # to inspect below.
    if len(time) == 0:
        return False

    # numpy datetime64 scalars carry no ``.year`` attribute; convert them to
    # pandas timestamps. Any other element type is used as-is and is expected
    # to expose ``.year`` itself.
    if isinstance(time[0], np.datetime64):
        time = pd.to_datetime(time)

    # Short-circuits on the first matching timestamp.
    return any(stamp.year == year for stamp in time)

def extract_gcm_data(ncfile,var,year,month):
    """Read one variable for a single year/month from a NetCDF file.

    Parameters
    ----------
    ncfile
        Path passed to ``xr.open_dataset``.
    var
        Name of the variable to extract.
    year, month
        Calendar year and month number to keep along the ``time`` axis.

    Returns
    -------
    var_field : numpy.ndarray
        Values of ``var`` for the selected timesteps.
    x, y : numpy.ndarray
        Longitude (shifted so that negative values become ``x + 360``) and
        latitude arrays.
    dt : numpy.ndarray
        Time coordinate values of the selected timesteps.

    Raises
    ------
    ValueError
        If neither ``lat``/``lon`` nor ``latitude``/``longitude`` are present.
    """
    with xr.open_dataset(ncfile) as ds:
        # Index the time axis directly with one boolean mask instead of
        # masking the whole dataset with ``where``: only the time dimension
        # is subset, so variables without a time axis (e.g. 2-D lat/lon)
        # keep their original shape and dtype.
        in_year = ds.time.dt.year == year
        in_month = ds.time.dt.month.isin([month])
        ds = ds.sel(time=in_year & in_month)

        dt = np.array(ds.coords['time'][:])

        # Accept either naming convention for the horizontal coordinates.
        for lon_name, lat_name in (('lon', 'lat'), ('longitude', 'latitude')):
            if lat_name in ds.variables and lon_name in ds.variables:
                x = np.array(ds.variables[lon_name][:])
                y = np.array(ds.variables[lat_name][:])
                break
        else:
            raise ValueError("Unable to find latitude and longitude variables in the dataset.")

        # Shift negative longitudes by 360 so all values are non-negative.
        x = np.where(x<0,x+360,x)
        var_field = np.array(ds.variables[var][:])
    return var_field,x,y,dt

def gcm_interp(x,y,z,xgrid,ygrid):
    """Regrid a field from source points ``(x, y)`` onto ``(xgrid, ygrid)``.

    Parameters
    ----------
    x, y
        Source coordinates; flattened before use.
    z
        Source values. A 3-D array is treated as a stack along its first
        axis, each slice being regridded with nearest-neighbour lookup. Any
        other input is indexed as 2-D and regridded linearly, with
        nearest-neighbour values filling the points where linear
        interpolation returns NaN.
    xgrid, ygrid
        Target coordinates.

    Returns
    -------
    numpy.ndarray
        For 3-D input, the regridded slices stacked along the first axis
        (also when there is only one slice); otherwise an array shaped like
        ``xgrid``.

    Raises
    ------
    ValueError
        If a 3-D ``z`` has no slices along its first axis.
    """
    points = (np.ravel(x), np.ravel(y))
    target = (xgrid, ygrid)

    if np.ndim(z) == 3:
        if z.shape[0] == 0:
            raise ValueError("Cannot interpolate a 3-D field with zero timesteps.")
        frames = [
            scipy.interpolate.griddata(points, np.ravel(z[tstep,:,:]), target, method='nearest')
            for tstep in range(z.shape[0])
        ]
        # Stack once along a new last axis, then move it to the front. Doing
        # this on the full list also handles a single timestep, which would
        # otherwise stay 2-D and break the 3-axis transpose.
        return np.transpose(np.dstack(frames), (2, 0, 1))

    flat = np.ravel(z[:,:])
    linear = scipy.interpolate.griddata(points, flat, target, method='linear')
    nearest = scipy.interpolate.griddata(points, flat, target, method='nearest')
    # Linear interpolation yields NaN where it cannot produce a value; take
    # the nearest-neighbour result for those points.
    return np.where(np.isnan(linear), nearest, linear)

def concatenate_gcm(rootdir,var,year0,yearn,month):
    """Collect one month's values of ``var`` for every year in a range.

    For each year in ``np.arange(year0, yearn, 1)`` (``yearn`` itself is not
    included) the matching file under ``rootdir`` is located with
    ``find_gcm_file`` and read with ``extract_gcm_data``.

    Parameters
    ----------
    rootdir
        Directory tree searched for the input files.
    var
        Variable name to extract.
    year0, yearn
        First year and exclusive end year.
    month
        Month number to extract from each year.

    Returns
    -------
    series : numpy.ndarray
        Per-year fields stacked along the first axis.
    dt : numpy.ndarray
        Flattened time values of all years.
    x, y : numpy.ndarray
        Longitude and latitude as read from the last year's file.

    Raises
    ------
    ValueError
        If the year range is empty.
    FileNotFoundError
        If no file containing one of the years is found.
    """
    years = np.arange(year0,yearn,1)
    fields = []
    stamps = []
    for year in years:
        file = find_gcm_file(rootdir,year)
        # find_gcm_file signals "nothing found" with an empty result; stop
        # here with a clear message instead of handing that to the reader.
        if not file:
            raise FileNotFoundError(f'No file containing the year {year} was found under {rootdir}')
        temp,x,y,temp_dt = extract_gcm_data(file,var,year,month)
        fields.append(temp)
        stamps.append(np.ravel(temp_dt))

    if not fields:
        raise ValueError(f'Empty year range: year0={year0}, yearn={yearn}')

    # Join once at the end rather than re-copying the growing array on every
    # iteration. A single year is returned as read, without restacking.
    series = fields[0] if len(fields) == 1 else np.vstack(fields)
    dt = np.concatenate(stamps)
    return series,dt,x,y

def find_gcm_file(root_directory,year):
    """Find a file under ``root_directory`` whose time axis contains ``year``.

    Every file met while walking the tree is checked with ``yearcheck``; the
    search stops at the first file that passes.

    Parameters
    ----------
    root_directory
        Top of the directory tree to walk.
    year
        Calendar year that the file must contain.

    Returns
    -------
    str or list
        Path of the first matching file, or an empty list when no file
        matches (a message is printed in that case). This also covers a
        root directory that yields nothing to walk.
    """
    for root, dirs, files in os.walk(root_directory):
        for file in files:
            candidate = os.path.join(root, file)
            # Return immediately so that directories visited later cannot
            # overwrite a match that was already found.
            if yearcheck(candidate, year):
                return candidate

    print(f'A file which contained the year {year} was not found!')
    return []

### Function to derive RIO fields

In [3]:
def create_rio(sit,sic,ice_class,bin_min=None,bin_max=None):
    """Derive an RIO field from sea-ice thickness and concentration.

    Parameters
    ----------
    sit
        Sea-ice thickness; NaN entries are treated as 0.
    sic
        Sea-ice concentration. When its largest non-NaN value exceeds 15 the
        field is multiplied by 0.01 (treated as percent); otherwise it is
        used unchanged.
    ice_class
        One risk value per thickness bin, in the same order as the bin edges.
    bin_min, bin_max
        Lower and upper thickness edge of each bin. When omitted, the
        module-level ``min_sit`` / ``max_sit`` arrays are used, so existing
        three-argument calls behave as before.

    Returns
    -------
    numpy.ndarray
        ``risk * round(conc, 1) * 10 + (1 - round(conc, 1)) * 30``, where
        ``risk`` is the bin's risk value for thicknesses strictly inside a
        bin. Thicknesses inside no bin keep their own value, replaced by 3.1
        when above 3.
    """
    # Fall back to the table loaded at module level when no explicit edges
    # are passed.
    if bin_min is None:
        bin_min = min_sit
    if bin_max is None:
        bin_max = max_sit

    sithick = np.where(np.isnan(sit),0,sit)

    siconc = sic
    if np.nanmax(siconc) > 15:
        siconc = siconc *0.01

    #Max SIT value in POLARIS is 3m
    risk = np.where(np.array(sithick)>3,3.1,np.array(sithick))

    # Bins are tested against the uncapped thickness with strict inequalities
    # on both edges; a later bin overwrites an earlier one if both match.
    for count, class_val in enumerate(ice_class):
        in_bin = (sithick>bin_min[count])&(sithick<bin_max[count])
        risk[in_bin] = class_val

    # Concentration is rounded to one decimal and scaled by 10 before being
    # weighted with the risk values.
    conc = np.round(siconc, decimals=1)
    rio_ice = risk*conc*10
    rio_ow = (1-conc)*30
    return rio_ice+rio_ow

### Load the polar classes' risk value csv file. 

In [7]:
# Table of polar-class values; the bare file name is resolved against the
# current working directory.
csv_file = 'POLAR_CLASSES.csv'

df = pd.read_csv(csv_file)
# Rows 0 and 1 hold the lower and upper sea-ice-thickness edge of each bin,
# one bin per column from column 2 onward. The division by 100 presumably
# converts centimetres to metres - TODO confirm against the CSV.
min_sit = np.array(df.iloc[0, 2:])/100
max_sit = np.array(df.iloc[1, 2:])/100
# Row 13 is the vessel ice class used for this run: columns 2 onward are its
# per-bin values, column 0 is its name.
# NOTE(review): the row index is hard-coded; change it to select another class.
ice_class = np.array(df.iloc[13, 2:])
ice_class_name = np.array(df.iloc[13, 0])
print(f'The selected vessel ice class is: {ice_class_name}')

The selected vessel ice class is: NOICECLASS


### Derive and save ensemble- and multi-year-averaged monthly RIO fields

In [None]:
months = [7,9,11]
year0s = [2020,2050]
yearns = [2040,2070]

# One root for every model's input folders. The ECEARTH paths were previously
# written without the leading '/', unlike the other three models; building
# all paths from this single root keeps them consistent.
data_root = '/Sea Ice'


def _mean_rio(model, year0, yearn, month):
    """Return one model's time-mean RIO, its lon/lat grid and first siconc slice.

    Reads ``siconc`` and ``sithick`` from ``<data_root>/<model>/...``, derives
    RIO with the module-level ``ice_class`` and averages over the first axis,
    ignoring NaN. The grid is the one returned by the ``sithick`` read.
    """
    sic, _, _, _ = concatenate_gcm(f'{data_root}/{model}/siconc', 'siconc', year0, yearn, month)
    sit, _, lon, lat = concatenate_gcm(f'{data_root}/{model}/sithick', 'sithick', year0, yearn, month)
    rio = np.nanmean(create_rio(sit, sic, ice_class), axis=0)
    return rio, lon, lat, sic[0, :, :]


def _mean_rio_on_grid(model, year0, yearn, month, lon_target, lat_target):
    """Return one model's time-mean RIO regridded onto the target lon/lat grid."""
    rio, lon, lat, _ = _mean_rio(model, year0, yearn, month)
    return gcm_interp(lon, lat, rio, lon_target, lat_target)


for year0,yearn in zip(year0s,yearns):
    for month in months:
        output_filename = f'AVE_RIO_{year0}-{yearn}_{month}.nc'

        # CNRM supplies the reference grid. Its first concentration timestep
        # is kept so that its NaN cells can be re-applied to the ensemble mean.
        cnrm_rio, x_cnrm, y_cnrm, nan_mask = _mean_rio('CNRM', year0, yearn, month)

        # The remaining models are averaged in time on their own grid and
        # then regridded onto the CNRM grid.
        ecearth_rio = _mean_rio_on_grid('ECEARTH', year0, yearn, month, x_cnrm, y_cnrm)
        mpi_rio = _mean_rio_on_grid('MPI', year0, yearn, month, x_cnrm, y_cnrm)
        mri_rio = _mean_rio_on_grid('MRI', year0, yearn, month, x_cnrm, y_cnrm)

        ave_rio = np.array([cnrm_rio,ecearth_rio,mri_rio,mpi_rio])
        ave_rio = np.nanmean(ave_rio,axis =0)
        ave_rio = np.where(np.isnan(nan_mask),np.nan,ave_rio)

        #Save the CNRM for plotting
        x, y = x_cnrm.shape
        grid_dims = ('x', 'y')
        grid_coords = {'x': np.arange(x),
                       'y': np.arange(y)}

        ice_data_array = xr.DataArray(ave_rio, dims=grid_dims, coords=grid_coords, name='RIO')
        lat_array = xr.DataArray(y_cnrm, dims=grid_dims, coords=grid_coords, name='lat')
        lon_array = xr.DataArray(x_cnrm, dims=grid_dims, coords=grid_coords, name='lon')

        dataset = xr.Dataset({'RIO': ice_data_array, 'lat': lat_array, 'lon': lon_array})
        dataset.to_netcdf(output_filename)
        print(f"NetCDF file '{output_filename}' saved successfully.")