In [1]:
import xarray as xr
import numpy as np
from sklearn.decomposition import PCA
import pydmd
import pandas as pd

import os

import pydmd

import datetime


In [2]:
#set parameters and load forcings

# Load forcings
# Row offset into the forcing table from which the forcing series is taken.
# NOTE(review): presumably the row that lines up with the first evaluation
# time step -- confirm against the forcing file.
FORCING_START_ROW = 840
forcings_df = pd.read_csv('./data/interpolatedForcing.csv')
forcings = np.array(forcings_df['totalforcing'].iloc[FORCING_START_ROW:])


data_path = './data/Evaluation-Tier1' #path to evaluation tier

out_path = './predictions' #directory to save results

method = 'DMDc' #your method name here


#evaluation variables: sub-directory of data_path -> variable directories inside it
dir_names = {'Aday': ['monmaxpr', 'monmaxtasmax', 'monmintasmin'],
             'Amon': ['pr', 'psl', 'tas'],
             'Omon': ['tos']
            }

# Tier identifier = last character of the final path component.
# normpath drops a trailing separator first, so './data/Evaluation-Tier1/'
# still yields '1' rather than '/'.
tier = os.path.basename(os.path.normpath(data_path))[-1]


In [8]:
#load data

# Maps a submission-style name
#   $VARIABLE_$MEMBERID_$TIER_$METHOD_$GROUPNAME
# to a 2-D array of monthly anomalies, reshaped to (time, lat*lon).
#
# Note: t, nlat, nlon, tas_all, ds and var_short stay bound at notebook scope
# after this loop (holding the values of the last file loaded); they are
# intentionally not renamed here.
datasets = {}
for dir_name, var_names in dir_names.items():
    for var in var_names:
        directory = os.path.join(data_path, dir_name, var)

        # Sorted so files are processed in the same order on every run
        # (os.listdir order is arbitrary); dot-entries such as .DS_Store or
        # .ipynb_checkpoints are skipped because they are not data files.
        file_names = sorted(f for f in os.listdir(directory) if not f.startswith('.'))

        for i, file in enumerate(file_names, start=1):

            print('File {}/{}'.format(i,len(file_names)), end='\r')

            file_path = os.path.join(directory, file)

            # The context manager closes the file even if a step below raises.
            with xr.open_dataset(file_path) as ds:

                # Name of the first column of the flattened dataset
                var_short = ds.to_dataframe().columns[0]

                # Unused below. Kept because it reads the variable into memory
                # while the file is still open.
                # NOTE(review): a later cell that touches `ds` after it is closed
                # may rely on this -- confirm before removing.
                raw_data = np.array(ds[var_short])

                # Monthly anomalies: subtract each calendar month's time mean
                climatology = ds[var_short].groupby('time.month').mean(dim='time')
                anomalies = ds[var_short].groupby('time.month') - climatology

                #convert to numpy array and flatten the two spatial axes
                tas_cube = np.array(anomalies)

            [t, nlat, nlon] = tas_cube.shape
            tas_all = np.reshape(tas_cube, (t, nlat*nlon))

            #match the submission style
            #$VARIABLE_$MEMBERID_$TIER_$METHOD_$GROUPNAME.nc
            # Built from the loop variables rather than by splitting file_path
            # on '/', so the name does not depend on the OS path separator.
            name_memID = file.split('.')[0]
            group_name = dir_name
            data_name = f'{name_memID}_{tier}_{method}_{group_name}'

            datasets[data_name] = tas_all



File 1/10

In [10]:
#predict forced response on the data

N_PCA_COMPONENTS = 3   # number of principal components passed to DMDc
PCA_SEED = 0           # fixes the result when sklearn picks its randomized SVD solver


def _index_source_files():
    """Map every submission-style dataset name to the NetCDF file it comes from.

    The key is built as $FILESTEM_$TIER_$METHOD_$GROUPNAME, where FILESTEM is
    the file name up to its first '.' and GROUPNAME is the sub-directory of
    data_path -- the same scheme used for the keys of `datasets`.
    """
    index = {}
    for group, var_names in dir_names.items():
        for var_name in var_names:
            var_dir = os.path.join(data_path, group, var_name)
            for fname in os.listdir(var_dir):
                key = f"{fname.split('.')[0]}_{tier}_{method}_{group}"
                index[key] = os.path.join(var_dir, fname)
    return index


def _gridded_field_name(src):
    """Return the name of the first data variable with dims (time, lat, lon).

    Raises:
        ValueError: if the dataset has no such variable.
    """
    for name, field in src.data_vars.items():
        if field.dims == ('time', 'lat', 'lon'):
            return name
    raise ValueError('no data variable with dimensions (time, lat, lon) found')


def _dmdc_forced_response(data_all, forcing):
    """Estimate the forced response of one dataset with PCA followed by DMDc.

    Args:
        data_all: 2-D array (time, space); columns containing any non-finite
            value are excluded from the fit and come back as NaN.
        forcing: 1-D control series with one value per time step.

    Returns:
        Array with the same shape as data_all. Row 0 is data_all[0] (the first
        time step is taken as-is); rows 1..T-1 are the one-step predictions.

    Raises:
        ValueError: if forcing does not have one entry per time step.
    """
    n_time, n_space = data_all.shape
    if forcing.shape[0] != n_time:
        raise ValueError(
            f'forcing has {forcing.shape[0]} entries but the data has {n_time} time steps')

    #remove nans: keep only the columns that are finite at every time step
    finite_cols = np.all(np.isfinite(data_all), axis=0)
    data = data_all[:, finite_cols]

    ################################
    ################################
    #YOUR METHOD HERE!

    #PCA
    pca = PCA(n_components=N_PCA_COMPONENTS, random_state=PCA_SEED)
    pca.fit(data)
    data_pca = pca.transform(data)

    #DMDc: snapshots are (components, T), the control input is (1, T-1)
    control = np.expand_dims(forcing[:-1], 0)
    my_dmdc = pydmd.DMDc(svd_rank=-1)
    my_dmdc.fit(data_pca.T, control)
    dmdc_eigs = my_dmdc.eigs

    #select mode with biggest real part
    idx = np.argsort(np.real(dmdc_eigs))[::-1][:1]
    sel_modes = my_dmdc.modes[:, idx]
    sel_eigs = dmdc_eigs[idx]

    #predict forced response: selected-mode dynamics applied to steps 0..T-2,
    #plus the control term
    pred_pca = np.linalg.multi_dot(
        [sel_modes, np.diag(sel_eigs), np.linalg.pinv(sel_modes), data_pca[:-1, :].T]
    ) + my_dmdc.B @ control

    ################################
    ################################

    #add the mask back in: excluded columns stay NaN
    pred = np.full((n_time - 1, n_space), np.nan)
    pred[:, finite_cols] = pca.inverse_transform(pred_pca.T.real)

    #predict forced response at first time as the signal at time 1.
    #This uses the first row of *this* dataset (not of whichever file was loaded last).
    return np.vstack([data_all[0], pred])


os.makedirs(out_path, exist_ok=True)
source_files = _index_source_files()

for data_name, data_all in datasets.items():
    if data_name not in source_files:
        raise KeyError(f'no source file found for dataset {data_name!r} under {data_path}')

    forced_resp_2d = _dmdc_forced_response(data_all, forcings)

    #make a xarray object.
    # The dataset's own source file is the template, so each output carries
    # that file's variable name and coordinates.
    with xr.open_dataset(source_files[data_name]) as src:
        field_name = _gridded_field_name(src)

        # reshape raises if the prediction does not match the template grid
        forced_resp_cube = forced_resp_2d.reshape(src[field_name].shape)

        forced_resp_xr = src.copy()
        forced_resp_xr[field_name] = (('time', 'lat', 'lon'), forced_resp_cube)
        forced_resp_xr.attrs = {'creation_date': str(datetime.datetime.now()), 'evaluation_id': '1B', 'method': method}

        #save the result (while the template file is still open)
        f_name = os.path.join(out_path, f'{data_name}.nc')
        forced_resp_xr.to_netcdf(f_name)

