In [None]:
%matplotlib inline

%matplotlib inline
%load_ext autoreload
%autoreload 2

import ERFutils
import dask
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.signal import savgol_filter

import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import cftime
import dask
import xarrayutils
import cartopy.crs as ccrs
from xmip.preprocessing import combined_preprocessing
from xmip.preprocessing import replace_x_y_nominal_lat_lon
from xmip.drift_removal import replace_time
from xmip.postprocessing import concat_experiments
import xmip.drift_removal as xm_dr
import xmip as xm
import xesmf as xe
import datetime
from datetime import timedelta
from dateutil.relativedelta import relativedelta
import cf_xarray as cfxr
import scipy.signal as signal
from scipy.sparse import diags
from scipy.sparse.linalg import spsolve_triangular

import seaborn as sns
import matplotlib as mpl
import cmocean
import cmocean.cm as cmo
from matplotlib.gridspec import GridSpec

import copy
import os

# Set dask's 'array.slicing.split_large_chunks' option to True for this session.
# NOTE(review): presumably this lets dask split oversized chunks created by slicing
# (and silences the related performance warning) - confirm against the dask docs.
dask.config.set(**{'array.slicing.split_large_chunks': True})

# Load data and diagnose Green's Functions

In [None]:
# Shared settings pulled from ERFutils.
# NOTE(review): `A` and `ds_out` are not used in this cell; presumably kept for later cells.
model_set = ERFutils.model_set
A = ERFutils.A
ds_out = ERFutils.ds_out

# Switches for this cell
plot = True      # draw the diagnosed Green's functions
savgol = True    # forwarded as-is to ERFutils.create_multimodel_GF_set
save = False     # write the Green's functions to NetCDF

train_id = ['1pctCO2']
output_path = ERFutils.path_to_ERF_outputs

for train in train_id:
    # Read the smoothed ERF file for this experiment and convert it with ds_to_dict
    ERF_path = f'{output_path}ERF/ERF_{train}_smooth_all_ds.nc4'
    ERF_ds = xr.open_dataset(ERF_path)
    ERF = {train: ERFutils.ds_to_dict(ERF_ds)}

    # Diagnose the Green's functions from the loaded forcing
    ds_control, ds_exp, G_ds = ERFutils.create_multimodel_GF_set(ERF, train, model_set, savgol)

    # Optional overlay plot (the figure itself is never written: save_fig = False)
    if plot:
        ERFutils.plot_mean_Greens(G_ds, train, overlay = True, save_fig = False)

    # Average the Green's functions over the 'model' dimension
    G_ds2 = G_ds.mean(dim = 'model')

    # Optionally persist both the per-model set and the model mean
    if save:
        G_ds.to_netcdf(f'../Outputs/RF_Outputs/G_{train}_ERF_all_ds.nc4')
        G_ds2.to_netcdf(f'../Outputs/RF_Outputs/G_{train}_ERF_mean_ds.nc4')

In [None]:
# Shared settings pulled from ERFutils.
model_set = ERFutils.model_set
A = ERFutils.A
ds_out = ERFutils.ds_out

train_id = ['1pctCO2']

for train in train_id:
    # ERF for this experiment, converted with ERFutils.ds_to_dict
    ERF_path = f'../Outputs/RF_Outputs/ERF/ERF_{train}_all_ds.nc4'
    ERF_ds = xr.open_dataset(ERF_path)
    ERF = {train: ERFutils.ds_to_dict(ERF_ds)}

    # Matching tas dataset for the same experiment
    tas_path = f'../Outputs/RF_Outputs/tas/tas_CMIP_{train}_all_ds.nc4'
    tas_ds = xr.open_dataset(tas_path)

In [None]:
# ERF of every model in `model_set` (gray) with the mean over the 'model'
# dimension overlaid (black). Series are drawn against their array index.
fig, ax = plt.subplots(figsize=(10,5))
for k, m in enumerate(model_set):
    # Only the first gray line gets a legend entry; the rest are hidden from the legend.
    ax.plot(ERF[train][m].ERF.values, alpha=0.5, c='gray',
            label='Individual models' if k == 0 else '_nolegend_')
ax.plot(ERFutils.concat_multirun(ERF[train],'model').mean(dim = 'model').ERF.values,
        c = 'k', linewidth=2, label='Multi-model mean')
ax.set_xlabel('Time step (index)')
ax.set_ylabel('ERF')
ax.set_title(f'ERF: {train}')
ax.legend();

In [None]:
def local_weighted_regression(x0, X, Y, tau):
    """Predict the response at ``x0`` with a locally weighted linear fit.

    A constant (bias) term is prepended to both the query point and the
    samples, each sample is weighted by the Gaussian kernel returned by
    ``weights_calculate``, and the weighted normal equations are solved with
    a pseudo-inverse.

    Parameters
    ----------
    x0 : query location (combined with the bias term via ``np.r_[1, x0]``).
    X : sample locations; ``len(X)`` gives the number of samples.
    Y : sample responses, used as the right operand of ``... @ Y``.
    tau : kernel bandwidth passed to ``weights_calculate``.

    Returns
    -------
    The fitted value ``[1, x0] @ theta``.
    """
    # Augment query point and samples with the bias term
    query = np.r_[1, x0]
    design = np.c_[np.ones(len(X)), X]

    # Transposed design matrix with every sample (column) scaled by its kernel weight
    weighted_design_t = design.T * weights_calculate(query, design, tau)

    # Weighted normal equations, solved through the pseudo-inverse
    gram_pinv = np.linalg.pinv(weighted_design_t @ design)
    theta = gram_pinv @ weighted_design_t @ Y

    # Evaluate the local linear model at the query point
    return query @ theta

def weights_calculate(x0, X, tau):
    """Gaussian kernel weights of the rows of ``X`` relative to ``x0``.

    Computes ``exp(-||X[k] - x0||^2 / (2 * tau^2))`` for every row ``k``,
    summing the squared differences along axis 1.
    """
    squared_distance = np.sum((X - x0) ** 2, axis=1)
    return np.exp(squared_distance / (-2 * (tau **2) ))

In [None]:
train_id = ['1pctCO2']
from numpy import linalg as LA

for train in train_id:
    # Load ERF data
    ERF = {}
    ERF_path = f'../Outputs/RF_Outputs/ERF/ERF_{train}_all_ds.nc4'
    ERF_ds = xr.open_dataset(ERF_path)
    ERF[train] = ERFutils.ds_to_dict(ERF_ds)

    tas_path = f'../Outputs/RF_Outputs/tas/tas_CMIP_{train}_all_ds.nc4'
    tas_ds = xr.open_dataset(tas_path)

# Mean over the 'model' dimension for the forcing; lat/lon-weighted (weights `A`)
# and model-averaged temperature, restricted to the years covered by the forcing.
ERF_all = ERFutils.concat_multirun(ERF[train],'model').mean(dim = 'model')
tas_glob_mean = tas_ds.weighted(A).mean(dim = ['lat','lon']).mean(dim = ['model'])
tas_glob_mean = tas_glob_mean.rename({'s': 'year'})
tas_glob_mean = tas_glob_mean.sel(year = slice(ERF_all['year'].min(), ERF_all['year'].max()))

# Points at which the local regressions are evaluated
domain = np.linspace(ERF_all.year.values[0], ERF_all.year.values[-1], num=len(ERF_all.year.values))
X1 = ERF_all.year.values
Y1 = ERF_all.ERF.values

X2 = tas_glob_mean.year.values
Y2 = tas_glob_mean.tas.values

# One sub-diagonal per year: diags() broadcasts each scalar along its diagonal,
# giving a lower-triangular Toeplitz matrix of the forcing.
N_years = len(ERF_all['year'])
offsets = list(range(0, -N_years, -1))

for tau in [9,10,11]:
    # The fits depend only on the bandwidth tau, not on the shift index i,
    # so compute them once per tau instead of once per (tau, i).
    ERF_pred = np.array([local_weighted_regression(x0, X1, Y1, tau) for x0 in domain])
    tas_pred = np.array([local_weighted_regression(x0, X2, Y2, tau) for x0 in domain])

    fig, ax = plt.subplots(figsize=(10,5))
    for i in range(2,9):
        # Shift both series by their i-th fitted value before solving
        input_matrix = diags(ERF_pred + ERF_pred[i],offsets=offsets,shape=(N_years,N_years),format='csr')

        # Forward-substitution solve of the lower-triangular system for G
        G = spsolve_triangular(input_matrix,tas_pred + tas_pred[i],lower=True)
        ax.plot(G,label = f'i = {i}')
        print(np.trapz(G))

    ax.set_title(f'tau = {tau}')
    ax.legend()


In [None]:
train_id = ['1pctCO2']
from numpy import linalg as LA
from scipy.linalg import solve
from scipy.sparse.linalg import spsolve

A = ERFutils.A

for train in train_id:
    # ERF for this experiment, converted with ERFutils.ds_to_dict
    ERF_path = f'../Outputs/RF_Outputs/ERF/ERF_{train}_all_ds.nc4'
    ERF_ds = xr.open_dataset(ERF_path)
    ERF = {train: ERFutils.ds_to_dict(ERF_ds)}

    # Matching tas dataset
    tas_path = f'../Outputs/RF_Outputs/tas/tas_CMIP_{train}_all_ds.nc4'
    tas_ds = xr.open_dataset(tas_path)

    # Average both over 'model'; the tas time axis 's' is renamed to 'year'
    ERF_all = ERFutils.concat_multirun(ERF[train],'model').mean(dim = 'model')
    tas_all = tas_ds.mean(dim = ['model']).rename({'s': 'year'})

    # Regression inputs: forcing, and the lat/lon mean of tas weighted by A
    X1 = ERF_all.year.values
    Y1 = ERF_all.ERF.values

    X2 = tas_all.year.values
    Y2 = tas_all.weighted(A).mean(dim = ['lat','lon']).tas.values

    tau = 20   # kernel bandwidth of the local regression
    j = 4      # number of leading years skipped

    # Evaluation points start at the j-th year; one sub-diagonal per remaining year
    N_years = len(ERF_all['year']) - j
    offsets = list(range(0, -N_years, -1))
    domain = np.linspace(X1[j], X1[-1], num=N_years)

    # Locally weighted fits of forcing and response on the evaluation points
    ERF_pred = [local_weighted_regression(x0, X1, Y1, tau) for x0 in domain]
    tas_pred = [local_weighted_regression(x0, X2, Y2, tau) for x0 in domain]

    # Lower-triangular forcing matrix (each scalar fills one diagonal), then solve for G
    input_matrix = diags(ERF_pred,offsets=offsets,shape=(N_years,N_years),format='csr')
    G_glob = spsolve_triangular(input_matrix,tas_pred,lower=True)

    # Report the condition number of the dense form of the system
    array_mat = input_matrix.toarray()
    cond_num = LA.cond(array_mat)
    print(f'Cond: {cond_num}')

    plt.plot(G_glob,label=j)
plt.legend()

In [None]:
train_id = ['ssp585']
from numpy import linalg as LA
from scipy.linalg import solve
from scipy.sparse.linalg import spsolve

A = ERFutils.A

for train in train_id:
    # Scenario and historical ERF are stored in separate files
    ERF_path = f'../Outputs/RF_Outputs/ERF/ERF_{train}_all_ds.nc4'
    ERF_path_hist = f'../Outputs/RF_Outputs/ERF/ERF_historical_all_ds.nc4'
    ERF_ssp = xr.open_dataset(ERF_path)
    ERF_hist = xr.open_dataset(ERF_path_hist)

    # Relabel the scenario years as 165..249 and append them to the historical
    # record along 'year', then convert with ERFutils.ds_to_dict
    ERF_ds = xr.concat([ERF_hist,ERF_ssp.assign_coords(year = range(165,250))],dim = 'year')
    ERF = {train: ERFutils.ds_to_dict(ERF_ds)}

    # Matching tas dataset
    tas_path = f'../Outputs/RF_Outputs/tas/tas_CMIP_{train}_all_ds.nc4'
    tas_ds = xr.open_dataset(tas_path)

    # Average both over 'model'; the tas time axis 's' is renamed to 'year'
    ERF_all = ERFutils.concat_multirun(ERF[train],'model').mean(dim = 'model')
    tas_all = tas_ds.mean(dim = ['model']).rename({'s': 'year'})

    # Regression inputs: forcing, and the lat/lon mean of tas weighted by A
    X1 = ERF_all.year.values
    Y1 = ERF_all.ERF.values

    X2 = tas_all.year.values
    Y2 = tas_all.weighted(A).mean(dim = ['lat','lon']).tas.values

    # Single-valued sweeps: tau is the kernel bandwidth, j the first year index used
    for tau in [20]:
        for j in [165]:
            # Evaluation points start at the j-th year; one sub-diagonal per remaining year
            N_years = len(ERF_all['year']) - j
            offsets = list(range(0, -N_years, -1))
            domain = np.linspace(X1[j], X1[-1], num=N_years)

            # Locally weighted fits of forcing and response on the evaluation points
            ERF_pred = [local_weighted_regression(x0, X1, Y1, tau) for x0 in domain]
            tas_pred = [local_weighted_regression(x0, X2, Y2, tau) for x0 in domain]

            # Lower-triangular forcing matrix, then solve for G
            input_matrix = diags(ERF_pred,offsets=offsets,shape=(N_years,N_years),format='csr')
            G_glob = spsolve_triangular(input_matrix,tas_pred,lower=True)

            # Report the condition number of the dense form of the system
            array_mat = input_matrix.toarray()
            cond_num = LA.cond(array_mat)
            print(f'Cond: {cond_num}')

            plt.plot(G_glob,label=f'{tau}, {j}')
plt.legend()

In [None]:

# Compare the raw series with their local-regression fits on a common year axis.
# The fits are evaluated on `domain` (which starts at the j-th year), so they are
# plotted against `domain` rather than their array index; otherwise they would be
# shifted relative to the raw series whenever j > 0.
fig, ax = plt.subplots(figsize=(10,5))
ax.plot(ERF_all.year.values, ERF_all.ERF.values, label='ERF (model mean)')
ax.plot(tas_all.year.values, tas_all.weighted(A).mean(dim = ['lat','lon']).tas.values,
        label='tas (model mean, weighted lat/lon mean)')
ax.plot(domain, ERF_pred, label='ERF (local regression fit)')
ax.plot(domain, tas_pred, label='tas (local regression fit)')
ax.set_xlabel('year')
ax.legend();

In [None]:
    

    # Build the Green's functions locally: flatten (lat, lon) into one 'allpoints'
    # axis so every grid point becomes one column of a (year, allpoints) array.
    stacked_response = tas_all.stack(allpoints=['lat','lon'])

    # Work on plain NumPy arrays, xarray indexing is too slow
    stacked_tas = stacked_response.tas.values
    N_latlong = len(stacked_tas[0])

    # Smooth every grid-point series on `domain`. The kernel weights and the
    # pseudo-inverse inside local_weighted_regression depend only on (x0, X2, tau),
    # not on the grid point, so all columns are passed at once as a 2-D Y: the fit
    # then returns one value per grid point and the expensive part runs once per
    # year instead of once per (year, grid point).
    stacked_response_local = np.array(
        [local_weighted_regression(x0, X2, stacked_tas, tau) for x0 in domain]
    )

    # input_matrix is lower-triangular by construction; spsolve_triangular accepts a
    # 2-D right-hand side, so a single call solves for every grid point.
    G_stacked = spsolve_triangular(input_matrix,stacked_response_local,lower=True)

    # Get G into the correct format: same (lat, lon) stacking as the response,
    # then unstack back onto the grid
    G = xr.Dataset(coords={'lon': ('lon', tas_all.lon.values),
                            'lat': ('lat', tas_all.lat.values),
                            'year': ('year', range(N_years))})
    G = G.stack(allpoints=['lat','lon'])
    G['G[tas]'] = (('year','allpoints'),G_stacked)
    G = G.unstack('allpoints')

    # Express 'year' relative to its first entry
    G['year'] = G['year'] - G['year'][0]

    G.to_netcdf(f'../Outputs/RF_Outputs/GFs/G_loess_{train}_ERF_mean_ds.nc4')