In [2]:
import pandas as pd
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import anderson_ksamp
from scipy.stats import ks_2samp

import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)

gcms = ["5ModelAvg", "GFDL-CM3", "GISS-E2-R", "IPSL-CM5A-LR", "MRI-CGCM3", "CCSM4"]
rcps = ["rcp45", "rcp60", "rcp85"]
clim_vars = ["pr_total_mm", "tas_mean_C"]

with open('extracted_data/ak_station_extractions.pickle', 'rb') as handle:
    di = pickle.load(handle)

In [3]:
def compute_cv(sample):
    cv = sample.mean() / sample.std()
    return round(cv, 2)


def nrmse(obs, predicted):
    # Calculate the RMSE
    rmse = np.sqrt(np.mean((predicted - obs) ** 2))

    # Calculate the range of the observed data
    data_range = max(observed) - min(obs)

    # Calculate the NRMSE
    nrmse = rmse / data_range

    return np.round(nrmse, 1), np.round(rmse, 1)


def compute_bias(obs, predicted):
    bias = sum(obs - predicted) * 1.0 / len(obs)
    return round(bias, 1)

In [4]:
cv_vals = []
delta_cv_vals = []
nrmse_vals = []
rmse_vals = []
ad_vals = []
ks_vals = []
max_vals = []
delta_max_vals = []
bias_vals = []

stations = []
stat_rcp = []
stat_gcm = []
stat_clim_vars = []

for station in di.keys():
    for model in gcms:
        for rcp in rcps:
            for var in clim_vars:
                modeled = di[station]["extracted_data"].loc[model].loc[rcp].loc[var]
                modeled = modeled.set_index(pd.DatetimeIndex(modeled.index.values))
                
                observed = di[station][var]
                assert len(observed) == 204
                
                stations.append(station)
                stat_rcp.append(rcp)
                stat_gcm.append(model)
                stat_clim_vars.append(var)
                
                cv_vals.append(compute_cv(modeled).value)
                delta_cv_vals.append(abs(compute_cv(observed) - compute_cv(modeled)).value)
                se_vals = nrmse(observed, modeled.value.values)
                nrmse_vals.append(se_vals[0])
                rmse_vals.append(se_vals[1])
                bias_vals.append(compute_bias(observed, modeled.value.values))
                
                
                max_val = round(modeled.value.max(), 1)
                max_vals.append(max_val)
                delta_max_vals.append(round((observed.max() - max_val), 1))
                
                
                a, b, c = anderson_ksamp([modeled.value.values, observed])
                ad_vals.append([a, b, c])
                
                ks_vals.append(ks_2samp(modeled.value.values, observed, method="exact"))

In [5]:
statdf = pd.DataFrame(nrmse_vals)
statdf["Station"] = stations
statdf["Scenario"] = stat_rcp
statdf["Model"] = stat_gcm
statdf["Variable"] = stat_clim_vars

statdf["CV"] = cv_vals
statdf["Bias"] = bias_vals
statdf["NRMSE"] = nrmse_vals
statdf["RMSE"] = rmse_vals
statdf["CV Delta From Observed"] = delta_cv_vals
statdf["Max. Value"] = max_vals
statdf["Max. Value Delta From Observed"] = delta_max_vals



#statdf["A-D statistic"] = [round(x[0], 2) for x in ad_vals]
#statdf["A-D critical values"] = [x[1] for x in ad_vals]
statdf["A-D p-value"] = [round(x[2], 2) for x in ad_vals]

#statdf["K-S statistic"] = [round(x.statistic, 2) for x in ks_vals]
statdf["K-S p-value"] = [round(x.pvalue, 2) for x in ks_vals]
#statdf["K-S statistic location"] = [round(x.statistic_location, 2) for x in ks_vals]
statdf["K-S statistic sign"] = [round(x.statistic_sign, 2) for x in ks_vals]


del statdf[0]
statdf

Unnamed: 0,Station,Scenario,Model,Variable,CV,Bias,NRMSE,RMSE,CV Delta From Observed,Max. Value,Max. Value Delta From Observed,A-D p-value,K-S p-value,K-S statistic sign
0,Juneau,rcp45,5ModelAvg,pr_total_mm,2.26,7.9,0.2,70.8,0.33,386.0,-3.7,0.01,0.09,-1
1,Juneau,rcp45,5ModelAvg,tas_mean_C,1.07,-0.7,0.1,2.3,0.16,15.9,0.3,0.16,0.28,-1
2,Juneau,rcp60,5ModelAvg,pr_total_mm,2.11,10.1,0.2,70.9,0.18,423.0,-40.7,0.00,0.01,1
3,Juneau,rcp60,5ModelAvg,tas_mean_C,1.07,-0.8,0.1,2.3,0.16,15.9,0.3,0.11,0.34,-1
4,Juneau,rcp85,5ModelAvg,pr_total_mm,2.15,8.6,0.2,69.8,0.22,386.0,-3.7,0.00,0.00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319,Utqiaġvik,rcp45,CCSM4,tas_mean_C,-0.84,0.8,0.1,4.1,0.02,9.7,-0.6,0.25,0.72,1
320,Utqiaġvik,rcp60,CCSM4,pr_total_mm,1.29,-4.4,0.2,14.2,0.16,99.0,-41.1,0.00,0.00,-1
321,Utqiaġvik,rcp60,CCSM4,tas_mean_C,-0.81,0.7,0.1,4.1,0.01,9.2,-0.1,0.25,0.64,1
322,Utqiaġvik,rcp85,CCSM4,pr_total_mm,1.28,-4.9,0.2,13.9,0.15,73.0,-15.1,0.00,0.00,-1


In [6]:
statdf.round(2).to_csv("model_perf.csv", index=False)

In [None]:
prdf = statdf[statdf.Variable == "pr_total_mm"]
tasdf = statdf[statdf.Variable == "tas_mean_C"]

In [None]:
tasdf[tasdf.Station=="Juneau"].sort_values("Max. Value Delta From Observed")

In [None]:
prdf[prdf.Station=="Juneau"].sort_values("RMSE")

In [None]:
prdf[prdf.Station=="Anchorage"].sort_values("Max. Value Delta From Observed")

In [None]:
pivoted = prdf.pivot_table(index = ['Model','Scenario'],
                           values = ["Max. Value Delta From Observed"],
                           columns = 'Station',
                           aggfunc = 'mean')
pivoted.round()

In [None]:
fbx = di["Fairbanks"]["extracted_data"]
fbx_pr = fbx[fbx.index.get_level_values(2) == 'pr_total_mm']
fbx_pr.reset_index(inplace=True, level = ['model', 'scenario'])
fbx_pr.set_index(fbx_pr.index.droplevel(0), inplace=True)
fbx_pr.set_index(pd.DatetimeIndex(fbx_pr.index), inplace=True)

fbx_pr

In [None]:
fbx

In [None]:
tx = pd.DatetimeIndex(fbx.loc[("MRI-CGCM3", "rcp85", "tas_mean_C")].index).sort_values()
obs = pd.DataFrame({"value": di["Fairbanks"]["pr_total_mm"]}).set_index(tx)
obs

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(8,5))
sns.lineplot(data=obs.cumsum(), x="date", y="value", ax=ax)

sns.lineplot(
    data=fbx_pr.cumsum(), x="date", y="value", hue="model", style="scenario", ax=ax
)

