This notebook calculates the global bias and RMSE of each sampling experiment, together with the interquartile range (IQR) of each metric across the testbed members. Reconstructions are compared to the "model truth" of the Large Ensemble Testbed (LET).

This notebook was created by Thea Hatlen Heimdal, in collaboration with Abby Shaum and Julius Busecke.

In [None]:
import numpy as np
import pandas as pd
import xarray as xr
import pickle
import matplotlib.pyplot as plt

In [2]:
# Load the dictionary of LET members. The cells below iterate over it as
# {ensemble name: list of member identifiers}.
# NOTE(review): this path starts with /local/data while every other path in the
# notebook starts with /data — confirm both roots are valid on the target machine.
# NOTE(review): pickle.load executes arbitrary code from the file; only use it on
# files you produced yourself.

path_LET = "/local/data/artemis/workspace/theimdal/saildrone/LET_pickle_files/members_LET_dict.pickle"

with open(path_LET, mode='rb') as handle:
    mems_dict = pickle.load(handle)

## Bring in reconstructions and testbed truth

### SOCAT baseline

In [3]:
# Build one DataArray holding testbed truth and the SOCAT-only reconstruction,
# stacked along 'status', for every LET ensemble and member.

# Open every file with the whole time axis as a single chunk.
kwargs = dict(chunks={'time':-1})

datasets_ens = []
for ens, mem_list in mems_dict.items():
    # Only the truth filename depends on the ensemble: CanESM2 files end in 201712.
    date_str = '201712' if ens == "CanESM2" else '201701'

    datasets_member = []
    for n_member, member in enumerate(mem_list):
        truth_path = f"/data/artemis/simulations/LET/{ens}/member_{member}/pCO2_2D_mon_{ens}{member}_1x1_198201-{date_str}.nc"
        recon_path = f"/data/artemis/workspace/theimdal/SOCAT/models/reconstructions/xg/{ens}/member_{member}/recon_pCO2DIC_pCO2_2D_mon_{ens}_{member}_1x1_198201-201701.nc"

        truth = xr.open_dataset(truth_path, **kwargs).pCO2
        recon = xr.open_dataset(recon_path, **kwargs).pCO2_unseen_recon

        # Tag each field with its role, and give the truth the reconstruction's
        # time values so both share one time coordinate when concatenated.
        truth = truth.assign_coords(status='truth', time=recon['time'].data)
        recon = recon.assign_coords(status='reconstructed')

        # Stack truth/reconstruction along 'status', then record which member this is.
        member_ds = xr.concat([truth, recon], dim='status').assign_coords(
            n_member=n_member, member=member
        )
        datasets_member.append(member_ds)

    # All members of this ensemble, labelled with the ensemble name.
    ds_ens = xr.concat(datasets_member, dim='n_member').assign_coords(ensemble=ens)
    datasets_ens.append(ds_ens)

ds_SOCAT_only = xr.concat(datasets_ens, dim='ensemble', join='override')

## Historical + SOCAT

### Baseline

In [4]:
# Build one DataArray holding testbed truth and the 500_floats/baseline
# reconstruction, stacked along 'status', for every LET ensemble and member.

# Open every file with the whole time axis as a single chunk.
kwargs = dict(chunks={'time':-1})

datasets_ens = []
for ens, mem_list in mems_dict.items():
    # Only the truth filename depends on the ensemble: CanESM2 files end in 201712.
    date_str = '201712' if ens == "CanESM2" else '201701'

    datasets_member = []
    for n_member, member in enumerate(mem_list):
        truth_path = f"/data/artemis/simulations/LET/{ens}/member_{member}/pCO2_2D_mon_{ens}{member}_1x1_198201-{date_str}.nc"
        recon_path = f"/data/artemis/workspace/theimdal/GO-BGC/models/500_floats/baseline/reconstructions/xg/{ens}/member_{member}/recon_pCO2DIC_pCO2_2D_mon_{ens}_{member}_1x1_198201-201701.nc"

        truth = xr.open_dataset(truth_path, **kwargs).pCO2
        recon = xr.open_dataset(recon_path, **kwargs).pCO2_unseen_recon

        # Tag each field with its role, and give the truth the reconstruction's
        # time values so both share one time coordinate when concatenated.
        truth = truth.assign_coords(status='truth', time=recon['time'].data)
        recon = recon.assign_coords(status='reconstructed')

        # Stack truth/reconstruction along 'status', then record which member this is.
        member_ds = xr.concat([truth, recon], dim='status').assign_coords(
            n_member=n_member, member=member
        )
        datasets_member.append(member_ds)

    # All members of this ensemble, labelled with the ensemble name.
    ds_ens = xr.concat(datasets_member, dim='n_member').assign_coords(ensemble=ens)
    datasets_ens.append(ds_ens)

ds_500_historical_baseline = xr.concat(datasets_ens, dim='ensemble', join='override')

### Biased floats

In [5]:
# Build one DataArray holding testbed truth and the 500_floats/bias4
# reconstruction, stacked along 'status', for every LET ensemble and member.

# Open every file with the whole time axis as a single chunk.
kwargs = dict(chunks={'time':-1})

datasets_ens = []
for ens, mem_list in mems_dict.items():
    # Only the truth filename depends on the ensemble: CanESM2 files end in 201712.
    date_str = '201712' if ens == "CanESM2" else '201701'

    datasets_member = []
    for n_member, member in enumerate(mem_list):
        truth_path = f"/data/artemis/simulations/LET/{ens}/member_{member}/pCO2_2D_mon_{ens}{member}_1x1_198201-{date_str}.nc"
        recon_path = f"/data/artemis/workspace/theimdal/GO-BGC/models/500_floats/bias4/reconstructions/xg/{ens}/member_{member}/recon_pCO2DIC_pCO2_2D_mon_{ens}_{member}_1x1_198201-201701.nc"

        truth = xr.open_dataset(truth_path, **kwargs).pCO2
        recon = xr.open_dataset(recon_path, **kwargs).pCO2_unseen_recon

        # Tag each field with its role, and give the truth the reconstruction's
        # time values so both share one time coordinate when concatenated.
        truth = truth.assign_coords(status='truth', time=recon['time'].data)
        recon = recon.assign_coords(status='reconstructed')

        # Stack truth/reconstruction along 'status', then record which member this is.
        member_ds = xr.concat([truth, recon], dim='status').assign_coords(
            n_member=n_member, member=member
        )
        datasets_member.append(member_ds)

    # All members of this ensemble, labelled with the ensemble name.
    ds_ens = xr.concat(datasets_member, dim='n_member').assign_coords(ensemble=ens)
    datasets_ens.append(ds_ens)

ds_500_historical_bias = xr.concat(datasets_ens, dim='ensemble', join='override')

### Floats with random uncertainty


In [6]:
# Build one DataArray holding testbed truth and the 500_floats/uncertainty11
# reconstruction, stacked along 'status', for every LET ensemble and member.

# Open every file with the whole time axis as a single chunk.
kwargs = dict(chunks={'time':-1})

datasets_ens = []
for ens, mem_list in mems_dict.items():
    # Only the truth filename depends on the ensemble: CanESM2 files end in 201712.
    date_str = '201712' if ens == "CanESM2" else '201701'

    datasets_member = []
    for n_member, member in enumerate(mem_list):
        truth_path = f"/data/artemis/simulations/LET/{ens}/member_{member}/pCO2_2D_mon_{ens}{member}_1x1_198201-{date_str}.nc"
        recon_path = f"/data/artemis/workspace/theimdal/GO-BGC/models/500_floats/uncertainty11/reconstructions/xg/{ens}/member_{member}/recon_pCO2DIC_pCO2_2D_mon_{ens}_{member}_1x1_198201-201701.nc"

        truth = xr.open_dataset(truth_path, **kwargs).pCO2
        recon = xr.open_dataset(recon_path, **kwargs).pCO2_unseen_recon

        # Tag each field with its role, and give the truth the reconstruction's
        # time values so both share one time coordinate when concatenated.
        truth = truth.assign_coords(status='truth', time=recon['time'].data)
        recon = recon.assign_coords(status='reconstructed')

        # Stack truth/reconstruction along 'status', then record which member this is.
        member_ds = xr.concat([truth, recon], dim='status').assign_coords(
            n_member=n_member, member=member
        )
        datasets_member.append(member_ds)

    # All members of this ensemble, labelled with the ensemble name.
    ds_ens = xr.concat(datasets_member, dim='n_member').assign_coords(ensemble=ens)
    datasets_ens.append(ds_ens)

ds_500_historical_uncertainty = xr.concat(datasets_ens, dim='ensemble', join='override')

## Optimized + SOCAT

### Baseline

In [7]:
# Build one DataArray holding testbed truth and the 500_floats_optimized/baseline
# reconstruction, stacked along 'status', for every LET ensemble and member.

# Open every file with the whole time axis as a single chunk.
kwargs = dict(chunks={'time':-1})

datasets_ens = []
for ens, mem_list in mems_dict.items():
    # Only the truth filename depends on the ensemble: CanESM2 files end in 201712.
    date_str = '201712' if ens == "CanESM2" else '201701'

    datasets_member = []
    for n_member, member in enumerate(mem_list):
        truth_path = f"/data/artemis/simulations/LET/{ens}/member_{member}/pCO2_2D_mon_{ens}{member}_1x1_198201-{date_str}.nc"
        recon_path = f"/data/artemis/workspace/theimdal/GO-BGC/models/500_floats_optimized/baseline/reconstructions/xg/{ens}/member_{member}/recon_pCO2DIC_pCO2_2D_mon_{ens}_{member}_1x1_198201-201701.nc"

        truth = xr.open_dataset(truth_path, **kwargs).pCO2
        recon = xr.open_dataset(recon_path, **kwargs).pCO2_unseen_recon

        # Tag each field with its role, and give the truth the reconstruction's
        # time values so both share one time coordinate when concatenated.
        truth = truth.assign_coords(status='truth', time=recon['time'].data)
        recon = recon.assign_coords(status='reconstructed')

        # Stack truth/reconstruction along 'status', then record which member this is.
        member_ds = xr.concat([truth, recon], dim='status').assign_coords(
            n_member=n_member, member=member
        )
        datasets_member.append(member_ds)

    # All members of this ensemble, labelled with the ensemble name.
    ds_ens = xr.concat(datasets_member, dim='n_member').assign_coords(ensemble=ens)
    datasets_ens.append(ds_ens)

ds_500_optimized_baseline = xr.concat(datasets_ens, dim='ensemble', join='override')

### Biased floats

In [8]:
# Build one DataArray holding testbed truth and the 500_floats_optimized/bias4
# reconstruction, stacked along 'status', for every LET ensemble and member.

# Open every file with the whole time axis as a single chunk.
kwargs = dict(chunks={'time':-1})

datasets_ens = []
for ens, mem_list in mems_dict.items():
    # Only the truth filename depends on the ensemble: CanESM2 files end in 201712.
    date_str = '201712' if ens == "CanESM2" else '201701'

    datasets_member = []
    for n_member, member in enumerate(mem_list):
        truth_path = f"/data/artemis/simulations/LET/{ens}/member_{member}/pCO2_2D_mon_{ens}{member}_1x1_198201-{date_str}.nc"
        recon_path = f"/data/artemis/workspace/theimdal/GO-BGC/models/500_floats_optimized/bias4/reconstructions/xg/{ens}/member_{member}/recon_pCO2DIC_pCO2_2D_mon_{ens}_{member}_1x1_198201-201701.nc"

        truth = xr.open_dataset(truth_path, **kwargs).pCO2
        recon = xr.open_dataset(recon_path, **kwargs).pCO2_unseen_recon

        # Tag each field with its role, and give the truth the reconstruction's
        # time values so both share one time coordinate when concatenated.
        truth = truth.assign_coords(status='truth', time=recon['time'].data)
        recon = recon.assign_coords(status='reconstructed')

        # Stack truth/reconstruction along 'status', then record which member this is.
        member_ds = xr.concat([truth, recon], dim='status').assign_coords(
            n_member=n_member, member=member
        )
        datasets_member.append(member_ds)

    # All members of this ensemble, labelled with the ensemble name.
    ds_ens = xr.concat(datasets_member, dim='n_member').assign_coords(ensemble=ens)
    datasets_ens.append(ds_ens)

ds_500_optimized_bias = xr.concat(datasets_ens, dim='ensemble', join='override')

### Floats with random uncertainty

In [9]:
# Build one DataArray holding testbed truth and the 500_floats_optimized/uncertainty11
# reconstruction, stacked along 'status', for every LET ensemble and member.

# Open every file with the whole time axis as a single chunk.
kwargs = dict(chunks={'time':-1})

datasets_ens = []
for ens, mem_list in mems_dict.items():
    # Only the truth filename depends on the ensemble: CanESM2 files end in 201712.
    date_str = '201712' if ens == "CanESM2" else '201701'

    datasets_member = []
    for n_member, member in enumerate(mem_list):
        truth_path = f"/data/artemis/simulations/LET/{ens}/member_{member}/pCO2_2D_mon_{ens}{member}_1x1_198201-{date_str}.nc"
        recon_path = f"/data/artemis/workspace/theimdal/GO-BGC/models/500_floats_optimized/uncertainty11/reconstructions/xg/{ens}/member_{member}/recon_pCO2DIC_pCO2_2D_mon_{ens}_{member}_1x1_198201-201701.nc"

        truth = xr.open_dataset(truth_path, **kwargs).pCO2
        recon = xr.open_dataset(recon_path, **kwargs).pCO2_unseen_recon

        # Tag each field with its role, and give the truth the reconstruction's
        # time values so both share one time coordinate when concatenated.
        truth = truth.assign_coords(status='truth', time=recon['time'].data)
        recon = recon.assign_coords(status='reconstructed')

        # Stack truth/reconstruction along 'status', then record which member this is.
        member_ds = xr.concat([truth, recon], dim='status').assign_coords(
            n_member=n_member, member=member
        )
        datasets_member.append(member_ds)

    # All members of this ensemble, labelled with the ensemble name.
    ds_ens = xr.concat(datasets_member, dim='n_member').assign_coords(ensemble=ens)
    datasets_ens.append(ds_ens)

ds_500_optimized_uncertainty = xr.concat(datasets_ens, dim='ensemble', join='override')

### Calculate global bias over 2000-2016

In [4]:
def _global_bias(ds):
    """Return the 2000-2016 mean of (reconstructed - truth) for one experiment.

    `ds` carries a 'status' dimension ordered [truth, reconstructed], so
    `diff('status')` yields reconstruction minus truth. The difference is then
    averaged over time, latitude, longitude and the leftover length-1 'status'
    axis; the remaining dimensions of `ds` are kept.

    NOTE(review): the spatial mean is unweighted (no grid-cell area weights) —
    confirm that this is the intended definition of "global" bias.
    """
    difference = ds.diff('status')
    difference = difference.sel(time=slice("2000","2016"))
    return difference.mean(['time', 'ylat', 'xlon','status'])


# SOCAT-only baseline
SOCAT = _global_bias(ds_SOCAT_only)

# Historical float distribution + SOCAT
historical_baseline = _global_bias(ds_500_historical_baseline)
historical_bias = _global_bias(ds_500_historical_bias)
historical_uncertainty = _global_bias(ds_500_historical_uncertainty)

# Optimized float distribution + SOCAT
optimized_baseline = _global_bias(ds_500_optimized_baseline)
optimized_bias = _global_bias(ds_500_optimized_bias)
optimized_uncertainty = _global_bias(ds_500_optimized_uncertainty)

### Calculate 1 IQR of the bias

#### SOCAT baseline

In [11]:
# Interquartile range of the SOCAT-only bias: 25th and 75th percentiles over all values in SOCAT.
q25_socat, q75_socat = np.percentile(SOCAT, [25, 75])
iqr_socat = q75_socat - q25_socat
iqr_socat, q75_socat, q25_socat

(0.5212551459037529, 0.8858042701986477, 0.36454912429489483)

#### Historical

In [12]:
# Interquartile range of the historical-baseline bias (25th/75th percentiles over all values).
q25_hbas, q75_hbas = np.percentile(historical_baseline, [25, 75])
iqr_hbas = q75_hbas - q25_hbas
iqr_hbas, q75_hbas, q25_hbas

(0.35669344806296294, 0.26921840224756466, -0.08747504581539828)

In [13]:
# Interquartile range of the historical biased-floats bias (25th/75th percentiles over all values).
q25_hbias, q75_hbias = np.percentile(historical_bias, [25, 75])
iqr_hbias = q75_hbias - q25_hbias
iqr_hbias, q75_hbias, q25_hbias

(0.27403067315068874, 1.3012048635315425, 1.0271741903808538)

In [14]:
# Interquartile range of the historical random-uncertainty bias (25th/75th percentiles over all values).
q25_hunc, q75_hunc = np.percentile(historical_uncertainty, [25, 75])
iqr_hunc = q75_hunc - q25_hunc
iqr_hunc, q75_hunc, q25_hunc

(0.3268171392499563, 0.27230483436800024, -0.054512304881956075)

#### Optimized

In [15]:
# Interquartile range of the optimized-baseline bias (25th/75th percentiles over all values).
q25_obas, q75_obas = np.percentile(optimized_baseline, [25, 75])
iqr_obas = q75_obas - q25_obas
iqr_obas, q75_obas, q25_obas

(0.14325477692158067, 0.04515925882100944, -0.09809551810057124)

In [16]:
# Interquartile range of the optimized biased-floats bias (25th/75th percentiles over all values).
q25_obias, q75_obias = np.percentile(optimized_bias, [25, 75])
iqr_obias = q75_obias - q25_obias
iqr_obias, q75_obias, q25_obias

(0.16623165582402688, 1.624613285673324, 1.458381629849297)

In [17]:
# Interquartile range of the optimized random-uncertainty bias (25th/75th percentiles over all values).
q25_ounc, q75_ounc = np.percentile(optimized_uncertainty, [25, 75])
iqr_ounc = q75_ounc - q25_ounc
iqr_ounc, q75_ounc, q25_ounc

(0.15151229252162507, 0.031841268959600225, -0.11967102356202486)

### Calculate global RMSE over 2000-2016

In [10]:
def _global_rmse(ds):
    """Return the 2000-2016 root-mean-square error of one experiment.

    `ds` carries a 'status' dimension ordered [truth, reconstructed], so
    `diff('status')` yields reconstruction minus truth. The squared difference
    is averaged over time, latitude, longitude and the leftover length-1
    'status' axis, then square-rooted; the remaining dimensions of `ds` are kept.

    `np.sqrt` is used instead of `xr.ufuncs.sqrt`: the `xarray.ufuncs` module was
    deprecated and is absent from a range of xarray releases, whereas NumPy
    ufuncs applied to a DataArray return a DataArray in every version.

    NOTE(review): the spatial mean is unweighted (no grid-cell area weights) —
    confirm that this is the intended definition of "global" RMSE.
    """
    squared_error = ds.diff('status') ** 2
    mean_squared_error = squared_error.sel(time=slice('2000','2016')).mean(
        ['time', 'ylat', 'xlon','status']
    )
    return np.sqrt(mean_squared_error)


# SOCAT-only baseline
RMSE_SOCAT = _global_rmse(ds_SOCAT_only)

# Historical float distribution + SOCAT
RMSE_historical_baseline = _global_rmse(ds_500_historical_baseline)
RMSE_historical_bias = _global_rmse(ds_500_historical_bias)
RMSE_historical_uncertainty = _global_rmse(ds_500_historical_uncertainty)

# Optimized float distribution + SOCAT
RMSE_optimized_baseline = _global_rmse(ds_500_optimized_baseline)
RMSE_optimized_bias = _global_rmse(ds_500_optimized_bias)
RMSE_optimized_uncertainty = _global_rmse(ds_500_optimized_uncertainty)

### Calculate 1 IQR of the RMSE

#### SOCAT baseline

In [11]:
# Interquartile range of the SOCAT-only RMSE (25th/75th percentiles over all values).
# Note: this reuses, and therefore overwrites, the q75_/q25_/iqr_socat names set for the bias.
q25_socat, q75_socat = np.percentile(RMSE_SOCAT, [25, 75])
iqr_socat = q75_socat - q25_socat
iqr_socat

0.3383394142414886

#### Historical

In [12]:
# Interquartile range of the historical-baseline RMSE (25th/75th percentiles over all values).
# Note: this reuses, and therefore overwrites, the q75_/q25_/iqr_hbas names set for the bias.
q25_hbas, q75_hbas = np.percentile(RMSE_historical_baseline, [25, 75])
iqr_hbas = q75_hbas - q25_hbas
iqr_hbas

0.14599076437849057

In [13]:
# Interquartile range of the historical biased-floats RMSE (25th/75th percentiles over all values).
# Note: this reuses, and therefore overwrites, the q75_/q25_/iqr_hbias names set for the bias.
q25_hbias, q75_hbias = np.percentile(RMSE_historical_bias, [25, 75])
iqr_hbias = q75_hbias - q25_hbias
iqr_hbias

0.18133205687982112

In [14]:
# Interquartile range of the historical random-uncertainty RMSE (25th/75th percentiles over all values).
# Note: this reuses, and therefore overwrites, the q75_/q25_/iqr_hunc names set for the bias.
q25_hunc, q75_hunc = np.percentile(RMSE_historical_uncertainty, [25, 75])
iqr_hunc = q75_hunc - q25_hunc
iqr_hunc

0.20547132380134947

#### Optimized

In [15]:
# Interquartile range of the optimized-baseline RMSE (25th/75th percentiles over all values).
# Note: this reuses, and therefore overwrites, the q75_/q25_/iqr_obas names set for the bias.
q25_obas, q75_obas = np.percentile(RMSE_optimized_baseline, [25, 75])
iqr_obas = q75_obas - q25_obas
iqr_obas

0.14213010009073912

In [16]:
# Interquartile range of the optimized biased-floats RMSE (25th/75th percentiles over all values).
# Note: this reuses, and therefore overwrites, the q75_/q25_/iqr_obias names set for the bias.
q25_obias, q75_obias = np.percentile(RMSE_optimized_bias, [25, 75])
iqr_obias = q75_obias - q25_obias
iqr_obias

0.17011829714364524

In [17]:
# Interquartile range of the optimized random-uncertainty RMSE (25th/75th percentiles over all values).
# Note: this reuses, and therefore overwrites, the q75_/q25_/iqr_ounc names set for the bias.
q25_ounc, q75_ounc = np.percentile(RMSE_optimized_uncertainty, [25, 75])
iqr_ounc = q75_ounc - q25_ounc
iqr_ounc

0.1602009141359062