# This notebook aims at processing the data (redefine the dates)

In [1]:
import numpy as np
import matplotlib
import netCDF4 as netcdf

## 1- Extract the metadata 

In [3]:
import os

# Get the list of all files and directories
path = "/net/atmos/data/cmip6-ng/tos/ann/g025"
dir_list = os.listdir(path)

print("Files and directories in '", path, "' :")

dic_files = {}

list_model = []
list_forcing = []

for idx, file in enumerate(dir_list):

    file_split = file.split("_")
    
    # extract model names
    model_name = file_split[2]
    forcing = file_split[3]
    run_name = file_split[4]
    
    
    list_model.append(model_name)
    list_forcing.append(forcing)
    
model_names = list(set(list_model))
forcing_names = list(set(list_forcing))

# define names dictionary 
dic_model_forcing = {}

for idx,model in enumerate(model_names):
    print(idx, model)
    dic_model_forcing[model] = {}
    for forcing in forcing_names:
        dic_model_forcing[model][forcing] = {}

Files and directories in ' /net/atmos/data/cmip6-ng/tos/ann/g025 ' :
0 CESM2-FV2
1 NorESM2-MM
2 MRI-ESM2-0
3 MCM-UA-1-0
4 NorESM1-F
5 GISS-E2-1-G-CC
6 MPI-ESM1-2-HR
7 ACCESS-ESM1-5
8 CAS-ESM2-0
9 FIO-ESM-2-0
10 GISS-E3-G
11 E3SM-2-0-NARRM
12 KACE-1-0-G
13 SAM0-UNICON
14 ACCESS-CM2
15 NorCPM1
16 EC-Earth3-LR
17 TaiESM1
18 E3SM-1-1-ECA
19 GFDL-ESM4
20 MIROC-ES2L
21 CanESM5-1
22 HadGEM3-GC31-MM
23 CIESM
24 CNRM-ESM2-1
25 EC-Earth3-Veg
26 UKESM1-0-LL
27 UKESM1-1-LL
28 CNRM-CM6-1
29 INM-CM5-0
30 GISS-E2-1-G
31 KIOST-ESM
32 MIROC-ES2H
33 IPSL-CM5A2-INCA
34 ICON-ESM-LR
35 E3SM-2-0
36 FGOALS-g3
37 IPSL-CM6A-LR-INCA
38 BCC-CSM2-MR
39 CAMS-CSM1-0
40 HadGEM3-GC31-LL
41 EC-Earth3-AerChem
42 NorESM2-LM
43 EC-Earth3-Veg-LR
44 MPI-ESM-1-2-HAM
45 CMCC-CM2-SR5
46 IPSL-CM6A-LR
47 BCC-ESM1
48 CESM2-WACCM-FV2
49 CMCC-CM2-HR4
50 AWI-CM-1-1-MR
51 GISS-E2-2-H
52 CESM2-WACCM
53 GISS-E2-2-G
54 E3SM-1-0
55 GISS-E2-1-H
56 GFDL-CM4
57 NESM3
58 CMCC-ESM2
59 AWI-ESM-1-1-LR
60 INM-CM4-8
61 CNRM-CM6-1-HR
62 CanESM5-C

In [22]:
M = np.zeros((144,72))
for i in range(72):
    M[2*i,i] = 1/2
    M[2*i+1,i] = 1/2

In [23]:
U = np.random.rand(72,144)
print(np.mean(U[0,0:2]))
print(np.dot(U,M))

0.5041531848640128
[[0.50415318 0.83244125 0.26538955 ... 0.28313603 0.63134567 0.76045558]
 [0.93093481 0.59294451 0.52581834 ... 0.36080528 0.6887384  0.49133394]
 [0.64089404 0.44629855 0.7297673  ... 0.51201998 0.43541673 0.50667414]
 ...
 [0.18007497 0.33756892 0.32138285 ... 0.70694561 0.26053805 0.35306225]
 [0.34286297 0.16449366 0.35708145 ... 0.67564644 0.16997748 0.26957164]
 [0.6423668  0.16388931 0.59586199 ... 0.7869658  0.21915291 0.49949443]]


## 2- Extract the data from each file (model, forcing, ensemble member)

In [7]:
for idx, file in enumerate(dir_list):

    file_split = file.split("_")

    # extract model names
    model = file_split[2]
    forcing = file_split[3]
    run_name = file_split[4]
    
    if model in list(dic_model_forcing.keys()):
          
        # read files in the directory
        file2read = netcdf.Dataset(path +'/'+ file,'r')
        

        # set variables
        time = np.array(file2read.variables['time'][:])
        longitude = np.array(file2read.variables['lon'][:])
        latitude = np.array(file2read.variables['lat'][:])
        tos = np.array(file2read.variables['tos'][:])
        
        
        if (forcing == 'historical'):
            print(file)
            print(time/365)
            
        if (forcing == 'ssp585'):
            print(file)
            print(time/365)
            
        # assign nans to non-sea values
        tos[tos>1e19] = np.nan
        idx_nans = np.argwhere(np.isnan(tos))

        # get the data
        dic_model_forcing[model][forcing][run_name]= tos

tos_ann_AWI-CM-1-1-MR_historical_r1i1p1f1_g025.nc
tos_ann_AWI-CM-1-1-MR_historical_r2i1p1f1_g025.nc
tos_ann_AWI-CM-1-1-MR_historical_r3i1p1f1_g025.nc
tos_ann_AWI-CM-1-1-MR_historical_r4i1p1f1_g025.nc
tos_ann_AWI-CM-1-1-MR_historical_r5i1p1f1_g025.nc
tos_ann_BCC-CSM2-MR_historical_r1i1p1f1_g025.nc
tos_ann_BCC-CSM2-MR_historical_r2i1p1f1_g025.nc
tos_ann_BCC-CSM2-MR_historical_r3i1p1f1_g025.nc
tos_ann_BCC-ESM1_historical_r1i1p1f1_g025.nc
tos_ann_BCC-ESM1_historical_r2i1p1f1_g025.nc
tos_ann_BCC-ESM1_historical_r3i1p1f1_g025.nc
tos_ann_CAMS-CSM1-0_historical_r1i1p1f1_g025.nc
tos_ann_CAMS-CSM1-0_historical_r2i1p1f1_g025.nc
tos_ann_CAMS-CSM1-0_historical_r1i1p1f2_g025.nc
tos_ann_CESM2-WACCM_historical_r1i1p1f1_g025.nc
tos_ann_CESM2-WACCM_historical_r2i1p1f1_g025.nc
tos_ann_CESM2-WACCM_historical_r3i1p1f1_g025.nc
tos_ann_CESM2_historical_r10i1p1f1_g025.nc
tos_ann_CESM2_historical_r11i1p1f1_g025.nc
tos_ann_CESM2_historical_r1i1p1f1_g025.nc
tos_ann_CESM2_historical_r2i1p1f1_g025.nc
tos_ann_CESM2

tos_ann_GISS-E2-1-H_historical_r6i1p1f1_g025.nc
tos_ann_GISS-E2-1-H_historical_r7i1p1f1_g025.nc
tos_ann_GISS-E2-1-H_historical_r8i1p1f1_g025.nc
tos_ann_GISS-E2-1-H_historical_r9i1p1f1_g025.nc
tos_ann_GISS-E2-1-H_historical_r1i1p1f2_g025.nc
tos_ann_GISS-E2-1-H_historical_r2i1p3f1_g025.nc
tos_ann_GISS-E2-1-H_historical_r1i1p3f1_g025.nc
tos_ann_GISS-E2-1-H_historical_r3i1p3f1_g025.nc
tos_ann_GISS-E2-1-H_historical_r4i1p3f1_g025.nc
tos_ann_GISS-E2-1-H_historical_r5i1p3f1_g025.nc
tos_ann_GISS-E2-1-H_historical_r2i1p1f2_g025.nc
tos_ann_GISS-E2-1-H_historical_r3i1p1f2_g025.nc
tos_ann_GISS-E2-1-H_historical_r5i1p1f2_g025.nc
tos_ann_GISS-E2-1-H_historical_r4i1p1f2_g025.nc
tos_ann_GISS-E2-1-H_historical_r1i1p5f1_g025.nc
tos_ann_GISS-E2-1-H_historical_r2i1p5f1_g025.nc
tos_ann_GISS-E2-1-H_historical_r3i1p5f1_g025.nc
tos_ann_HadGEM3-GC31-LL_historical_r1i1p1f3_g025.nc
tos_ann_HadGEM3-GC31-LL_historical_r2i1p1f3_g025.nc
tos_ann_HadGEM3-GC31-LL_historical_r3i1p1f3_g025.nc
tos_ann_HadGEM3-GC31-LL_hist

tos_ann_CNRM-CM6-1_ssp585_r5i1p1f2_g025.nc
tos_ann_CNRM-CM6-1_ssp585_r6i1p1f2_g025.nc
tos_ann_CNRM-ESM2-1_ssp585_r1i1p1f2_g025.nc
tos_ann_CNRM-ESM2-1_ssp585_r2i1p1f2_g025.nc
tos_ann_CNRM-ESM2-1_ssp585_r3i1p1f2_g025.nc
tos_ann_CNRM-ESM2-1_ssp585_r4i1p1f2_g025.nc
tos_ann_CNRM-ESM2-1_ssp585_r5i1p1f2_g025.nc
tos_ann_CanESM5_ssp585_r10i1p1f1_g025.nc
tos_ann_CanESM5_ssp585_r10i1p2f1_g025.nc
tos_ann_CanESM5_ssp585_r1i1p1f1_g025.nc
tos_ann_CanESM5_ssp585_r1i1p2f1_g025.nc
tos_ann_CanESM5_ssp585_r2i1p1f1_g025.nc
tos_ann_CanESM5_ssp585_r2i1p2f1_g025.nc
tos_ann_CanESM5_ssp585_r3i1p1f1_g025.nc
tos_ann_CanESM5_ssp585_r3i1p2f1_g025.nc
tos_ann_CanESM5_ssp585_r4i1p1f1_g025.nc
tos_ann_CanESM5_ssp585_r4i1p2f1_g025.nc
tos_ann_CanESM5_ssp585_r5i1p1f1_g025.nc
tos_ann_CanESM5_ssp585_r5i1p2f1_g025.nc
tos_ann_CanESM5_ssp585_r6i1p1f1_g025.nc
tos_ann_CanESM5_ssp585_r6i1p2f1_g025.nc
tos_ann_CanESM5_ssp585_r7i1p1f1_g025.nc
tos_ann_CanESM5_ssp585_r7i1p2f1_g025.nc
tos_ann_CanESM5_ssp585_r8i1p1f1_g025.nc
tos_ann_CanE

tos_ann_MIROC6_historical_r15i1p1f1_g025.nc
tos_ann_MIROC6_historical_r37i1p1f1_g025.nc
tos_ann_MIROC6_historical_r13i1p1f1_g025.nc
tos_ann_MIROC6_historical_r19i1p1f1_g025.nc
tos_ann_MIROC6_historical_r35i1p1f1_g025.nc
tos_ann_MIROC6_historical_r32i1p1f1_g025.nc
tos_ann_MIROC6_historical_r43i1p1f1_g025.nc
tos_ann_MIROC6_historical_r29i1p1f1_g025.nc
tos_ann_MIROC6_historical_r26i1p1f1_g025.nc
tos_ann_MIROC6_historical_r20i1p1f1_g025.nc
tos_ann_MIROC6_historical_r50i1p1f1_g025.nc
tos_ann_MIROC6_historical_r40i1p1f1_g025.nc
tos_ann_MIROC6_historical_r11i1p1f1_g025.nc
tos_ann_MIROC6_historical_r38i1p1f1_g025.nc
tos_ann_MIROC6_historical_r44i1p1f1_g025.nc
tos_ann_MIROC6_historical_r34i1p1f1_g025.nc
tos_ann_MIROC6_historical_r12i1p1f1_g025.nc
tos_ann_MIROC6_historical_r45i1p1f1_g025.nc
tos_ann_MIROC6_historical_r47i1p1f1_g025.nc
tos_ann_MIROC6_historical_r27i1p1f1_g025.nc
tos_ann_MIROC6_historical_r46i1p1f1_g025.nc
tos_ann_MIROC6_historical_r42i1p1f1_g025.nc
tos_ann_CESM2-FV2_historical_r3i

tos_ann_ACCESS-ESM1-5_historical_r12i1p1f1_g025.nc
tos_ann_ACCESS-ESM1-5_ssp585_r7i1p1f1_g025.nc
tos_ann_ACCESS-ESM1-5_ssp585_r9i1p1f1_g025.nc
tos_ann_ACCESS-ESM1-5_ssp585_r5i1p1f1_g025.nc
tos_ann_ACCESS-ESM1-5_ssp585_r8i1p1f1_g025.nc
tos_ann_ACCESS-ESM1-5_ssp585_r10i1p1f1_g025.nc
tos_ann_ACCESS-ESM1-5_ssp585_r6i1p1f1_g025.nc
tos_ann_ACCESS-ESM1-5_ssp585_r4i1p1f1_g025.nc
tos_ann_KIOST-ESM_ssp585_r1i1p1f1_g025.nc
tos_ann_MRI-ESM2-0_historical_r1i1000p1f1_g025.nc
tos_ann_GFDL-ESM4_historical_r2i1p1f1_g025.nc
tos_ann_GFDL-ESM4_historical_r3i1p1f1_g025.nc
tos_ann_HadGEM3-GC31-MM_ssp585_r4i1p1f3_g025.nc
tos_ann_EC-Earth3-AerChem_historical_r1i1p1f1_g025.nc
tos_ann_EC-Earth3_ssp585_r3i1p1f1_g025.nc
tos_ann_CIESM_historical_r1i1p1f1_g025.nc
tos_ann_EC-Earth3-Veg_historical_r12i1p1f1_g025.nc
tos_ann_EC-Earth3-Veg_historical_r14i1p1f1_g025.nc
tos_ann_E3SM-1-1_ssp585_r1i1p1f1_g025.nc
tos_ann_MPI-ESM-1-2-HAM_historical_r3i1p1f1_g025.nc
tos_ann_MIROC-ES2L_ssp585_r3i1p1f2_g025.nc
tos_ann_MIROC-ES2L

tos_ann_MPI-ESM1-2-LR_historical_r19i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r20i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r21i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r22i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r23i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r24i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r25i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r26i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r27i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r28i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r29i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r30i1p1f1_g025.nc
tos_ann_IPSL-CM6A-LR_historical_r33i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r13i1p1f1_g025.nc
tos_ann_CMCC-CM2-SR5_historical_r4i1p2f1_g025.nc
tos_ann_CMCC-CM2-SR5_historical_r5i1p2f1_g025.nc
tos_ann_CMCC-CM2-SR5_historical_r6i1p2f1_g025.nc
tos_ann_CMCC-CM2-SR5_historical_r7i1p2f1_g025.nc
tos_ann_IPSL-CM6A-LR_ssp585_r33i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_ssp585_r11i1p1f1_g025.n

tos_ann_CanESM5-1_ssp585_r9i1p1f1_g025.nc
tos_ann_CanESM5-1_ssp585_r9i1p2f1_g025.nc
tos_ann_ACCESS-CM2_ssp585_r10i1p1f1_g025.nc
tos_ann_ACCESS-CM2_ssp585_r6i1p1f1_g025.nc
tos_ann_ACCESS-CM2_ssp585_r7i1p1f1_g025.nc
tos_ann_ACCESS-CM2_ssp585_r8i1p1f1_g025.nc
tos_ann_ACCESS-CM2_ssp585_r9i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r31i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r32i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r33i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r34i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r37i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r38i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r39i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r40i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r47i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r48i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r50i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r35i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r36i1p1f1_g025.nc
tos_ann_MPI-ESM1-2-LR_historical_r49i

## 3- Split data into historical data from 1850 until 2014 and forecast data from 2014 until 2100

In [4]:
forcing_hist  = "historical"
forcing_ssp585 = "ssp585"
# for key in dic_model_forcing['CanESM5'][forcing_hist].keys():
#     print(dic_model_forcing['CanESM5'][forcing_hist][key].shape)
#     print(dic_model_forcing['CanESM5'][forcing_ssp585][key].shape)

In [9]:
forcing_hist  = "historical"
forcing_ssp585 = "ssp585"

dic_merged_runs_ssp585 = {i: [] for i in model_names}

for idx, model in enumerate(model_names):
    
    dic_merged_runs_ssp585[model] = {}

    for idx_key, key in enumerate(dic_model_forcing[model][forcing_ssp585].keys()):
        
        if key in dic_model_forcing[model][forcing_hist].keys():
            
            print(model, key)
            print(dic_model_forcing[model][forcing_hist][key].shape)
            print(dic_model_forcing[model][forcing_ssp585][key].shape)


            # load the run
            hist_run = dic_model_forcing[model][forcing_hist][key]
            future_run = dic_model_forcing[model][forcing_ssp585][key]

            # concatenate the run 
            full_run = np.concatenate([hist_run,future_run],axis=0) 
            dic_merged_runs_ssp585[model][key] = full_run

CanESM5-1 r10i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r10i1p2f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r1i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r1i1p2f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r2i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r3i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r3i1p2f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r4i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r4i1p2f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r5i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r6i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r6i1p2f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r7i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r7i1p2f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r8i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5-1 r8i1p2f1
(165, 72, 144)
(86, 72, 144)
(251,

(251, 72, 144)
MIROC6 r41i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r40i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r8i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r43i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r48i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r49i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r47i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r7i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r6i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r39i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r10i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r50i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r45i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r4i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r12i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r18i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
MIROC6 r30i1p

(251, 72, 144)
CanESM5 r18i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5 r22i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5 r21i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5 r24i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5 r23i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5 r20i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5 r25i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5 r15i1p2f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5 r21i1p2f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
CanESM5 r19i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
TaiESM1 r1i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
ACCESS-CM2 r1i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
ACCESS-CM2 r2i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
ACCESS-CM2 r3i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
ACCESS-CM2 r4i1p1f1
(165, 72, 144)
(86, 72, 144)
(251, 72, 144)
ACCESS-CM2 r5i1p1f1
(165, 72, 144)
(86, 72, 144)

# Save dictionary

In [11]:
# load pickle module
import pickle

# create a binary pickle file 
f = open("ssp585_time_series.pkl","wb")

# write the python object (dict) to pickle file
pickle.dump(dic_merged_runs_ssp585,f)

# close file
f.close()