# This notebook aims at processing the data (redefine the dates)

In [1]:
import numpy as np
import matplotlib
import netCDF4 as netcdf

## 1- Extract the metadata 

In [4]:
import os

# Get the list of all files and directories
path = "/net/atmos/data/cmip6-ng/tos/ann/native"
dir_list = os.listdir(path)

print("Files and directories in '", path, "' :")

dic_files = {}

list_model = []
list_forcing = []

for idx, file in enumerate(dir_list):

    file_split = file.split("_")
    
    # extract model names
    model_name = file_split[2]
    forcing = file_split[3]
    run_name = file_split[4]
    
    
    list_model.append(model_name)
    list_forcing.append(forcing)
    
model_names = list(set(list_model))
forcing_names = list(set(list_forcing))

# define names dictionary 
dic_model_forcing = {}

for idx,model in enumerate(model_names):
    print(idx, model)
    dic_model_forcing[model] = {}
    for forcing in forcing_names:
        dic_model_forcing[model][forcing] = {}

Files and directories in ' /net/atmos/data/cmip6-ng/tos/ann/native ' :
0 INM-CM5-0
1 CESM2-WACCM
2 BCC-CSM2-MR
3 CAS-ESM2-0
4 EC-Earth3-LR
5 MRI-ESM2-0
6 NorESM1-F
7 CIESM
8 GFDL-CM4
9 CESM2-WACCM-FV2
10 NESM3
11 MPI-ESM1-2-HR
12 KIOST-ESM
13 KACE-1-0-G
14 E3SM-1-0
15 CanESM5-1
16 SAM0-UNICON
17 CNRM-CM6-1
18 MPI-ESM-1-2-HAM
19 CAMS-CSM1-0
20 INM-CM4-8
21 CanESM5-CanOE
22 E3SM-1-1
23 MIROC-ES2L
24 CNRM-CM6-1-HR
25 BCC-ESM1
26 NorESM2-MM
27 IPSL-CM5A2-INCA
28 GISS-E2-2-H
29 E3SM-2-0
30 E3SM-2-0-NARRM
31 EC-Earth3-Veg-LR
32 MIROC-ES2H
33 NorCPM1
34 GISS-E2-1-G
35 ACCESS-ESM1-5
36 FGOALS-f3-L
37 CESM2
38 IITM-ESM
39 FIO-ESM-2-0
40 UKESM1-0-LL
41 GISS-E2-1-H
42 MIROC6
43 IPSL-CM6A-LR
44 CMCC-CM2-HR4
45 AWI-ESM-1-1-LR
46 E3SM-1-1-ECA
47 GISS-E2-1-G-CC
48 MPI-ESM1-2-LR
49 MCM-UA-1-0
50 AWI-CM-1-1-MR
51 CESM2-FV2
52 CMCC-ESM2
53 EC-Earth3-AerChem
54 GISS-E3-G
55 HadGEM3-GC31-LL
56 ICON-ESM-LR
57 UKESM1-1-LL
58 HadGEM3-GC31-MM
59 IPSL-CM6A-MR1
60 ACCESS-CM2
61 TaiESM1
62 GFDL-ESM4
63 IPSL-CM6A

## 2- Extract the data from each file (model, forcing, ensemble member)

In [9]:
for idx, file in enumerate(dir_list[:3]):

    file_split = file.split("_")

    # extract model names
    model = file_split[2]
    forcing = file_split[3]
    run_name = file_split[4]

    
    if model in list(dic_model_forcing.keys()):
          
        # read files in the directory
        file2read = netcdf.Dataset(path +'/'+ file,'r')

        # set variables
        time = np.array(file2read.variables['time'][:])
        longitude = np.array(file2read.variables['lon'][:])

        latitude = np.array(file2read.variables['lat'][:])
        print(longitude.shape)
        tos = np.array(file2read.variables['tos'][:])
      
        # assign nans to non-sea values
        tos[tos>1e19] = np.nan
        idx_nans = np.argwhere(np.isnan(tos))

        # get the data
        dic_model_forcing[model][forcing][run_name]= tos

(360,)
(360,)
(384, 320)



KeyboardInterrupt



## 3- Split data into historical data from 1850 until 2014 and forecast data from 2014 until 2100

In [4]:
forcing_hist  = "historical"

dic_runs_hist = {i: [] for i in model_names}

for idx, model in enumerate(model_names):
    
    dic_runs_hist[model] = {}

    for idx_key, key in enumerate(dic_model_forcing[model][forcing_hist].keys()):
        
        # load the run
        dic_runs_hist[model][key] = dic_model_forcing[model][forcing_hist][key]

    print(model, len(dic_runs_hist[model].keys()))

EC-Earth3-Veg 8
MIROC6 50
GISS-E2-1-G-CC 1
E3SM-2-0-NARRM 4
CAS-ESM2-0 4
E3SM-1-1-ECA 1
NorCPM1 30
MIROC-ES2H 3
EC-Earth3 23
UKESM1-0-LL 17
TaiESM1 1
MCM-UA-1-0 2
CAMS-CSM1-0 3
CNRM-CM6-1-HR 1
EC-Earth3-AerChem 1
GISS-E3-G 1
GFDL-ESM4 3
EC-Earth3-LR 0
GFDL-CM4 1
GISS-E2-1-H 25
NESM3 5
MPI-ESM1-2-LR 45
CMCC-ESM2 1
MIROC-ES2L 31
MPI-ESM-1-2-HAM 3
ACCESS-ESM1-5 40
BCC-ESM1 3
FGOALS-g3 6
GISS-E2-1-G 40
E3SM-2-0 21
CIESM 3
INM-CM5-0 10
UKESM1-1-LL 1
IPSL-CM6A-LR 33
NorESM1-F 0
ICON-ESM-LR 5
IITM-ESM 0
CESM2 11
SAM0-UNICON 1
FGOALS-f3-L 3
HadGEM3-GC31-LL 55
CESM2-FV2 4
CanESM5-1 72
NorESM2-MM 3
KACE-1-0-G 3
IPSL-CM5A2-INCA 1
IPSL-CM6A-LR-INCA 1
INM-CM4-8 1
ACCESS-CM2 10
HadGEM3-GC31-MM 4
AWI-ESM-1-1-LR 1
KIOST-ESM 1
MRI-ESM2-0 12
IPSL-CM6A-MR1 0
FIO-ESM-2-0 3
E3SM-1-0 4
CanESM5-CanOE 3
CNRM-ESM2-1 11
CanESM5 65
E3SM-1-1 1
MPI-ESM1-2-HR 10
GISS-E2-2-G 11
AWI-CM-1-1-MR 5
CESM2-WACCM 3
GISS-E2-2-H 5
CMCC-CM2-HR4 1
EC-Earth3-Veg-LR 3
CNRM-CM6-1 30
CESM2-WACCM-FV2 3
NorESM2-LM 3
CMCC-CM2-SR5 11
B

In [5]:
# forcing_hist  = "historical"
# forcing_ssp585 = "ssp585"

# dic_merged_runs_ssp585 = {i: [] for i in model_names}

# for idx, model in enumerate(model_names):
    
#     dic_merged_runs_ssp585[model] = {}

#     for idx_key, key in enumerate(dic_model_forcing[model][forcing_ssp585].keys()):
        
#         if key in dic_model_forcing[model][forcing_hist].keys():
            
#             print(model, key)
#             print(dic_model_forcing[model][forcing_hist][key].shape)
#             print(dic_model_forcing[model][forcing_ssp585][key].shape)


#             # load the run
#             hist_run = dic_model_forcing[model][forcing_hist][key]
#             future_run = dic_model_forcing[model][forcing_ssp585][key]

#             # concatenate the run 
#             full_run = np.concatenate([hist_run,future_run],axis=0) 
#             dic_merged_runs_ssp585[model][key] = full_run

# Save dictionary

In [6]:
# # load pickle module
# import pickle

# # create a binary pickle file 
# f = open("ssp585_time_series.pkl","wb")

# # write the python object (dict) to pickle file
# pickle.dump(dic_merged_runs_ssp585,f)

# # close file
# f.close()