In [1]:
import os
import sys
import numpy as np
import pandas as pd
from importlib import reload
from glob import glob
import json

sys.path.append('modules')
import constants

In [2]:
# Display the IMAGE_SAVE_DIR_INIT path defined in the local `constants` module
# (imported from the 'modules' directory added to sys.path above).
constants.IMAGE_SAVE_DIR_INIT

'/home/563/ab2313/gdata/images/PhD/init'

# Paths

The code below can be used to get the paths of all the files. However, this can be very slow (3 hours) for piControl.

In [3]:
# Load the pre-computed listing of file paths, one column per experiment type.
# TODO: This should be a dict not a csv so you don't have to drop nans
fpaths_df = pd.read_csv('data/fpaths_picontrol.csv')[['picontrol', '1000pgc']]
# Columns of unequal length come back from the CSV padded with NaN (a float).
# Keep only the string entries in *every* column, so that neither list can
# hand a NaN to os.path.dirname further down, whichever column is shorter.
fpaths_dict = {
    column: [value for value in values if isinstance(value, str)]
    for column, values in fpaths_df.to_dict(orient='list').items()
}
fpaths_dict.keys()

dict_keys(['picontrol', '1000pgc'])

In [4]:
# Reduce the piControl file list to the distinct directories containing the files
# (np.unique de-duplicates and returns the result sorted).
unique_picontrol_paths = np.unique(
    [os.path.dirname(fpath) for fpath in fpaths_dict['picontrol']]
)
# Preview only the first few directories.
unique_picontrol_paths[:5]

array(['/g/data/oi10/replicas/CMIP6/CMIP/AS-RCEC/TaiESM1/piControl/r1i1p1f1/Amon/tas/gn/v20200211',
       '/g/data/oi10/replicas/CMIP6/CMIP/AWI/AWI-CM-1-1-MR/piControl/r1i1p1f1/Amon/tas/gn/v20191015',
       '/g/data/oi10/replicas/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/piControl/r1i1p1f1/Amon/tas/gn/v20200212',
       '/g/data/oi10/replicas/CMIP6/CMIP/BCC/BCC-CSM2-MR/esm-piControl/r1i1p1f1/Amon/tas/gn/v20181114',
       '/g/data/oi10/replicas/CMIP6/CMIP/BCC/BCC-CSM2-MR/piControl/r1i1p1f1/Amon/tas/gn/v20181016'],
      dtype='<U111')

In [5]:
# Distinct parent directories of the 1000PgC files (file names stripped).
unique_1000PgC_paths = np.unique(
    [os.path.dirname(fpath) for fpath in fpaths_dict['1000pgc']]
)
# Keep only directories whose path contains '/tas/'; this is what drops the
# precipitation entries. The result is a sorted list of plain Python strings.
base_fpaths_1000PgC = sorted(
    str(directory) for directory in unique_1000PgC_paths if '/tas/' in directory
)
base_fpaths_1000PgC[:5]

['/g/data/oi10/replicas/CMIP6/C4MIP/CCCma/CanESM5/esm-1pct-brch-1000PgC/r1i1p2f1/Amon/tas/gn/v20190429',
 '/g/data/oi10/replicas/CMIP6/C4MIP/CCCma/CanESM5/esm-1pct-brch-1000PgC/r2i1p2f1/Amon/tas/gn/v20190429',
 '/g/data/oi10/replicas/CMIP6/C4MIP/CCCma/CanESM5/esm-1pct-brch-1000PgC/r3i1p2f1/Amon/tas/gn/v20190429',
 '/g/data/oi10/replicas/CMIP6/C4MIP/CCCma/CanESM5/esm-1pct-brch-1000PgC/r4i1p2f1/Amon/tas/gn/v20190429',
 '/g/data/oi10/replicas/CMIP6/C4MIP/CCCma/CanESM5/esm-1pct-brch-1000PgC/r5i1p2f1/Amon/tas/gn/v20190429']

In [6]:
# Base path of each model: the first 10 '/'-separated pieces of the directory
# (the leading piece is empty because the paths are absolute), i.e. everything
# up to and including the experiment name. A single model can still have several
# variants below this level (r1i1p2f1, r2i1p2f1, ...), hence the de-duplication.
base_group = np.unique(
    ['/'.join(path.split('/', 10)[:10]) for path in base_fpaths_1000PgC]
)
base_group

array(['/g/data/oi10/replicas/CMIP6/C4MIP/CCCma/CanESM5/esm-1pct-brch-1000PgC',
       '/g/data/oi10/replicas/CMIP6/C4MIP/MIROC/MIROC-ES2L/esm-1pct-brch-1000PgC',
       '/g/data/oi10/replicas/CMIP6/C4MIP/MOHC/UKESM1-0-LL/esm-1pct-brch-1000PgC',
       '/g/data/oi10/replicas/CMIP6/C4MIP/MPI-M/MPI-ESM1-2-LR/esm-1pct-brch-1000PgC',
       '/g/data/oi10/replicas/CMIP6/C4MIP/NASA-GISS/GISS-E2-1-G-CC/esm-1pct-brch-1000PgC',
       '/g/data/oi10/replicas/CMIP6/C4MIP/NCAR/CESM2/esm-1pct-brch-1000PgC',
       '/g/data/oi10/replicas/CMIP6/C4MIP/NCC/NorESM2-LM/esm-1pct-brch-1000PgC',
       '/g/data/oi10/replicas/CMIP6/C4MIP/NOAA-GFDL/GFDL-ESM4/esm-1pct-brch-1000PgC'],
      dtype='<U80')

In [7]:
# Get one experiment for each modelling group
needed_experiments = []
# Looping through all the base paths
for base_group_single in base_group:

    # All the experiments that live under this base path. Match on the directory
    # prefix (base path followed by '/') rather than on a bare substring, so a
    # base path that happens to be a textual prefix of a sibling directory name
    # (e.g. '.../exp' vs '.../exp-ext/...') cannot pick up the sibling's files.
    experiments = [
        f for f in base_fpaths_1000PgC
        if f == base_group_single or f.startswith(base_group_single + '/')
    ]
    # Get just the first one available (the input list is sorted, so this is
    # the alphabetically first variant for the model).
    needed_experiments.append(experiments[0])

needed_experiments

['/g/data/oi10/replicas/CMIP6/C4MIP/CCCma/CanESM5/esm-1pct-brch-1000PgC/r1i1p2f1/Amon/tas/gn/v20190429',
 '/g/data/oi10/replicas/CMIP6/C4MIP/MIROC/MIROC-ES2L/esm-1pct-brch-1000PgC/r1i1p1f2/Amon/tas/gn/v20200622',
 '/g/data/oi10/replicas/CMIP6/C4MIP/MOHC/UKESM1-0-LL/esm-1pct-brch-1000PgC/r1i1p1f2/Amon/tas/gn/v20200210',
 '/g/data/oi10/replicas/CMIP6/C4MIP/MPI-M/MPI-ESM1-2-LR/esm-1pct-brch-1000PgC/r1i1p1f1/Amon/tas/gn/v20190815',
 '/g/data/oi10/replicas/CMIP6/C4MIP/NASA-GISS/GISS-E2-1-G-CC/esm-1pct-brch-1000PgC/r1i1p1f1/Amon/tas/gn/v20191202',
 '/g/data/oi10/replicas/CMIP6/C4MIP/NCAR/CESM2/esm-1pct-brch-1000PgC/r1i1p1f1/Amon/tas/gn/v20191119',
 '/g/data/oi10/replicas/CMIP6/C4MIP/NCC/NorESM2-LM/esm-1pct-brch-1000PgC/r1i1p1f1/Amon/tas/gn/v20191108',
 '/g/data/oi10/replicas/CMIP6/C4MIP/NOAA-GFDL/GFDL-ESM4/esm-1pct-brch-1000PgC/r1i1p1f1/Amon/tas/gr1/v20180701']

In [8]:
# Map each model name to its selected experiment directory. The model name is
# the 9th '/'-separated piece of the path (index 8), e.g. 'CanESM5'.
needed_model_dict = dict(
    (experiment.split('/')[8], experiment) for experiment in needed_experiments
)
needed_model_dict

{'CanESM5': '/g/data/oi10/replicas/CMIP6/C4MIP/CCCma/CanESM5/esm-1pct-brch-1000PgC/r1i1p2f1/Amon/tas/gn/v20190429',
 'MIROC-ES2L': '/g/data/oi10/replicas/CMIP6/C4MIP/MIROC/MIROC-ES2L/esm-1pct-brch-1000PgC/r1i1p1f2/Amon/tas/gn/v20200622',
 'UKESM1-0-LL': '/g/data/oi10/replicas/CMIP6/C4MIP/MOHC/UKESM1-0-LL/esm-1pct-brch-1000PgC/r1i1p1f2/Amon/tas/gn/v20200210',
 'MPI-ESM1-2-LR': '/g/data/oi10/replicas/CMIP6/C4MIP/MPI-M/MPI-ESM1-2-LR/esm-1pct-brch-1000PgC/r1i1p1f1/Amon/tas/gn/v20190815',
 'GISS-E2-1-G-CC': '/g/data/oi10/replicas/CMIP6/C4MIP/NASA-GISS/GISS-E2-1-G-CC/esm-1pct-brch-1000PgC/r1i1p1f1/Amon/tas/gn/v20191202',
 'CESM2': '/g/data/oi10/replicas/CMIP6/C4MIP/NCAR/CESM2/esm-1pct-brch-1000PgC/r1i1p1f1/Amon/tas/gn/v20191119',
 'NorESM2-LM': '/g/data/oi10/replicas/CMIP6/C4MIP/NCC/NorESM2-LM/esm-1pct-brch-1000PgC/r1i1p1f1/Amon/tas/gn/v20191108',
 'GFDL-ESM4': '/g/data/oi10/replicas/CMIP6/C4MIP/NOAA-GFDL/GFDL-ESM4/esm-1pct-brch-1000PgC/r1i1p1f1/Amon/tas/gr1/v20180701'}

In [9]:
# Bundle everything into one JSON-serialisable structure: numpy arrays are
# converted to plain lists, and the 1000PgC entry carries both the full set of
# directories and the one-per-model selection.
zecmip_path_dict = {
    '1000pgc': {
        'all': unique_1000PgC_paths.tolist(),
        'needed': needed_model_dict,
    },
    'picontrol': unique_picontrol_paths.tolist(),
}
zecmip_path_dict.keys()

dict_keys(['1000pgc', 'picontrol'])

In [10]:
# Persist the collected paths as JSON (overwrites any existing file).
with open('data/zecmip_model_paths.json', 'w') as out_file:
    out_file.write(json.dumps(zecmip_path_dict))