# File management

The goal here is to merge, for each model and case, the separate time-chunk files of every variable into a single file using ```cdo mergetime```.

# Setup

In [26]:
import os
import shutil
import glob
import pandas as pd
import numpy as np
import xarray as xr
import pickle as pkl

import cartopy
import cartopy.crs as ccrs
import cartopy.mpl.ticker as cticker
from cartopy.util import add_cyclic_point

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.path as mpath

import cmocean.cm as cmo

import warnings
warnings.filterwarnings("ignore", message="Unable to decode time axis into full numpy.datetime64 objects, continuing using cftime.datetime objects instead, reason: dates out of range")
warnings.filterwarnings("ignore", message="Ambiguous reference date string: 101-01-01. The first value is assumed to be the year hence will be padded with zeros to remove the ambiguity (the padded reference date string is: 0101-01-01). To remove this message, remove the ambiguity by padding your reference date strings with zeros.")

In [3]:
## some magic to automatically reload my functions before running a new cell
# %load_ext autoreload
## %reload_ext autoreload
# %autoreload 1
# %aimport itcz_phys.cmiptools

# import itcz_phys.cmiptools as cmip

## Figure settings

In [4]:
import matplotlib as mpl
# Set the default figure resolution to 120 dpi
mpl.rcParams['figure.dpi'] = 120

# Inline figure format: PNG is active, the PDF alternative is kept for reference
# %config InlineBackend.figure_formats = ['pdf']
%config InlineBackend.figure_formats = ['png']

## Create constants

In [5]:
# Cases / experiments
# The FULL (1pctCO2) + RAD (1pctCO2-rad) pair is declared first and reused
# to assemble the complete list of cases.
cases_rad = ['1pctCO2', '1pctCO2-rad']
cases = ['piControl'] + cases_rad + ['1pctCO2-bgc']


# Models with data in /tiger/scratch/gpfs/GEOCLIM/bgb2/CMIP/
# --> removed MRI-ESM2-0 because 1pctCO2 and 1pctCO2-rad Amon variables are the same

# --> models with HT, VEG for 1pctCO2 and 1pctCO2-rad (i.e., the max number of models used)
models = [
    'ACCESS-ESM1-5',
    'BCC-CSM2-MR',
    'CESM2',
    'CMCC-ESM2',
    'CNRM-ESM2-1',
    'EC-Earth3-CC',
    'GFDL-ESM4',
    'GISS-E2-1-G',
    'IPSL-CM6A-LR',
    'MIROC-ES2L',
    'MPI-ESM1-2-LR',
    'NorESM2-LM',
    'UKESM1-0-LL',
]
# --> only models with HT, VEG, SW for 1pctCO2 and 1pctCO2-rad
models_radsw = ['EC-Earth3-CC']
# --> only models with HT, VEG for 1pctCO2 and 1pctCO2-rad
models_rad = ['CESM2', 'EC-Earth3-CC', 'GFDL-ESM4', 'NorESM2-LM']
#  --> models with HT, VEG, SW for all cases
#      (every entry of `models` that is not listed in `models_rad`)
models_all = [name for name in models if name not in models_rad]


# Variables
variables_ht = [
    'hfls', 'hfss', 'hus', 'pr', 'prsn', 'ps', 'rlds',
    'rlus', 'rlut', 'rsds', 'rsdt', 'rsus', 'rsut', 'ta',
]
variables_veg = ['evspsbl', 'lai', 'tas']
variables_vegsw = variables_veg + ['uas', 'vas']
# The full list is the alphabetically sorted union of the HT and VEG+SW groups
variables = sorted(variables_ht + variables_vegsw)


# Table ID: 'lai' is the only 'Lmon' variable here, all others are 'Amon'
table_id = {name: ('Lmon' if name == 'lai' else 'Amon') for name in variables}


# Grid label: default to 'gn', then override the models that use another label
grid_label = dict.fromkeys(
    ['ACCESS-ESM1-5', 'BCC-CSM2-MR', 'CESM2', 'CMCC-ESM2', 'CNRM-ESM2-1',
     'EC-Earth3-CC', 'GFDL-ESM4', 'GISS-E2-1-G', 'IPSL-CM6A-LR', 'MIROC-ES2L',
     'MPI-ESM1-2-LR', 'MRI-ESM2-0', 'NorESM2-LM', 'UKESM1-0-LL'],
    'gn')
grid_label.update({
    'CNRM-ESM2-1': 'gr',
    'EC-Earth3-CC': 'gr',
    'GFDL-ESM4': 'gr1',
    'IPSL-CM6A-LR': 'gr',
})


# Months (three-letter names, and their first letters as abbreviations)
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
months_abbr = [name[0] for name in months]


# Directory where CMIP6 model output lives on tiger
cmipdir = '/tiger/scratch/gpfs/GEOCLIM/bgb2/CMIP/'
cmipmergedir = '/tiger/scratch/gpfs/GEOCLIM/bgb2/CMIPmerge/'

## Create helper dictionaries

### Create dictionaries

#### Table ID

In [15]:
# Map each variable name to its table ID: the two 'fx' fields and the single
# 'Lmon' field are special-cased, every other variable is 'Amon'.
table_id = {
    var: ('fx' if var in ('areacella', 'sftlf') else 'Lmon' if var == 'lai' else 'Amon')
    for var in [
        'areacella', 'evspsbl', 'hfls', 'hfss', 'hus', 'lai', 'pr',
        'prsn', 'ps', 'rlds', 'rlus', 'rlut', 'rsds', 'rsdt',
        'rsus', 'rsut', 'sftlf', 'ta', 'tas', 'uas', 'vas',
    ]
}

# with open('../pkl_files/table_id.pkl', 'wb') as file:
#     pkl.dump(table_id, file, pkl.HIGHEST_PROTOCOL)

#### Grid label

In [16]:
# Grid label per model: start everyone at 'gn', then override the models
# that use a different label.
grid_label = dict.fromkeys(
    ['ACCESS-ESM1-5', 'BCC-CSM2-MR', 'CESM2', 'CMCC-ESM2', 'CNRM-ESM2-1',
     'EC-Earth3-CC', 'GFDL-ESM4', 'GISS-E2-1-G', 'IPSL-CM6A-LR', 'MIROC-ES2L',
     'MPI-ESM1-2-LR', 'MRI-ESM2-0', 'NorESM2-LM', 'UKESM1-0-LL'],
    'gn')
grid_label.update({
    'CNRM-ESM2-1': 'gr',
    'EC-Earth3-CC': 'gr',
    'GFDL-ESM4': 'gr1',
    'IPSL-CM6A-LR': 'gr',
})

# with open('../pkl_files/grid_label.pkl', 'wb') as file:
#     pkl.dump(grid_label, file, pkl.HIGHEST_PROTOCOL)

#### Variant ID

In [17]:
# Variant ID per model: 'r1i1p1f1' is the common value, the exceptions are
# applied afterwards so they stand out.
variant_id = dict.fromkeys(
    ['ACCESS-ESM1-5', 'BCC-CSM2-MR', 'CanESM5', 'CESM2', 'CMCC-ESM2',
     'CNRM-ESM2-1', 'EC-Earth3-CC', 'GFDL-ESM4', 'GISS-E2-1-G', 'IPSL-CM6A-LR',
     'MIROC-ES2L', 'MPI-ESM1-2-LR', 'MRI-ESM2-0', 'NorESM2-LM', 'UKESM1-0-LL'],
    'r1i1p1f1')
variant_id.update({
    'CNRM-ESM2-1': 'r1i1p1f2',
    'GISS-E2-1-G': 'r101i1p1f1',
    'MIROC-ES2L': 'r1i1p1f2',
    'MRI-ESM2-0': 'r1i2p1f1',
    'UKESM1-0-LL': 'r1i1p1f2',
})

# with open('../pkl_files/variant_id.pkl', 'wb') as file:
#     pkl.dump(variant_id, file, pkl.HIGHEST_PROTOCOL)

#### Start times in PI

In [24]:
# Print the first time stamp of every merged piControl file, model by model
# (presumably used to fill in `start_year_in_pi` by hand -- confirm).
for m in models:
    print(m)
    for f in sorted(os.listdir(cmipmergedir+m)):
        parts = f.split('_')
        # The case is the 4th '_'-separated field; skip names too short to have one
        if len(parts) > 3 and parts[3] == 'piControl':
            print(f)
            # Context manager releases the file handle once the time is read
            with xr.open_dataset(cmipmergedir+m+'/'+f) as ds:
                st = ds.time.isel(time=0).values
            print('  ',st)

In [22]:
# First year-month of each model's piControl data. '1850-01' is the common
# value; the exceptions (including models left blank) are applied afterwards.
start_year_in_pi = dict.fromkeys(
    ['ACCESS-ESM1-5', 'BCC-CSM2-MR', 'CESM2', 'CMCC-ESM2', 'CNRM-ESM2-1',
     'EC-Earth3-CC', 'GFDL-ESM4', 'GISS-E2-1-G', 'IPSL-CM6A-LR', 'MIROC-ES2L',
     'MPI-ESM1-2-LR', 'NorESM2-LM', 'UKESM1-0-LL'],
    '1850-01')
start_year_in_pi.update({
    'ACCESS-ESM1-5': '0101-01',
    'CESM2': '',
    'EC-Earth3-CC': '',
    'GFDL-ESM4': '',
    'NorESM2-LM': '',
    'UKESM1-0-LL': '1960-01',
})

# with open('../pkl_files/start_year_in_pi.pkl', 'wb') as file:
#     pkl.dump(start_year_in_pi, file, pkl.HIGHEST_PROTOCOL)

#### Start times in CO2

In [25]:
# Print the first time stamp of every merged 1pctCO2 / 1pctCO2-rad file, model by model.
for m in models:
    print(m)
    for f in sorted(os.listdir(cmipmergedir+m)):
        parts = f.split('_')
        # Membership test: `== ('1pctCO2' or '1pctCO2-rad')` only ever compared
        # against '1pctCO2', so the 1pctCO2-rad files were silently skipped.
        if len(parts) > 3 and parts[3] in ('1pctCO2', '1pctCO2-rad'):
            print(f)
            # Context manager releases the file handle once the time is read
            with xr.open_dataset(cmipmergedir+m+'/'+f) as ds:
                st = ds.time.isel(time=0).values
            print('  ',st)

In [18]:
# First year-month of each model's 1% CO2 data. '1850-01' is the common
# value; the models that start elsewhere are overridden afterwards.
start_year_in_co2 = dict.fromkeys(
    ['ACCESS-ESM1-5', 'BCC-CSM2-MR', 'CESM2', 'CMCC-ESM2', 'CNRM-ESM2-1',
     'EC-Earth3-CC', 'GFDL-ESM4', 'GISS-E2-1-G', 'IPSL-CM6A-LR', 'MIROC-ES2L',
     'MPI-ESM1-2-LR', 'NorESM2-LM', 'UKESM1-0-LL'],
    '1850-01')
start_year_in_co2.update({
    'ACCESS-ESM1-5': '0101-01',
    'CESM2': '0001-01',
    'GFDL-ESM4': '0001-01',
    'NorESM2-LM': '0001-01',
})

# with open('../pkl_files/start_year_in_co2.pkl', 'wb') as file:
#     pkl.dump(start_year_in_co2, file, pkl.HIGHEST_PROTOCOL)

#### Branch times in PI

In [19]:
# Originally from an NCL file created by Lily Hahn
# for GISS-E2-1-G with variant_id=r101i1p1f1 --> branch time is 185001-201412 (originally 415001-430012 in Lily's file)
# Each value is 'YYYYMM-YYYYMM'.
branch_times_in_pi = {
  'ACCESS-CM2': '095001-144912', 'ACCESS-ESM1-5': '010101-060012', 'AWI-CM-1-1-MR': '265001-280012', 'BCC-CSM2-MR': '185001-244912', 'BCC-ESM1': '185001-230012',
  'CAMS-CSM1-0': '303001-339912', 'CESM2': '050101-069912', 'CESM2-FV2': '032101-050012', 'CESM2-WACCM': '007001-029912', 'CESM2-WACCM-FV2': '030101-049912',
  'CMCC-CM2-SR5': '185001-209912', 'CMCC-ESM2': '185001-209912', 'CNRM-CM6-1': '185001-234912', 'CNRM-CM6-1-HR': '185001-214912', 'CNRM-ESM2-1': '185001-234912',
  'CanESM5': '520101-540012', 'E3SM-1-0': '010101-025012', 'EC-Earth3': '254001-270012', 'EC-Earth3-AerChem': '185001-199912', 'EC-Earth3-Veg': '185001-200012',
  'FGOALS-f3-L': '060001-116012', 'FGOALS-g3': '046301-061912', 'GFDL-CM4': '025101-045012', 'GFDL-ESM4': '010101-030012', 'GISS-E2-1-G': '185001-201412',
  'GISS-E2-1-H': '318001-333012', 'GISS-E2-2-G': '200001-215012', 'HadGEM3-GC31-LL': '185001-204912', 'HadGEM3-GC31-MM': '185001-200912', 'IITM-ESM': '192601-212512',
  'INM-CM4-8': '194701-214912', 'INM-CM5-0': '209901-226812', 'IPSL-CM6A-LR': '187001-234912', 'MIROC-ES2L': '185001-204912', 'MIROC6': '320001-339912',
  'MPI-ESM-1-2-HAM': '195001-210912', 'MPI-ESM1-2-HR': '185001-199912', 'MPI-ESM1-2-LR': '185001-200912', 'MRI-ESM2-0': '185001-255012', 'NorCPM1': '025101-050012',
  'NorESM2-LM': '160001-210012', 'NorESM2-MM': '120001-134912', 'SAM0-UNICON': '027401-043012', 'TaiESM1': '020101-040012', 'UKESM1-0-LL': '196001-214912'
}

# All derived dictionaries are filled in a single pass over the branch times.
# l40 = last 40 years of 150 year 1% CO2 increase period
branch_start_year_in_pi = {}        # 'YYYY-MM' of the branch start
period_l40_start_year_in_pi = {}    # 'YYYYMM': January of (branch year + 100)
period_l40_end_year_in_pi = {}      # 'YYYYMM': December of (branch year + 139)
period_l40_slice_in_pi = {}         # slice(start, end) built from the two strings above
for m, span in branch_times_in_pi.items():
    branch_year, branch_month = span[:4], span[4:6]
    branch_start_year_in_pi[m] = branch_year+'-'+branch_month

    l40_first = str(int(branch_year)+100).zfill(4)+'01'
    l40_last = str(int(branch_year)+139).zfill(4)+'12'
    period_l40_start_year_in_pi[m] = l40_first
    period_l40_end_year_in_pi[m] = l40_last
    period_l40_slice_in_pi[m] = slice(l40_first, l40_last)


# with open('../pkl_files/branch_start_year_in_pi.pkl', 'wb') as file:
#     pkl.dump(branch_start_year_in_pi, file, pkl.HIGHEST_PROTOCOL)
# with open('../pkl_files/period_l40_slice_in_pi.pkl', 'wb') as file:
#     pkl.dump(period_l40_slice_in_pi, file, pkl.HIGHEST_PROTOCOL)
# with open('../pkl_files/period_l40_start_year_in_pi.pkl', 'wb') as file:
#     pkl.dump(period_l40_start_year_in_pi, file, pkl.HIGHEST_PROTOCOL)
# with open('../pkl_files/period_l40_end_year_in_pi.pkl', 'wb') as file:
#     pkl.dump(period_l40_end_year_in_pi, file, pkl.HIGHEST_PROTOCOL)

#### Times in CO2

In [20]:
# Get last 40 years of 1% CO2 period
# For each model, the first time stamp of the merged 'evspsbl' file is read and
# the period is taken as (first year + 100)-01 through (first year + 139)-12.
period_l40_start_year_in_co2 = {}
period_l40_end_year_in_co2 = {}
period_l40_slice_in_co2 = {}

v = 'evspsbl'
for m in models:
    print(m)
    # for c in ['1pctCO2','1pctCO2-rad','1pctCO2-bgc']:
    for c in cases:
        print('  ',c)
        path = cmipmergedir+m+'/'+v+'_'+table_id[v]+'_'+m+'_'+c+'_'+variant_id[m]+'_'+grid_label[m]+'.nc'
        try:
            # Context manager closes the file as soon as the two time stamps are read
            with xr.open_dataset(path) as data:
                first_time = data['time'].isel(time=0).values
                last_time = data['time'][-1].values
        except OSError:
            # Missing/unreadable file for this model/case
            print('     ###')
            continue

        # Parse the start year once and derive both period bounds from it
        first_year = int(str(first_time)[:4])
        l40_start = str(first_year+100).zfill(4)+'-01'
        l40_end = str(first_year+139).zfill(4)+'-12'

        # piControl is still printed for comparison, but it must not feed the
        # *_in_co2 dictionaries: its time axis need not match the CO2 runs.
        if c != 'piControl':
            period_l40_start_year_in_co2[m] = l40_start
            period_l40_end_year_in_co2[m] = l40_end
            period_l40_slice_in_co2[m] = slice(l40_start, l40_end)

        print('    ',first_time)
        print('    ',last_time)
        print('    ',l40_start)
        print('    ',l40_end)

# with open('../pkl_files/period_l40_slice_in_co2.pkl', 'wb') as file:
#     pkl.dump(period_l40_slice_in_co2, file, pkl.HIGHEST_PROTOCOL)
# with open('../pkl_files/period_l40_start_year_in_co2.pkl', 'wb') as file:
#     pkl.dump(period_l40_start_year_in_co2, file, pkl.HIGHEST_PROTOCOL)
# with open('../pkl_files/period_l40_end_year_in_co2.pkl', 'wb') as file:
#     pkl.dump(period_l40_end_year_in_co2, file, pkl.HIGHEST_PROTOCOL)

ACCESS-ESM1-5
   piControl




     0101-01-16 12:00:00
     1100-12-16 12:00:00
     0201-01
     0240-12
   1pctCO2




     0101-01-16 12:00:00
     0250-12-16 12:00:00
     0201-01
     0240-12
   1pctCO2-rad
     0101-01-16 12:00:00
     0250-12-16 12:00:00
     0201-01
     0240-12
   1pctCO2-bgc




     0101-01-16 12:00:00
     0250-12-16 12:00:00
     0201-01
     0240-12
BCC-CSM2-MR
   piControl
     1850-01-16 12:00:00
     2449-12-16 12:00:00
     1950-01
     1989-12
   1pctCO2
     1850-01-16 12:00:00
     2000-12-16 12:00:00
     1950-01
     1989-12
   1pctCO2-rad
     1850-01-16 12:00:00
     2000-12-16 12:00:00
     1950-01
     1989-12
   1pctCO2-bgc
     1850-01-16 12:00:00
     2000-12-16 12:00:00
     1950-01
     1989-12
CESM2
   piControl
     ###
   1pctCO2
     0001-01-15 12:00:00
     0150-12-15 12:00:00
     0101-01
     0140-12
   1pctCO2-rad
     0001-01-15 12:00:00
     0150-12-15 12:00:00
     0101-01
     0140-12
   1pctCO2-bgc
     ###
CMCC-ESM2
   piControl
     1850-01-16 12:00:00
     2349-12-16 12:00:00
     1950-01
     1989-12
   1pctCO2
     1850-01-16 12:00:00
     2014-12-16 12:00:00
     1950-01
     1989-12
   1pctCO2-rad
     1850-01-16 12:00:00
     2014-12-16 12:00:00
     1950-01
     1989-12
   1pctCO2-bgc
     1850-01-16 12:00:00
     20

KeyboardInterrupt: 

### Colors

In [42]:
# Palette of hex colors, in the order they are handed out to `models`
colors = [
    "#0264f6", "#79f22b",
    "#9a3aeb", "#ffb02a",
    "#b186ff", "#005413",
    "#ff5de4", "#00c896",
    "#d6004b", "#75d4ff",
    "#ff8650", "#00356a",
    "#e4d99b", "#251400",
]

# Pair the i-th model with the i-th color; surplus colors are left unused
mcolors = dict(zip(models, colors))

In [15]:
# Distinct colors generated using https://mokole.com/palette.html
# mcolors = {'ACCESS-ESM1-5': '#2f4f4f',
#            'BCC-CSM2-MR': '#8b4513',
#            'CanESM5': '#6b8e23',
#            'CESM2': '#4b0082',
#            'CMCC-ESM2': '#ff0000',
#            'CNRM-ESM2-1': '#ffff00',
#            'EC-Earth3-CC': '#ffb0ae',
#            'GFDL-ESM4': '#40e0d0',
#            'GISS-E2-1-G': '#00ff00',
#            'IPSL-CM6A-LR': '#0000ff',
#            'MIROC-ES2L': '#ff00ff',
#            'MPI-ESM1-2-LR': '#6495ed',
#            'NorESM2-LM': '#ff1493',
#            'UKESM1-0-LL': '#ffc0cb'}

# Model -> hex color, written as explicit (model, color) pairs
mcolors = dict([
    ('ACCESS-ESM1-5', '#0264f6'),
    ('BCC-CSM2-MR', '#79f22b'),
    ('CESM2', '#9a3aeb'),
    ('CMCC-ESM2', '#ffb02a'),
    ('CNRM-ESM2-1', '#b186ff'),
    ('EC-Earth3-CC', '#005413'),
    ('GFDL-ESM4', '#ff5de4'),
    ('GISS-E2-1-G', '#00c896'),
    ('IPSL-CM6A-LR', '#d6004b'),
    ('MIROC-ES2L', '#75d4ff'),
    ('MPI-ESM1-2-LR', '#ff8650'),
    ('NorESM2-LM', '#00356a'),
    ('UKESM1-0-LL', '#e4d99b'),
])


# Persist the mapping so it can be re-loaded from ../pkl_files/mcolors.pkl
with open('../pkl_files/mcolors.pkl', 'wb') as file:
    pkl.dump(mcolors, file, protocol=pkl.HIGHEST_PROTOCOL)

## Load dictionaries

In [6]:
def _load_pkl(name):
    """Return the object unpickled from ../pkl_files/<name>.pkl."""
    with open('../pkl_files/'+name+'.pkl', 'rb') as file:
        return pkl.load(file)


# Get the last 40 years of the 1% CO2 runs and the corresponding time period in piControl
period_l40_slice_in_pi = _load_pkl('period_l40_slice_in_pi')
period_l40_start_year_in_pi = _load_pkl('period_l40_start_year_in_pi')
period_l40_end_year_in_pi = _load_pkl('period_l40_end_year_in_pi')
start_year_in_pi = _load_pkl('start_year_in_pi')

period_l40_slice_in_co2 = _load_pkl('period_l40_slice_in_co2')
period_l40_start_year_in_co2 = _load_pkl('period_l40_start_year_in_co2')
period_l40_end_year_in_co2 = _load_pkl('period_l40_end_year_in_co2')
start_year_in_co2 = _load_pkl('start_year_in_co2')

# Get each model's variant_id
variant_id = _load_pkl('variant_id')

# Get each model's table_id
table_id = _load_pkl('table_id')

# Get each model's grid_label
grid_label = _load_pkl('grid_label')

# Colors corresponding to each model
mcolors = _load_pkl('mcolors')

# Merge files

```var_table-id_model_case_variant-id_grid_time.nc```

## Helper functions

In [7]:
def writeMergePaths(theseModels, theseCases, theseVariables, tag, toMerge=None, outdir='../bash_scripts/'):
    """Collect the path lines produced by `mergetime` and write them to a text file.

    Parameters
    ----------
    theseModels, theseCases : iterables of str
        Every model/case combination is passed to `mergetime`.
    theseVariables : list
        Variable names; nested lists (e.g. ``[variables_ht, variables_veg]``)
        are flattened one level. May be empty.
    tag : any
        Appended (via ``str``) to the file name stem.
    toMerge : bool
        True  -> lines go to ``paths-m<tag>.txt``.
        False -> lines go to ``paths-c<tag>.txt``.
        Any non-bool value (including the default None) selects nothing and
        only prints 'no files'.
    outdir : str
        Directory the text file is written to.

    Prints the name of the file written, or 'no files' when `mergetime`
    returned no line for any combination. Returns None.
    """
    # mergetime only returns lines for a real bool, so nothing can be collected otherwise
    if not isinstance(toMerge, bool):
        print('no files')
        return
    filename = ('paths-m' if toMerge else 'paths-c')+str(tag)+'.txt'

    # Flatten one level of nesting; also safe for an empty list
    flat_vars = []
    for item in theseVariables:
        if isinstance(item, (list, tuple)):
            flat_vars.extend(item)
        else:
            flat_vars.append(item)

    outfile = []
    for m in theseModels:
        for c in theseCases:
            for v in flat_vars:
                out = mergetime(m, c, v, toFile=True, toMerge=toMerge, toDelete=False, toPrint=False)
                if isinstance(out, str):
                    outfile.append(out)

    if outfile:
        with open(os.path.join(outdir, filename), 'w') as f:
            f.writelines(outfile)
        print(filename)
    else:
        print('no files')

In [8]:
def mergetime(thisM, thisC, thisV, toPrint=True, toFile=False, toMerge=None, toDelete=False,
              cmipdir='/tiger/scratch/gpfs/GEOCLIM/bgb2/CMIP/',
              cmipmergedir='/tiger/scratch/gpfs/GEOCLIM/bgb2/CMIPmerge/'):
    """Build the merge/copy path line for one model, case and variable.

    Files are looked up as
    ``<cmipdir><model>/<var>_<table>_<model>_<case>_<variant>_<grid>_*.nc``
    using the module-level `table_id`, `variant_id` and `grid_label`
    dictionaries. The target name carries ``<min start year>01-<max end year>12``
    taken from the first four digits of each file's start/end stamp.

    Parameters
    ----------
    thisM, thisC, thisV : str
        Model, case and variable name.
    toPrint : bool
        Print the search pattern and, when several files match, the target path.
    toFile : bool
        Return a path line (see Returns) instead of only printing.
    toMerge : bool or None
        True  -> return a merge line when more than one file matches.
        False -> return a copy line when exactly one file matches.
        Anything that is not a real bool returns no line.
    toDelete : bool
        Print the merged file name that would be removed (the removal itself
        is disabled). Not reached when a line is returned.
    cmipdir, cmipmergedir : str
        Source and destination root directories (with trailing '/').

    Returns
    -------
    str or None
        ``'<pattern>:<merged path>\\n'`` for a merge,
        ``'<source file>:<destination file>\\n'`` for a copy, otherwise None
        (including when no file matches the pattern).
    """
    stem = thisV+'_'+table_id[thisV]+'_'+thisM+'_'+thisC+'_'+variant_id[thisM]+'_'+grid_label[thisM]
    fsearch = cmipdir+thisM+'/'+stem+'_*.nc'

    files = glob.glob(fsearch)
    if toPrint:
        print('    ','/'.join(fsearch.split('/')[-2:]))
    if not files:
        # Nothing to merge or copy; min()/max() below would raise on empty lists
        return None

    stimes = []
    etimes = []
    for f in files:
        # Trailing field of the file name is '<start>-<end>'; keep the 4-digit years
        bounds = f.split('/')[-1].split('_')[-1].split('.')[0].split('-')
        stimes.append(int(bounds[0][:4]))
        etimes.append(int(bounds[1][:4]))

    fname = stem+'_%s-%s.nc' % (str(min(stimes)).zfill(4)+'01', str(max(etimes)).zfill(4)+'12')
    fdir = cmipmergedir+thisM+'/'+fname
    several = len(files) > 1

    if toPrint and several:
        print('     - ',fdir)

    if toFile and toMerge is True and several:
        return fsearch+':'+fdir+'\n'

    if toFile and toMerge is False and len(files) == 1:
        # A single file is copied as-is, but only if it is already named with
        # the full-year span <start>01-<end>12 that the merged files use.
        source = cmipdir+thisM+'/'+fname
        if os.path.isfile(source):
            return source+':'+fdir+'\n'

        print('ERROR')
        print(fname)
        print(source)
        print(files[0])

    if toDelete and several:
        print('----',fname)
        ## os.popen('rm %s' % fdir)

    return None

## Create text files with merge/copy paths

In [27]:
## All models with HT + VEG + SW for all cases
# writeMergePaths(models_all, cases, variables, '1', toMerge=True)
# writeMergePaths(models_all, cases, variables, '1', toMerge=False)

## Models with HT + VEG for FULL (1pctCO2) and RAD (1pctCO2-rad)
# writeMergePaths(models_rad, cases_rad, [variables_ht, variables_veg], '2', toMerge=True)
# writeMergePaths(models_rad, cases_rad, [variables_ht, variables_veg], '2', toMerge=False)

## Models with HT + VEG + SW for FULL (1pctCO2) and RAD (1pctCO2-rad)
# writeMergePaths(models_radsw, cases_rad, variables, '3', toMerge=True)
# writeMergePaths(models_radsw, cases_rad, variables, '3', toMerge=False)

## Redo BCC-CSM2-MR
# writeMergePaths(['BCC-CSM2-MR'], cases, variables, '-bcc', toMerge=True)
# writeMergePaths(['BCC-CSM2-MR'], cases, variables, '-bcc', toMerge=False)

## Redo NorESM2-LM for hus - 1pctCO2, piControl
# writeMergePaths(['NorESM2-LM'], ['1pctCO2', 'piControl'], ['hus'], '-nor-hus-full-pi', toMerge=True)
# writeMergePaths(['NorESM2-LM'], ['1pctCO2', 'piControl'], ['hus'], '-nor-hus-full-pi', toMerge=False)

## hus
# writeMergePaths(models_all, cases, ['hus'], '-hus-all', toMerge=True)
# writeMergePaths(models_all, cases, ['hus'], '-hus-all', toMerge=False)

# writeMergePaths(models_rad, cases_rad, ['hus'], '-hus-full-rad', toMerge=True)
# writeMergePaths(models_rad, cases_rad, ['hus'], '-hus-full-rad', toMerge=False)

## Redo EC-Earth3-CC for hus - 1pctCO2, 1pctCO2-rad
# First pass writes the merge list (paths-m...), second pass the copy list (paths-c...)
for merge_flag in (True, False):
    writeMergePaths(['EC-Earth3-CC'], cases_rad, ['hus'], '-ec-hus-full-rad', toMerge=merge_flag)

paths-m-ec-hus-full-rad.txt
no files


**At this point, run the bash scripts ```mergenetcdf.sh paths-mx.txt``` and ```copynetcdf.sh paths-cx.txt```, where ```x``` is the tag passed to ```writeMergePaths```.**

## Remove date at end of file name

```var_table-id_model_case_variant-id_grid_time.nc```

In [21]:
# Previously recorded renames; the update/save steps are disabled (## lines),
# so this cell currently only lists what would be renamed.
with open('../pkl_files/mergedtimes.pkl', 'rb') as file:
    mergedtimes = pkl.load(file)

cmipdir = '/tiger/scratch/gpfs/GEOCLIM/bgb2/CMIP/'
cmipmergedir = '/tiger/scratch/gpfs/GEOCLIM/bgb2/CMIPmerge/'

for d in sorted(os.listdir(cmipmergedir)):
    print(d)
    for f in sorted(os.listdir(cmipmergedir+d)):
        parts = f.split('_')
        # Only names that still carry a 7th '_'-separated field (the time span)
        # need renaming. An explicit length check replaces the former bare
        # `except:`, which would also have hidden any unrelated error.
        if len(parts) < 7:
            continue
        print(parts[6])
        newname = '_'.join(parts[:-1])+'.nc'
        # print(newname)
        # print('  ',cmipmergedir+d+'/'+f)
        # print('  ',cmipmergedir+d+'/'+newname)
        ## mergedtimes[newname] = f
        ## os.rename(cmipmergedir+d+'/'+f, cmipmergedir+d+'/'+newname)

## with open('../pkl_files/mergedtimes.pkl', 'wb') as file:
##     pkl.dump(mergedtimes, file, pkl.HIGHEST_PROTOCOL)

ACCESS-ESM1-5
BCC-CSM2-MR
CESM2
CMCC-ESM2
CNRM-ESM2-1
CanESM5
EC-Earth3-CC
GFDL-ESM4
GISS-E2-1-G
IPSL-CM6A-LR
MIROC-ES2L
MPI-ESM1-2-LR
MRI-ESM2-0
NorESM2-LM
UKESM1-0-LL


# Check merged/copied files

In [18]:
## Move files from CMIPtemp to CMIP
## (the move itself is commented out, so this only prints the target paths)

cmiptemp = '/tiger/scratch/gpfs/GEOCLIM/bgb2/CMIPtemp/'
cmip = '/tiger/scratch/gpfs/GEOCLIM/bgb2/CMIP/'

for f in sorted(glob.glob(cmiptemp+'*.nc')):
    ncfile = f.rsplit('/', 1)[-1]
    # The model name is the third '_'-separated field of the file name
    m = ncfile.split('_')[2]

    # Files of these two models are left where they are
    if m in ('BCC-ESM1', 'EC-Earth3'):
        continue

    print(m)
    print(cmip+m+'/'+ncfile)

    ## shutil.move(f, cmip+m+'/'+ncfile)