In [1]:
import iris
import Utils.iris_utils as iris_utils
import pandas as pd
import numpy as np
import os
import logging
import esmvalcore.preprocessor
import glob
import warnings
warnings.filterwarnings("ignore")
from tqdm import tqdm
import xarray as xr
from xmip.preprocessing import rename_cmip6

In [2]:
def UKESM_land_mask(in_ds):
    land_frac = rename_cmip6(xr.open_dataset('/badc/cmip6/data/CMIP6/CMIP/MOHC/UKESM1-0-LL/piControl/r1i1p1f2/fx/sftlf/gn/latest/sftlf_fx_UKESM1-0-LL_piControl_r1i1p1f2_gn.nc'))
    land_cutoff = 99
    land_mask = xr.where(land_frac['sftlf'] > land_cutoff, True, False)
    #land_mask = land_mask.reindex(time=in_ds['time'], method='ffill')
    masked_ds = in_ds.where(land_mask, drop=True)
    return masked_ds

In [3]:
def preprocess(folder, var, min_lat=-20, max_lat=20, 
               land_only=True, test=False):
    
    """ makes a df of mean temp by year, in lat/lon slice provided, 
        with optional land mask """
    
    outpath = 'int_outputs/{M}_{Exp}_tropics_land_{v}_{ens}.csv'.format(
                            M=folder.split('/')[7], Exp=folder.split('/')[8], v=var, ens=folder.split('/')[9])
    
    
                                                      
    #try:
    data = rename_cmip6(xr.open_mfdataset(folder + "*.nc", use_cftime=True))
    jan_mask = data.time.dt.month.isin([1]) 
    name = str(folder.split('/')[7] + '_' + folder.split('/')[8] + '_' + folder.split('/')[9])
    
    at_data = data[var]
    data.close()
        
    years = data.time.dt.year[jan_mask].compute()
    at_data = (at_data).sel(y=slice(min_lat,max_lat)).groupby("time.year").mean(dim="time")
    if land_only:
        at_data = UKESM_land_mask(at_data)
        
    at_data = at_data.weighted(weights=np.cos(np.deg2rad(at_data.y)))
    
    
    df = pd.DataFrame({var:at_data.mean(("x","y")).values,
                       'year':np.unique(years.values)})
    at_data.close()
    df.set_index('year',inplace=True)
    df.sort_index(inplace=True)
    df['Model'] = folder.split('/')[7]
    df['Experiment'] = folder.split('/')[8]
    df['Ensemble_member'] = folder.split('/')[9]
    if test:
        print(df)
    df.to_csv(outpath) 
        #except:
        #    print(name)

In [6]:
### test 

test_dir = '/badc/cmip6/data/CMIP6/ScenarioMIP/MOHC/UKESM1-0-LL/ssp245/r10i1p1f2/day/tasmax/gn/latest/'

preprocess(folder=test_dir, var=var, min_lat=-20, max_lat=20, land_only=True, test=True)

AttributeError: 'DataArrayWeighted' object has no attribute 'close'

In [5]:
### now run

exps = ['ssp245', 'ssp585', 'G6sulfur', 'G6solar', 'G1', 'piControl', 'abrupt-4xCO2']

dirs = []
domain = "day"

vars = ["tasmin", "tasmax"]
#var = "tasmax"

models = ['MOHC/UKESM1-0-LL']

exp_dict = {'ssp245': 'ScenarioMIP',
            'ssp585': 'ScenarioMIP',
            'historical': 'CMIP',
            'G6sulfur': 'GeoMIP',
            'G6solar': 'GeoMIP', 
            'G1':'GeoMIP',
            'piControl':'CMIP',
            'abrupt-4xCO2':'CMIP'}


for var in vars:
    print(var)
    dirs = []
    for experiment in exps:
        exp_set = exp_dict[experiment]
        for model in models:        
            for x in glob.glob('/badc/cmip6/data/CMIP6/{es}/{m}/{e}/*/{d}/{v}/*/latest/'.format(
                                            es=exp_set, m=model, e=experiment, d=domain, v=var)):
                dirs.append(x)
    print(len(dirs))
    
    for dir in tqdm(dirs):
       preprocess(dir, var=var, min_lat=-20, max_lat=20, land_only=True)

tasmax
5
