In [1]:
"""
####################################################################################################################
Program    : calc_seasonal_means.ipynb
Usage      : Reads in numpy dictionary containing CMIP6 multi-model monthly precipitation data for a specified 
             region, and calculates the seasonal mean for each model for selected time period (historical or future).
             Output is a number of .npy files containing data for all models.
Written in : Python
Tested on  : JASMIN
Written by : Natalie Lord (natalie.lord@bristol.ac.uk), with some code adapted from script written by Jess Baker (j.c.baker@leeds.ac.uk)  
Date       : 03/06/2021
####################################################################################################################
"""

import os
import numpy as np
import xarray as xr
from datetime import datetime as dt
from pathlib import Path
import iris
import iris.coord_categorisation
from iris.experimental.equalise_cubes import equalise_attributes
from iris.util import unify_time_units
from iris.time import PartialDateTime
import iris.analysis.cartography
import cftime
import matplotlib.pyplot as plt
import cartopy
import cartopy.crs as ccrs
import datetime


def make_cmip6_filepath(directory_nam, # scenario, variable, data_type, experiment, region,
                        data_root="/home/users/nat_lord/cmip6_hackathon/data"):
    """
    Build the path of a CMIP6 data directory on JASMIN and print its contents.

    Parameters
    ----------
    directory_nam : str
        Name of the sub-directory below ``data_root`` (e.g. 'CMIP6histo').
    data_root : str or pathlib.Path, optional
        Root directory that holds the data sub-directories.

    Returns
    -------
    str
        The directory path with a trailing '/'.

    Raises
    ------
    FileNotFoundError
        Raised by ``os.listdir`` when the directory does not exist.
    """
    # Build the path from the data_root argument. The previous version read a
    # notebook-level DATA_ROOT global instead, which failed with NameError unless
    # a later cell had already been run, and ignored the value passed by the caller.
    path = str(Path(data_root) / directory_nam)

    print('JASMIN FILEPATH:')
    print(path)
    print('DIRECTORY CONTENTS:')
    print(os.listdir(path))
    return path + '/'

def make_cmip6_filenam(file_prefix, experiment, variable, latmin, latmax, lonmin, lonmax):
    """
    Assemble the name of the .npy dictionary file for one experiment, variable and region.

    The name has the layout
    <file_prefix>_<experiment>_<variable>_dict<latmin>_to_<latmax>lat_<lonmin>_to_<lonmax>lon
    The name without extension is printed; the returned string has '.npy' appended.
    """
    lat_part = '{}_to_{}lat'.format(latmin, latmax)
    lon_part = '{}_to_{}lon'.format(lonmin, lonmax)
    stem = '_'.join([file_prefix, experiment, variable, 'dict' + lat_part, lon_part])

    print('JASMIN FILENAM:')
    print(stem)
    return stem + '.npy'

In [2]:
# Create a dictionary mapping each model name to its institute directory
# (allows you to loop over models and know the name of the directory that
# contains the data for that model).
basepath = '/badc/cmip6/data/CMIP6/CMIP/'
institute_list = os.listdir(basepath)
model_inst_dict = {}

# loop over institutes
for inst in institute_list:
    model_list = os.listdir(basepath + inst + '/')

    # for each institute list models and store in dictionary;
    # a model listed under several institutes keeps the last institute visited
    for model_temp in model_list:
        model_inst_dict[model_temp] = inst

# correction for UKESM which is used by multiple centres - we want MOHC only.
# Applied once after the loop: it previously sat inside the institute loop and
# was re-assigned on every iteration with the same final result.
model_inst_dict['UKESM1-0-LL'] = 'MOHC'
    
# print(model_inst_dict)

In [3]:
#assert False  # May want to add this if already saved data into dictionaries to prevent re-writing

# Read in precipitation data over domain for CMIP6 models and save data to dictionary

DATA_ROOT = Path("/home/users/nat_lord/cmip6_hackathon/data/")
#DATA_ROOT = Path("/gws/pw/j05/cop26_hackathons/bristol/project02/data/")

# dictionaries to save subset model output (filled per model further down the loop)
pr_datasets_seas_means = {}
pr_datasets_seas_means_av = {}

# Define region of interest (only used to build the input/output file names here)
latmin = -90 #-40
latmax = 90 #40
lonmin = -180 #-20
lonmax = 180 #50

# variables for JASMIN directory structure
table_id = 'Amon'  # monthly model output
variable_id = 'pr'  # variable code for precipitation in cmip6 model output

# Larger selection of models
models = ['ACCESS-CM2', 'ACCESS-ESM1-5', 'BCC-CSM2-MR', 'CAMS-CSM1-0', 'CanESM5',
          'CNRM-CM6-1', 'CNRM-ESM2-1', 'FGOALS-f3-L', 'FGOALS-g3', 'HadGEM3-GC31-MM',
          'GISS-E2-1-G', 'INM-CM5-0', 'INM-CM4-8',
          'MPI-ESM1-2-LR', 'NorESM2-LM', 'NorESM2-MM', 'TaiESM1', 'UKESM1-0-LL']

# Subset of models that have SSP119 data available
#models = ['CAMS-CSM1-0', 'CanESM5',
#          'CNRM-ESM2-1', 'FGOALS-g3',
#          'GISS-E2-1-G',
#          'UKESM1-0-LL']

# Try for just one model to see if it works
#models = ['CanESM5', 'UKESM1-0-LL']

# Loop over model experiments: load the pre-saved dictionary of precipitation cubes
# for the experiment, then calculate seasonal means for every model
for expt in ['historical']:  # other options: 'ssp119', 'ssp585'

    try:
        # experiment-specific settings
        if expt == 'historical':
            scenario = 'CMIP'
            file_prefix = 'cmip6'
            directory_nam = str('CMIP6' + 'histo')
            first_yr = 's1850'

            start_year = 1970 #1970
            end_year = 1999 #2000

        elif expt in ['ssp119', 'ssp126', 'ssp245', 'ssp370', 'ssp585']:
            # NOTE(review): scenario stays 'CMIP' here; 'ScenarioMIP' was commented out
            # in the original - confirm which one is intended.
            scenario = 'CMIP'
            file_prefix = 'cmip6'
            directory_nam = str('CMIP6' + 'proj')
            first_yr = 's2015'

            start_year = 2070 #2070
            end_year = 2099 #2100

        else:
            # Without this guard an unknown experiment would silently reuse the
            # settings of the previous iteration (or fail with NameError on the first).
            raise ValueError('Unknown experiment: ' + str(expt))

        # get directory and file name of the saved dictionary for this experiment
        fp_nam_path = make_cmip6_filepath(directory_nam=directory_nam, data_root=DATA_ROOT)
        fp_nam_file = make_cmip6_filenam(file_prefix=file_prefix, experiment=expt, variable=variable_id,
                                         latmin=latmin, latmax=latmax, lonmin=lonmin, lonmax=lonmax)

        # fp_nam_path already ends with '/', so join instead of adding a second separator
        fp_nam = os.path.join(str(fp_nam_path), fp_nam_file)

        # read in data: the file holds a pickled dictionary wrapped in a 0-d object array.
        # allow_pickle=True is required by current NumPy releases to load object arrays.
        # Pickle can execute arbitrary code - only load files you created yourself.
        pr_data_all = np.load(fp_nam, allow_pickle=True).item()

    except FileNotFoundError as err:
        # No model has been selected at this point, so report the experiment and the
        # missing path rather than a model name.
        print('No ' + expt.upper() + ' output found:', err)
        continue



JASMIN FILEPATH:
/home/users/nat_lord/cmip6_hackathon/data/CMIP6histo
DIRECTORY CONTENTS:
['zonalAverages', 'cmip6_historical_pr_dict.npy', 'cmip6_historical_pr_dict-40_to_40lat_-20_to_50lon.npy', 'cmip6_historical_pr_dict-90_to_90lat_-180_to_180lon.npy', 'global']
JASMIN FILENAM:
cmip6_historical_pr_dict-90_to_90lat_-180_to_180lon


In [4]:
    # For every model in `models`: aggregate the cube stored in pr_data_all to seasonal
    # means, keep the seasons that fall inside start_year..end_year, and store the
    # results in pr_datasets_seas_means / pr_datasets_seas_means_av.
    for model in models:
        print(model, '', expt.upper())

        # extract coordinates for individual models
        # NOTE(review): these three variables are not used again in this cell, and the
        # longitude coordinate is requested as 'lon' while latitude uses the full name
        # 'latitude' - confirm this matches the coordinate names stored in the cubes.
        model_time = pr_data_all[model].coord('time')
        model_lat = pr_data_all[model].coord('latitude')
        model_lon = pr_data_all[model].coord('lon')

        # print(pr_data)
#        print(model_time)
#        print(model_lat)
#        print(model_lon)

        # Model data - calculate seasonal data
        print('Calculating seasonal averages for  model data')

        # Attach 'clim_season' and 'season_year' coordinates derived from 'time'.
        # The calls act on the cube held in pr_data_all[model] itself (no copy is made),
        # so re-running this cell without reloading pr_data_all presumably fails because
        # the coordinates already exist - TODO confirm.
        iris.coord_categorisation.add_season(pr_data_all[model], 'time', name='clim_season')
        iris.coord_categorisation.add_season_year(pr_data_all[model], 'time', name='season_year')

        # One mean per (season, season year) combination
        pr_cube_seas_mean = pr_data_all[model].aggregated_by(
                                     ['clim_season', 'season_year'], 
                                     iris.analysis.MEAN)

        # Constraint selecting time cells whose bounds span more than 3*28 = 84 days.
        # It is only built here: the extract that would apply it is commented out below,
        # so seasons with fewer than three months are NOT removed.
        tdelta_3mth = datetime.timedelta(days=3*28)
        spans_three_months = lambda t: (t.bound[1] - t.bound[0]) > tdelta_3mth
        three_months_bound = iris.Constraint(time=spans_three_months)

        ### CHECK IF ALL SEASONS HAVE 3 MONTHS ###
        # pr_cube_full_seas_mean = pr_cube_seas_mean.extract(three_months_bound)

        # Alias kept for the (commented-out) inspection loop below
        view_data = pr_cube_seas_mean 
#        for season, year in zip(view_data.coord('clim_season')[:10].points,
#            view_data.coord('season_year')[:10].points):
#            print(season + ' ' + str(year))
            
            
        # Select data for specific time period
        print('Extracting data for specified time period')
        # NOTE(review): the original note said "Include preceding year to allow DJF
        # calculation", but the lower bound is January of start_year itself - confirm.
        pdt1 = PartialDateTime(year=start_year, month=1)
        pdt2 = PartialDateTime(year=end_year, month=12)

        # Keep seasonal means whose time point lies between pdt1 and pdt2 (inclusive)
        pr_cube_seas_mean_time_subset = pr_cube_seas_mean.extract(iris.Constraint(time=lambda cell: pdt1 <= cell.point <= pdt2))
#        print(pr_cube_seas_mean_time_subset)

        # Convert the numeric time points of the subset to dates using the coordinate's
        # own units and calendar; the result is not used again in this cell.
        time_model_subset = pr_cube_seas_mean_time_subset.coord('time')
        time_model_dates_subset = cftime.num2date(time_model_subset.points,time_model_subset.units.origin,calendar=time_model_subset.units.calendar) #'days since 1850-01-01'
#        print(time_model_subset)


        # Split the subset by season. Only the DJF cube is used further down in this
        # cell; the MAM, JJA and SON cubes are created but not averaged or stored here.
        pr_cube_djf_seas_mean = pr_cube_seas_mean_time_subset.extract(iris.Constraint(clim_season='djf'))
        pr_cube_mam_seas_mean = pr_cube_seas_mean_time_subset.extract(iris.Constraint(clim_season='mam'))
        pr_cube_jja_seas_mean = pr_cube_seas_mean_time_subset.extract(iris.Constraint(clim_season='jja'))
        pr_cube_son_seas_mean = pr_cube_seas_mean_time_subset.extract(iris.Constraint(clim_season='son'))
#        print(pr_cube_djf)
        
        
        # Calculate seasonal average across time period (DJF only)
        pr_cube_djf_seas_mean_av = pr_cube_djf_seas_mean.collapsed(['time', 'clim_season', 'season_year'], iris.analysis.MEAN)
#        print(pr_cube_djf_seas_mean_av)

        
        # Store per-model results: all seasonal means for the period, and the DJF
        # period average (pr_datasets_seas_means_av therefore holds DJF only).
        pr_datasets_seas_means[model] = pr_cube_seas_mean_time_subset
        pr_datasets_seas_means_av[model] = pr_cube_djf_seas_mean_av
        
        
        
    # change output directory to somewhere you can save
    outpath = '/home/users/nat_lord/cmip6_hackathon/data/'
    data_type = 'global'  # directory
    data_nam2 = 'seas'  # directory
    fname = str(variable_id + '_' + data_nam2 + '_' + str(start_year) + '-' + str(end_year) + '_' + expt + '_' + str(latmin) + '_to_' + str(latmax) + 'lat' + '_' + str(lonmin)  + '_to_' + str(lonmax)  + 'lon')
    print('SAVING TO:', outpath + directory_nam + '/' + data_type + '/' + fname)
    print(fname)
    if os.path.exists(outpath + directory_nam + '/' + data_type + '/' + fname):
        os.remove(outpath + directory_nam + '/' + data_type + '/' + fname)
#    np.save(outpath + directory_nam + '/' + data_type + '/' + fname, pr_datasets_seas_means)  # uncomment to save

    
    # change output directory to somewhere you can save
    outpath = '/home/users/nat_lord/cmip6_hackathon/data/'
    data_type = 'global'  # directory
    data_nam = 'av'  # directory
    data_nam2 = 'seas'  # directory
    fname = str(variable_id + '_' + data_nam2 + '_' + str(start_year) + '-' + str(end_year) + data_nam + '_' + expt + '_' + str(latmin) + '_to_' + str(latmax) + 'lat' + '_' + str(lonmin)  + '_to_' + str(lonmax)  + 'lon')
    print('SAVING TO:', outpath + directory_nam + '/' + data_type + '/' + fname)
    print(fname)
    if os.path.exists(outpath + directory_nam + '/' + data_type + '/' + fname):
        os.remove(outpath + directory_nam + '/' + data_type + '/' + fname)
#    np.save(outpath + directory_nam + '/' + data_type + '/' + fname, pr_datasets_seas_means_av)  # uncomment to save

    

ACCESS-CM2  HISTORICAL
Calculating seasonal averages for  model data
Extracting data for specified time period




ACCESS-ESM1-5  HISTORICAL
Calculating seasonal averages for  model data
Extracting data for specified time period
BCC-CSM2-MR  HISTORICAL
Calculating seasonal averages for  model data
Extracting data for specified time period
CAMS-CSM1-0  HISTORICAL
Calculating seasonal averages for  model data
Extracting data for specified time period
CanESM5  HISTORICAL
Calculating seasonal averages for  model data
Extracting data for specified time period
CNRM-CM6-1  HISTORICAL
Calculating seasonal averages for  model data
Extracting data for specified time period
CNRM-ESM2-1  HISTORICAL
Calculating seasonal averages for  model data
Extracting data for specified time period
FGOALS-f3-L  HISTORICAL
Calculating seasonal averages for  model data
Extracting data for specified time period
FGOALS-g3  HISTORICAL
Calculating seasonal averages for  model data
Extracting data for specified time period
HadGEM3-GC31-MM  HISTORICAL
Calculating seasonal averages for  model data
Extracting data for specified time 