In [108]:

#%matplotlib inline 

###########
# PDF processing for various input datasets,
# relying on the PDF_Processing class
#by Carl Schleussner, Climate Analytics
#carl.schleussner@climateanalytics.org
###########
# IMPORT AND CONFIG 
import numpy as np
import netCDF4 as net
import dimarray as da 
import sys
sys.path.append("./src/")
import itertools
import glob
import datetime
import pickle
import os
import pandas as pd
import matplotlib.pylab as plt 
from netCDF4 import Dataset,netcdftime,num2date

# Plot defaults: ggplot style sheet and a default figure size of 8 x 6.
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = 8,6

# Switch matplotlib text rendering to LaTeX (usetex).
from matplotlib import rc
rc('text', usetex=True)

# NOTE(review): hard-coded, user-specific working directory. The module import
# below and every relative path used afterwards are resolved against it, so the
# notebook only runs unchanged on the machine that has this directory.
os.chdir('/Users/peterpfleiderer/Documents/Projects/0p5_observed/pdf_processing/')
try:
    # reload() re-executes the module so that edits made since the first import
    # take effect; it is a builtin in Python 2 only.
    import pdf_processing as pdf; reload(pdf)
except ImportError:
    # NOTE(review): re-raising a fresh ImportError replaces the message of the
    # original error, which would say what exactly could not be imported.
    raise ImportError(
        "cannot find PDF_Processing code")

###########
# Settings
###########

# Method used to derive the PDFs (currently defined: hist, python_silverman)
pdf_method = 'python_silverman'

# First and last year of the range used for bootstrap sampling
bs_range = [1958, 2010]

# Indices that can be analysed
varinlist = ['TXx', 'TNn', 'WSDI', 'RX5', 'RX1']

# Reference period and the periods that are evaluated; the reference period is
# itself the second target period (same list object) and carries the name 'ref'.
ref_period = [1960, 1979]
target_periods = [[1991, 2010], ref_period]
period_names = ['Recent', 'ref']

# Integer year axis spanning the bootstrap range, both end years included
timeaxis = np.arange(bs_range[0], bs_range[1] + 1)

# Input datasets
datasets = ['ERA_regrid', '20CR_regrid']

# Result container: one independent, initially empty dictionary per dataset
varoutdict = {dataset_name: {} for dataset_name in datasets}


In [111]:
##############################
# 20CR_regrid
###############
# For each variable: read the regridded 20CR file, keep the years selected
# below and store a PDF_Processing object with its data-coverage mask in
# varoutdict[dataset][varin].
dataset='20CR_regrid'
for varin in ['TXx','TNn']:
    # Single-argument print(...) produces the same output under Python 2 and 3.
    print(varin.lower())

    file_pattern='/Users/peterpfleiderer/Box Sync/0p5_observational_record/data/data_climdex/*/'+varin.lower()+'*regrid*'
    matches=glob.glob(file_pattern)
    if not matches:
        # Name the pattern instead of failing with an anonymous IndexError on [0].
        raise IOError('no input file matches '+file_pattern)
    # NOTE(review): the directory part of the pattern is a wildcard, so files of
    # other datasets can match too and glob does not guarantee an order. The
    # first hit is used as before -- confirm that it is always the 20CR file.
    in_file=matches[0]

    nc_in=Dataset(in_file,"r")
    try:
        # handle time information
        # Failures are no longer swallowed: a silent error used to leave `year`
        # undefined, or stale from the previous variable, for the code below.
        time_var=nc_in.variables['time']
        time=time_var[:]
        time_unit=time_var.units
        # Only a missing calendar attribute triggers the fallback to num2date's
        # default calendar; any other conversion error is raised.
        cal_temps=getattr(time_var,'calendar',None)
        if cal_temps is None:
            dates=num2date(time,units = time_unit)
        else:
            dates=num2date(time,units = time_unit,calendar = cal_temps)
        year=np.array([date.year for date in dates])

        lat=nc_in.variables['lat'][:]
        lon=nc_in.variables['lon'][:]
        var_in=nc_in.variables['air'][:,:,:]
    finally:
        # Everything needed has been read into arrays; release the file handle.
        nc_in.close()

    # Presumably Kelvin input (mean above 150) converted to degrees Celsius.
    if var_in.mean()>150:var_in-=273.15

    # Set regular integer time axis, dimarray default always adds month and day.
    # NOTE(review): `year<2010` drops 2010 although target_periods and bs_range
    # both end in 2010 -- confirm whether 2010 should be included.
    relevant_years=np.where((year>=1958) & (year<2010))[0]

    input_data=da.DimArray(var_in[relevant_years,:,:].squeeze(), axes=[year[relevant_years], lat, lon],dims=['year', 'lat', 'lon'] )

    # Mask for data availability (individual for each dataset)
    varoutdict[dataset][varin]=pdf.PDF_Processing(varin)
    varoutdict[dataset][varin].mask_for_ref_period_data_coverage(input_data,ref_period,check_ref_period_only=False,target_periods=target_periods)


txx
tnn


In [113]:
##############
# ERA40 - ERAinterim _regrid
##############
# For each variable: read the ERA40 and the ERAinterim file, cut each one to
# its own year range, join both along the year axis and store a PDF_Processing
# object with its data-coverage mask in varoutdict[dataset][varin].
dataset='ERA_regrid'
era_data_root='/Users/peterpfleiderer/Box Sync/0p5_observational_record/data/data_climdex/'
# (source directory, first year kept, first year NOT kept any more)
# NOTE(review): the ERAinterim range stops before 2010 although target_periods
# and bs_range both end in 2010 -- confirm whether 2010 should be included.
era_segments=[('ERA40',1958,1979),('ERAinterim',1979,2010)]
for varin in ['TXx','TNn']:
    # Single-argument print(...) produces the same output under Python 2 and 3.
    print(varin.lower())

    # Both sources are read by the same code path instead of two copies of it.
    segment_fields=[]
    for source_dir,first_year,end_year in era_segments:
        file_pattern=era_data_root+source_dir+'/'+varin.lower()+'*regrid*'
        matches=glob.glob(file_pattern)
        if not matches:
            # Name the pattern instead of failing with an anonymous IndexError on [0].
            raise IOError('no input file matches '+file_pattern)
        in_file=matches[0]

        nc_in=Dataset(in_file,"r")
        try:
            # handle time information
            time_var=nc_in.variables['time']
            time=time_var[:]
            time_unit=time_var.units
            # Only a missing calendar attribute triggers the fallback to
            # num2date's default calendar; other conversion errors are raised.
            cal_temps=getattr(time_var,'calendar',None)
            if cal_temps is None:
                dates=num2date(time,units = time_unit)
            else:
                dates=num2date(time,units = time_unit,calendar = cal_temps)
            year=np.array([date.year for date in dates])

            lat=nc_in.variables['lat'][:]
            lon=nc_in.variables['lon'][:]
            field=nc_in.variables['t2m'][:,:,:]
        finally:
            # Everything needed has been read into arrays; release the file handle.
            nc_in.close()

        # Presumably Kelvin input (mean above 150) converted to degrees Celsius.
        if field.mean()>150:field-=273.15

        # Set regular integer time axis, dimarray default always adds month and day.
        relevant_years=np.where((year>=first_year) & (year<end_year))[0]
        segment_fields.append(da.DimArray(field[relevant_years,:,:].squeeze(), axes=[year[relevant_years], lat, lon],dims=['year', 'lat', 'lon'] ))

    # Keep the original names available for later inspection in the notebook.
    ERA40,ERAinterim=segment_fields
    input_data=da.concatenate((ERA40, ERAinterim), axis='year')

    # Mask for data availability (individual for each dataset)
    varoutdict[dataset][varin]=pdf.PDF_Processing(varin)
    varoutdict[dataset][varin].mask_for_ref_period_data_coverage(input_data,ref_period,check_ref_period_only=False,target_periods=target_periods)


txx
tnn


In [13]:
################
# with HadEx2 mask for ERA and 20CR
################
# Per-variable range passed as bin_range to derive_pdf_difference.
cut_interval_dict={
    'TXx':[-2,3],
    'TNn':[-3,5],
    'WSDI':[-10,20],
    'RX5':[-30,45],
    'RX1':[-30,40]
          }

# The global mask is taken from the HadEX2 results. No cell visible in this
# notebook creates that entry, so fail early with an explanation instead of an
# opaque KeyError from the middle of the loop.
if 'HadEX2' not in varoutdict:
    raise KeyError("varoutdict has no 'HadEX2' entry; load the HadEX2 results into varoutdict before running this cell")

for varin in ['TXx']:
    # The very same mask object is shared by all datasets, as before.
    hadex2_mask=varoutdict['HadEX2'][varin]._masks['global']
    for dataset in ['ERA_regrid','20CR_regrid']:
        # Single-argument print(...) produces the same output under Python 2 and 3.
        print(dataset)

        processor=varoutdict[dataset][varin]

        # use HadEx2 mask
        processor._masks['global']=hadex2_mask

        # Derive time slices
        processor.derive_time_slices(ref_period,target_periods,period_names)
        processor.derive_distributions()

        processor.derive_pdf_difference('ref','Recent',pdf_method=pdf_method,bin_range=cut_interval_dict[varin])

        # Get bootstrapping confidence intervals
        # NOTE(review): the second argument is presumably the number of bootstrap
        # samples; it is 100 here while the pickle written afterwards is named
        # "..._10000_..." -- confirm which value is intended.
        processor.bootstrapping(bs_range,100)
        processor.derive_bootstrapped_conf_interval(pdf_method=pdf_method)

ERA_regrid
Recent [1991, 2010]
ref [1960, 1979]
20CR_regrid
Recent [1991, 2010]
ref [1960, 1979]


In [15]:
# Write the complete results dictionary to a pickle file one level above the
# current working directory, using the highest pickle protocol available.
pickle_path='../varoutdict_10000_reanalysis_regrid.pkl'
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(varoutdict, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)