In [16]:

#%matplotlib inline 

###########
#PDF PROCESSING FOR various INPUT 
# DEPENDING ON THE PDF_PROCESSING CLASS
#by Carl Schleussner, Climate Analytics
#carl.schleussner@climateanalytics.org
###########
# IMPORT AND CONFIG 
import numpy as np
import netCDF4 as net
import dimarray as da 
import sys
sys.path.append("./src/")
import itertools
import glob
import datetime
import pickle
import os
import pandas as pd
import matplotlib.pylab as plt 
from netCDF4 import Dataset,netcdftime,num2date

# Plot appearance: ggplot style sheet and an 8x6 default figure size.
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = 8,6

# Render all figure text through LaTeX.
from matplotlib import rc
rc('text', usetex=True)

# Work from inside the pdf_processing directory: the relative paths used by
# the later cells ('support/...', '../plots/...', '../*.pkl') are resolved
# against it.
# NOTE(review): machine-specific absolute path - adjust when running elsewhere.
os.chdir('/Users/peterpfleiderer/Documents/Projects/0p5_observed/pdf_processing/')
try:
    import pdf_processing as pdf
    # Re-import so that edits to the module are picked up by a running kernel.
    reload(pdf)
except ImportError as err:
    # Still an ImportError, but keep the underlying reason in the message:
    # a missing dependency *inside* pdf_processing would otherwise be
    # reported as if the module itself could not be found.
    raise ImportError(
        "cannot find PDF_Processing code: %s" % err)
###########
# Settings
###########

# Method used for the PDF estimate (currently defined: hist, python_silverman)
pdf_method='python_silverman'

# First and last year handed to the bootstrap sampling
bs_range=[1958,2010]

# Indices to process
varinlist=['TXx','TNn','WSDI','RX5','RX1']

# Reference period, and the periods to derive together with their labels
# (period_names[i] is the label of target_periods[i])
ref_period=[1960,1979]
target_periods=[[1991,2010],ref_period]
period_names=['Recent','ref']

# Integer year axis covering 1958 to 2010 inclusive
timeaxis=np.arange(1958,2010+1)

# Input datasets
datasets=['HadEX2','GHCNDEX']

# Plotting colour of each dataset
colordict=dict(zip(datasets,["#247a9c","#df1a20"]))

# Result container, one (initially empty) dict per dataset;
# filled as varoutdict[dataset][index] by the cells below
varoutdict={dataset_name:{} for dataset_name in datasets}


In [17]:
##############
# HadEx2 & GHCND
##############

# Name of each index as it appears in the input file names.  An explicit
# lookup raises KeyError for an index that is not listed, instead of silently
# re-using the file name of the previously processed index.
climdex_names={
    'TXx':'TXX',
    'TNn':'TNN',
    'WSDI':'WSDI',
    'RX5':'Rx5day',
    'RX1':'Rx1day',
}

# Per dataset: input file template (filled with the name from climdex_names)
# and the GRL mask file on the grid of that dataset.
# NOTE(review): the data paths are machine-specific absolute paths.
dataset_files={
    'HadEX2':{
        'data':'/Users/peterpfleiderer/Box Sync/0p5_observational_record/data/data_climdex/HadEx2/H2_%s_1901-2010_RegularGrid_global_3.75x2.5deg_LSmask.nc',
        'GRL_mask':'support/GRL_73x96_lat_weighted.nc4',
    },
    'GHCNDEX':{
        'data':'/Users/peterpfleiderer/Box Sync/0p5_observational_record/data/data_climdex/GHCND/GHCND_%s_1951-2016_RegularGrid_global_2.5x2.5deg_LSmask.nc',
        'GRL_mask':'support/GRL_73x144_lat_weighted.nc4',
    },
}

for varin in varinlist:
    varin_name=climdex_names[varin]
    for dataset in datasets:
        print('%s %s' % (varin,dataset))
        files=dataset_files[dataset]

        # Annual values of the index
        read_in_data=da.read_nc(files['data'] % varin_name)['Ann']

        # Cut to 1958-2010 and replace the time axis by plain integer years
        # (per the original note, dimarray appends month and day to the time
        # labels, hence the YYYYMMDD slice bounds).
        input_data=da.DimArray(read_in_data[19580101:20100101,:,:], axes=[timeaxis, read_in_data.lat, read_in_data.lon],dims=['year', 'lat', 'lon'] )

        # mask GRL: start from a lat/lon field of ones (one time step is only
        # used as a template) and set the cells to 0 where the GRL mask file
        # holds finite values
        landmask=input_data.ix[10,:,:].copy()
        landmask[:,:]=1
        grl_file=Dataset(files['GRL_mask'])
        try:
            GRL_mask=grl_file.variables['GRL'][:,:]
        finally:
            # the values are in memory now; do not leave the file handle open
            grl_file.close()
        landmask[np.isfinite(GRL_mask)]=0

        # Mask for data availability (individual for each dataset)
        varoutdict[dataset][varin]=pdf.PDF_Processing(varin)
        varoutdict[dataset][varin].mask_for_ref_period_data_coverage(
            input_data,
            ref_period,
            check_ref_period_only=False,
            target_periods=target_periods,
            landmask=landmask,
            required_coverage=0.8)


TXx HadEX2
TXx GHCNDEX


In [18]:
###############
# get PDFs and bootstrap
###############

# bin_range passed to derive_pdf_difference for each index
cut_interval_dict={
    'TXx':[-2,3],
    'TNn':[-3,5],
    'WSDI':[-10,20],
    'RX5':[-30,45],
    'RX1':[-30,40]
          }

for varin in varinlist:
    print('\n------------------- %s' % varin)

    # every index is processed for all datasets
    datasets_here=datasets

    # relative diff for precipitation indices
    relative_diff=varin in ['RX5','RX1']

    for dataset in datasets_here:
        print(dataset)
        print('relative_diff= %s' % relative_diff)

        # The PDF_Processing objects are created by the data loading cell.
        # Name the missing entry and the likely cause instead of failing
        # with a bare KeyError on the index name.
        try:
            pdf_proc=varoutdict[dataset][varin]
        except KeyError:
            raise KeyError(
                "no PDF_Processing object for %s / %s - rerun the data loading cell with the current varinlist"
                % (dataset,varin))

        pdf_proc.derive_time_slices(ref_period,target_periods,period_names)
        pdf_proc.derive_distributions()

        pdf_proc.derive_pdf_difference('ref','Recent',pdf_method=pdf_method,bin_range=cut_interval_dict[varin],relative_diff=relative_diff)

        # Get bootstrapping confidence intervals
        # NOTE(review): the second argument (100) is presumably the number of
        # bootstrap samples - confirm against PDF_Processing.bootstrapping.
        pdf_proc.bootstrapping(bs_range,100)
        pdf_proc.derive_bootstrapped_conf_interval(pdf_method=pdf_method,relative_diff=relative_diff)
        print('...........................')


------------------- TXx
HadEX2
relative_diff= False
Recent [1991, 2010]
ref [1960, 1979]
1088 1094 [-2, 3] 4.40521621704 -2.07496643066 2.97533416748 -1.96370697021
806 1094 [-2, 3] nan nan 1.04709 -0.960503
804 1094 [-2, 3] nan nan 1.17657 -0.90453
809 1094 [-2, 3] nan nan 1.05685 -0.668175
804 1094 [-2, 3] nan nan 1.20968 -0.768724
806 1094 [-2, 3] nan nan 1.45666 -0.632732
805 1094 [-2, 3] nan nan 1.30842 -0.752605
807 1094 [-2, 3] nan nan 1.85377 -1.12635
808 1094 [-2, 3] nan nan 1.36855 -0.59993
805 1094 [-2, 3] nan nan 1.40805 -0.96072




806 1094 [-2, 3] nan nan 0.960503 -1.04709
976 1094 [-2, 3] nan nan 1.18933 -1.7443
984 1094 [-2, 3] nan nan 1.11474 -1.01849
996 1094 [-2, 3] nan nan 1.28246 -1.48323
809 1094 [-2, 3] nan nan 0.952019 -1.12476
980 1094 [-2, 3] nan nan 0.963446 -1.69463
819 1094 [-2, 3] nan nan 1.52538 -0.97934
808 1094 [-2, 3] nan nan 1.29113 -0.912979
986 1094 [-2, 3] nan nan 1.03804 -1.03853
804 1094 [-2, 3] nan nan 0.90453 -1.17657
976 1094 [-2, 3] nan nan 1.7443 -1.18933
983 1094 [-2, 3] nan nan 1.34346 -1.33992
984 1094 [-2, 3] nan nan 1.22498 -0.931967
802 1094 [-2, 3] nan nan 1.437 -1.12147
984 1094 [-2, 3] nan nan 1.07504 -0.887497
807 1094 [-2, 3] nan nan 1.14833 -1.17018
807 1094 [-2, 3] nan nan 1.14547 -1.07166
983 1094 [-2, 3] nan nan 1.19152 -0.936169
809 1094 [-2, 3] nan nan 0.668175 -1.05685
984 1094 [-2, 3] nan nan 1.01849 -1.11474
983 1094 [-2, 3] nan nan 1.33992 -1.34346
988 1094 [-2, 3] nan nan 1.21693 -0.867176
809 1094 [-2, 3] nan nan 0.922806 -1.11971
989 1094 [-2, 3] nan nan 1.0

KeyError: 'TNn'

In [20]:
# Store all processed objects in one pickle file next to the working directory
pickle_path='../varoutdict_10000_obs_0.8.pkl'
with open(pickle_path, 'wb') as output:
    pickle.dump(obj=varoutdict, file=output, protocol=pickle.HIGHEST_PROTOCOL)
    
    

In [49]:
###########
# Plot Masks
###########
# One row per index, one column per dataset.  The grid is sized from the
# settings so that changing varinlist cannot run past the last panel;
# squeeze=False keeps `pl` two-dimensional for any grid size.
n_rows,n_cols=len(varinlist),len(datasets)
fig,pl=plt.subplots(nrows=n_rows,ncols=n_cols,figsize=(8,11.5),squeeze=False)
pplot=pl.flatten()
for row,varin in enumerate(varinlist):
    for col,dataset in enumerate(datasets):
        # flat index of the panel in row `row`, column `col`
        k=row*n_cols+col

        # show every grid cell with a finite value in the 'global' mask as 1
        mask=varoutdict[dataset][varin]._masks['global'].copy()
        mask[np.isfinite(mask)]=1
        varoutdict[dataset][varin].plot_map(mask,
                                            ax=pplot[k],
                                            color_bar=False,
                                            show=False)
        # dataset names above the top row, index names left of the first column
        if row==0:pplot[k].set_title(dataset)
        if col==0:pplot[k].set_ylabel(varin)

plt.tight_layout()
for extension in ('png','pdf'):
    plt.savefig('../plots/mask_HadEX2_GHCNDEX_80.'+extension)
plt.clf()