# Table of Contents
 <p><div class="lev1"><a href="#Merge-dataframes"><span class="toc-item-num">1&nbsp;&nbsp;</span>Merge dataframes</a></div><div class="lev2"><a href="#In-the-format-of-computeMeanProfileAtQuantile.py"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>In the format of computeMeanProfileAtQuantile.py</a></div>

Modules

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

from mpl_toolkits.basemap import Basemap
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from matplotlib import gridspec
from netCDF4 import Dataset
from math import *
import glob
import string
import re
from datetime import date, time, datetime
from pprint import pprint
import sys,os
import csv
import pandas as pd
import pickle
import matplotlib.transforms as mtransforms


Paths

In [65]:
# Add ../functions to system path
currentpath = %pwd
currentpath = str(currentpath)
##-- In a python script do the following instead:
##-- currentpath = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0,currentpath[:currentpath.rfind('/')+1]+'functions')

## Fixed data files input directory
inputdir_fx = '/Users/bfildier/Data/preprocessed/allExperiments/fx'
## Dataframe input directory
# resultdir = os.path.join(os.path.dirname(currentpath),'results')
resultdir = os.path.join(os.path.dirname(currentpath),'results/fullResults')
local_resultdir = os.path.join(os.path.dirname(currentpath),'results')
## Maps (2D tropical data files) input directory
inputdir_maps = os.path.join(os.path.dirname(currentpath),'inputs/forMaps')

##-- Output directories --##
## Figures
figdir = os.path.join(os.path.dirname(currentpath),'figures/fullResults')


Graphical parameters

In [4]:
###--- Set up graphical parameters ---###

##-- Figure setup --##
plt.rcParams.update({'axes.labelsize': 'x-large',
                     'axes.titlesize': 'x-large',
                     'xtick.labelsize': 'large',
                     'ytick.labelsize': 'large',
                     'figure.titlesize': 'xx-large',
                     'figure.subplot.top': '0.87',
                     'figure.subplot.wspace': '0.3',
                     'figure.subplot.hspace': '0.3',
                     'legend.fontsize':'medium'})

##-- Load colors for plots --##
colorfile = os.path.join(currentpath,'colorsAndTypes.pickle')
with open(colorfile,'rb') as handle:
    col = pickle.load(handle)
    lt = pickle.load(handle)
    pal = pickle.load(handle)
    lw = pickle.load(handle)
    dashes = pickle.load(handle)
    mark = pickle.load(handle)
    cmaps = pickle.load(handle)

Own modules and functions

In [6]:
from importingData import *
from extremeScaling import *
from thermodynamics import *
from thermo_funcs import *
from thermo_constants import *

Global variables and datasets

In [7]:
###--- Datasets ---###

## Dataset parameter sets and variable sets
compsets = ("FSPCAMm_AMIP","FAMIPC5")
experiments = ("piControl","abrupt4xCO2")
subsets = ("tropics","ocean","land","mfzero")
subsets_for_comp = dict(zip(compsets,(subsets,subsets[:-1])))

## Individual names
compset_SPCAM, compset_CAM5 = compsets
experiment_PI, experiment_4xCO2 = experiments
subset_all, subset_ocean, subset_land, subset_mf = subsets


##--- Related to scaling ---##

## Simulation vs. pr scaling
pr_types = ["simulated","scaling"]
## Contributions to fractional changes
# contributions = ["mass","thermo","dyn_mean","dyn_profile","pressure","non_linear","all"]
# contributions_labs = ["Mass","Thermodynamic",r"Mass flux",r"Profile shift","Pressure","Non linear","Sum"]
contributions = ["thermo","dyn_mean","dyn_mean_th","dyn_mean_dyn","dyn_profile","non_linear","all"]
contributions_labs = ["Thermodynamic",r"Mass flux",r"Mass flux ($\Delta \rho$)",r"Mass flux ($\Delta w$)",
                      r"Profile shift","Non linear","Sum"]
contributions_labels = dict(zip(contributions,contributions_labs))

###--- Variables ids ---###

## All pr ids
pr_ids = ["PRECT","CRM_PREC_I90","CRM_PREC_I75","CRM_PREC_I50","CRM_PREC_I25","CRM_PREC_I10"]
gcm_pr_id = pr_ids[0]
crm_pr_ids = pr_ids[1:]
pr_ids_for_compset = {compsets[0]:pr_ids, compsets[1]:[gcm_pr_id]}

area_id_root = "PRECAREA_"
area_ids = [area_id_root+crm_pr_id.split('_')[-1] for crm_pr_id in crm_pr_ids]

frac_id_root = "PRECFRAC_"
frac_ids = [frac_id_root+crm_pr_id.split('_')[-1] for crm_pr_id in crm_pr_ids]

## Define IDs of variables required for plotting profiles
varids_for_profiles = {"PRECT":('PS','T','Q','RELHUM','OMEGA','W')}
for f in [90,75,50,25,10]:
#     varids_for_profiles["CRM_PREC_I%s"%str(f)] = ('PS','CRM_T_I%s'%str(f),'CRM_QC_I%s'%str(f),'CRM_H_I%s'%str(f),
#                                            'CRM_OMEGA_I%s'%str(f),'CRM_W_I%s'%str(f))
    varids_for_profiles["CRM_PREC_I%s"%str(f)] = ('PS','CRM_T_I%s'%str(f),'Q','CRM_H_I%s'%str(f),
                                           'CRM_OMEGA_I%s'%str(f),'CRM_W_I%s'%str(f))

## Define IDs of variables required for computing scaling
varids_for_scaling = {"PRECT":('PS','T','OMEGA','W')}
varids_for_scaling_updrafts = {"PRECT":('PS','T','OMEGA','W')}
for f in [90,75,50,25,10]:
    varids_for_scaling["CRM_PREC_I%s"%str(f)] = ('PS','CRM_T_I%s'%str(f),'CRM_OMEGA_I%s'%str(f),'CRM_W_I%s'%str(f))
    varids_for_scaling_updrafts["CRM_PREC_I%s"%str(f)] = ('PS','CRM_T_I%s'%str(f),'CRM_OMEGA_I%s'%str(f),'CRM_WUP_I%s'%str(f))

## Define percentile values
Q_IL_ext = np.array([90.0, 92.0567, 93.6904, 94.9881, 96.0189, 96.8377, 97.4881, 98.0047, 98.4151, 98.7411,
        99.0, 99.2057, 99.369, 99.4988, 99.6019, 99.6838, 99.7488, 99.8005, 99.8415, 99.8741,
        99.9, 99.9206, 99.9369, 99.9499, 99.9602, 99.9684, 99.9749, 99.98, 99.9842, 99.9874,
        99.99, 99.9921, 99.9937, 99.995, 99.996, 99.9968, 99.9975, 99.998, 99.9984, 99.9987,99.999])
Qs = [90.,96.8377,99.,99.6838,99.9,99.9684,99.99,99.9968]
Q_ids = ['90.0','96.8377','99.0','99.6838','99.9','99.9684','99.99','99.9968']

###--- Legends, titles and labels ---###
    
## Corresponding legend labels
compset_labs = ["SPCAM","CAM5"]
compset_labels = dict(zip(compsets,compset_labs))

exp_labs = ["Pre-industrial","4xCO2"]
exp_labels = dict(zip(experiments,exp_labs))

subset_labs = ["Tropics","Tropical oceans","Tropical land","Zero mass flux"]
subset_labels = dict(zip(subsets,subset_labs))

pr_labs = [r"$P(GCM)$",r"$P(CRM)$ largest $90\%$",r"$P(CRM)$ largest $75\%$",r"$P(CRM)$ largest $50\%$",
             r"$P(CRM)$ largest $25\%$",r"$P(CRM)$ largest $10\%$"]
pr_labels = dict(zip(pr_ids,pr_labs))
pr_labs_short = [r"$P(GCM)$",r"${\left[P(CRM)\right]}^{90\%}$",r"${\left[P(CRM)\right]}^{75\%}$",
                 r"${\left[P(CRM)\right]}^{50\%}$",
             r"${\left[P(CRM)\right]}^{25\%}$",r"${\left[P(CRM)\right]}^{10\%}$"]
pr_labels_short = dict(zip(pr_ids,pr_labs_short))
pr_labs_veryshort = [r"$P_{GCM}$",r"$P_{CRM}^{90\%}$",r"$P_{CRM}^{75\%}$",
                 r"$P_{CRM}^{50\%}$",r"$P_{CRM}^{25\%}$",r"$P_{CRM}^{10\%}$"]
pr_labels_veryshort = dict(zip(pr_ids,pr_labs_veryshort))
pr_labs_vvshort = [r"$P_{GCM}$",r"$P_{CRM}$",r"$P_{CRM}$",r"$P_{CRM}$",r"$P_{CRM}$",r"$P_{CRM}$"]
pr_labels_vvshort = dict(zip(pr_ids,pr_labs_vvshort))


###--- Results and others ---###

## Vertical pressure coordinate function
lev_file = 'lev_fx_CESM111-SPCAM20_allExperiments_r0i0p0.nc'
computeP = getPressureCoordinateFunction(os.path.join(inputdir_fx,lev_file))

# ## Import all dataframes
# df_quantile = getStatDataFrame("pr_quantile_IL",compsets,experiments,subsets,resultdir)
# df_density2D = getStatDataFrame("pr_jointDensity_IL",compsets,experiments,subsets,resultdir)
# df_mean = getStatDataFrame("var2D_mean",compsets,experiments,subsets,resultdir)
# df_meanProfile = getStatDataFrame("var3D_meanProfile",compsets,experiments,subsets,resultdir)
# df_meanAtQ = getStatDataFrame("var2D_meanAtQ",compsets,experiments,subsets,resultdir)
# df_meanProfileAtQ = getStatDataFrame("var3D_meanProfileAtQ",compsets,experiments,subsets,resultdir)

# Merge dataframes

## In the format of computeMeanProfileAtQuantile.py

Open a dataframe as created by computeMeanProfileAtQuantile

In [154]:
stat = "var3D_meanProfileAtQ"
compset = compset_SPCAM
experiment = experiment_PI
subset = subset_all
csvfile = "%s_%s_%s_%s.csv"%(stat,compset,experiment,subset)
print "Open file:", csvfile
df = pd.read_csv(os.path.join(local_resultdir,csvfile),
                header=[0,1,2], skipinitialspace=True)
# df = pd.read_csv(os.path.join(resultdir,csvfile),
#                 header=[0,1,2], skipinitialspace=True)

Open file: var3D_meanProfileAtQ_FSPCAMm_AMIP_piControl_tropics.csv


Explore it a little bit

In [155]:
print "index", df.index

df_levels = df.columns.levels
print "columns", df_levels
print len(df_levels)
# print df
print type(df['CRM_OMEGA_I50']), type(df['CRM_OMEGA_I50']['CRM_PREC_I50']), type(df['CRM_OMEGA_I50']['CRM_PREC_I50']['99.0'])
print "df['CRM_OMEGA_I50']['CRM_PREC_I50'][-2:] :",df['CRM_OMEGA_I50']['CRM_PREC_I50'][-2:]

# print df.columns.lexsort_depth
# df.sortlevel(axis=1,inplace=True)

index RangeIndex(start=0, stop=30, step=1)
columns [[u'CRM_QC_I10', u'CRM_QC_I25', u'CRM_QC_I50', u'CRM_QC_I75', u'CRM_QC_I90', u'CRM_QPC_I10', u'CRM_QPC_I25', u'CRM_QPC_I50', u'CRM_QPC_I75', u'CRM_QPC_I90', u'CRM_T_I10', u'CRM_T_I25', u'CRM_T_I50', u'CRM_T_I75', u'CRM_T_I90', u'CRM_W_I10', u'CRM_W_I25', u'CRM_W_I50', u'CRM_W_I75', u'CRM_W_I90', u'EVAPPREC', u'FU', u'FV', u'OMEGA', u'PDELDRY', u'Q', u'RELHUM', u'SPMC', u'SPMCUP', u'T', u'U', u'Unnamed: 0_level_0', u'V'], [u'CRM_PREC_I10', u'CRM_PREC_I25', u'CRM_PREC_I50', u'CRM_PREC_I75', u'CRM_PREC_I90', u'PRECT', u'Unnamed: 0_level_1'], [u'90.0', u'92.0567', u'93.6904', u'94.9881', u'96.0189', u'96.8377', u'97.4881', u'98.0047', u'98.4151', u'98.7411', u'99.0', u'99.2057', u'99.369', u'99.4988', u'99.6019', u'99.6838', u'99.7488', u'99.8005', u'99.8415', u'Unnamed: 0_level_2']]
3


KeyError: 'CRM_OMEGA_I50'

In [87]:
print "Copy dataframe"
df_new = df.copy()

Copy dataframe


In [151]:
dict_new = dict(df)
print dict_new.keys()[:10]
# dict_new[('NEWVAR_TEST','PRECT','99.995')] = dict_new[('CRM_OMEGA_I50', 'CRM_PREC_I50', '99.0')].copy()
dict_new_2 = {('NEWVAR_TEST','PRECT','99.995'):dict_new[('CRM_OMEGA_I50', 'CRM_PREC_I50', '99.0')].copy()}
print type(dict_new[('CRM_OMEGA_I50', 'CRM_PREC_I50', '99.0')]), type(dict_new_2[('NEWVAR_TEST','PRECT','99.995')])
dict_merged = dict_new.copy()
# for key in dict_new_2.keys():
#     if "Unnamed" in string.join(key,''):
#         dict_new_2.pop(key,None)
# for key in dict_merged.keys():
#     if "Unnamed" in string.join(key,''):
#         dict_merged.pop(key,None)
dict_merged.update(dict_new_2)
# Convert to dataframes
df_new_2 = pd.DataFrame(dict_new_2)
df_new = pd.DataFrame(dict_new)
df_merged = pd.DataFrame(dict_merged)
# Compate
print np.array(df_new.columns.levels)
print np.array(df_new_2.columns.levels)
print np.array(df_merged.columns.levels)
# print df_new['NEWVAR_TEST']

[('CRM_OMEGA_I50', 'CRM_PREC_I50', '98.7411'), ('CRM_OMEGA_I50', 'CRM_PREC_I50', '99.2057'), ('Unnamed: 0_level_0', 'Unnamed: 0_level_1', 'Unnamed: 0_level_2'), ('CRM_OMEGA_I50', 'CRM_PREC_I50', '99.0'), ('varid', 'pr_id', 'Q_id'), ('CRM_OMEGA_I50', 'CRM_PREC_I50', '99.369'), ('CRM_OMEGA_I50', 'CRM_PREC_I50', '99.4988')]
<class 'pandas.core.series.Series'> <class 'pandas.core.series.Series'>
[Index([u'CRM_OMEGA_I50', u'Unnamed: 0_level_0', u'varid'], dtype='object')
 Index([u'CRM_PREC_I50', u'Unnamed: 0_level_1', u'pr_id'], dtype='object')
 Index([u'98.7411', u'99.0', u'99.2057', u'99.369', u'99.4988', u'Q_id',
       u'Unnamed: 0_level_2'],
      dtype='object')]
[['NEWVAR_TEST']
 ['PRECT']
 ['99.995']]
[ Index([u'CRM_OMEGA_I50', u'NEWVAR_TEST', u'Unnamed: 0_level_0', u'varid'], dtype='object')
 Index([u'CRM_PREC_I50', u'PRECT', u'Unnamed: 0_level_1', u'pr_id'], dtype='object')
 Index([u'98.7411', u'99.0', u'99.2057', u'99.369', u'99.4988', u'99.995',
       u'Q_id', u'Unnamed: 0_leve

In [117]:
# df_merged['CRM_OMEGA_I50']
print df_new_2
print df_merged

   NEWVAR_TEST
         PRECT
        99.995
0     0.000000
1     0.003526
2     0.008529
3     0.009783
4    -0.004921
5    -0.064853
6    -0.185703
7    -0.327109
8    -0.633003
9    -1.040630
10   -1.853387
11   -2.204890
12   -1.806084
13   -1.360950
14   -1.116030
15   -0.881517
16   -0.834936
17   -0.649643
18   -0.328893
19   -0.103114
20   -0.032754
21    0.009055
22   -0.038939
23   -0.042699
24    0.074669
25   -0.057368
26   -0.006835
27   -0.060250
28    0.031979
29   -0.010609
   CRM_OMEGA_I50                                                    \
    CRM_PREC_I50                                                     
            99.0   99.2057    99.369   99.4988   99.8005   99.8415   
0       0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
1       0.003526  0.005386  0.009278  0.006220  0.001118 -0.001314   
2       0.008529  0.013145  0.026811  0.018582 -0.000880 -0.006805   
3       0.009783  0.020349  0.050117  0.033808 -0.013988 -0.024372   
4      -0.00492

In [124]:
d1 = dict(zip([1,2,3],['a','b','c']))
d2 = dict(zip([3,4],['c\'','d']))
print d1, d2
d1.update(d2)
print d1

{1: 'a', 2: 'b', 3: 'c'} {3: "c'", 4: 'd'}
{1: 'a', 2: 'b', 3: "c'", 4: 'd'}
