In [1]:
# Standard library and core scientific stack
import os, sys
import numpy as np
import pandas as pd
import xarray as xr

# Extras: inline figure rendering, unit-aware meteorological calcs, statistics
%matplotlib inline
import metpy.calc as mpcalc
from metpy.units import units
from scipy import stats

# Project-local helpers: '../modules' must be on sys.path BEFORE these imports run
sys.path.append('../modules') # Path to modules
from preprocess_dataframes import combine_ivt_ar_prec_df
from ar_funcs import AR_rank_df

pd.options.display.float_format = "{:,.2f}".format # show floats in pandas tables with thousands separators and two decimals

In [2]:
# Root directory of the input climate data.
# Defaults to the original location; set the SEAK_CLIM_DATA environment variable
# to run the notebook on another machine without editing this cell.
# os.path.join(..., '') guarantees a trailing separator, so string concatenation
# such as `path_to_data + 'file.nc'` keeps working for any override value.
path_to_data = os.path.join(os.environ.get('SEAK_CLIM_DATA', '/home/dnash/SEAK_clim_data/'), '')
path_to_out  = '../out/'       # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '../figs/'      # figures

In [3]:
## Load the precipitation and IVT tables; precipitation is appended to each
## community's IVT DataFrame by combine_ivt_ar_prec_df.
option = 'a'
temporal_res = 'hourly'
community_lst = [
    'Hoonah',
    'Skagway',
    'Klukwan',
    'Yakutat',
    'Craig',
    'Kasaan',
]

# the helper hands back a list of DataFrames
df_lst = combine_ivt_ar_prec_df(option, temporal_res, community_lst)

# begin with the first DataFrame in that list
df = df_lst[0]


In [5]:
%%time
df_lst_new = []
for i, df in enumerate(df_lst):

    # create a column called 'AR_tmp' with a "1" if IVT > 250 kg m-1 s-1
    df['AR_tmp'] = 0
    df.loc[df['IVT']>=250., 'AR_tmp'] = 1
    df
    # calculate duration
    a = df.AR_tmp != 0 # creates a Boolean series where True means an AR is present
    df['duration'] = a.cumsum()-a.cumsum().where(~a).ffill().fillna(0).astype(int)
    df['new_ID'] = a*a.cumsum().where(~a).ffill().fillna(0).astype(int)

    # get maximum IVT and duration and put in new df
    max_IVT = df.groupby('new_ID').apply(lambda x: x['IVT'].max())
    duration = df.groupby('new_ID').apply(lambda x: x['duration'].max())

    data = {'duration': duration,
            'max_IVT': max_IVT}
    tmp = pd.DataFrame(data, index=duration.index) # Create DataFrame

    tmp = tmp.apply(AR_rank_df, axis=1) # apply AR rank calculation

    ## join AR rank with original df

    df = df.join(tmp['rank'], on='new_ID')
    print(df.groupby('rank').count())
    df_lst_new.append(df)


        time     IVT     lat     lon    uIVT    vIVT     IWV  ivtdir      AR  \
rank                                                                           
0.00  338047  338047  338047  338047  338047  338047  338047  338047  338047   
1.00    5943    5943    5943    5943    5943    5943    5943    5943    5943   
2.00    4121    4121    4121    4121    4121    4121    4121    4121    4121   
3.00    2011    2011    2011    2011    2011    2011    2011    2011    2011   
4.00     518     518     518     518     518     518     518     518     518   

      impact   prec  AR_tmp  duration  new_ID  
rank                                           
0.00  338047  92670  338047    338047  338047  
1.00    5943   1941    5943      5943    5943  
2.00    4121   1424    4121      4121    4121  
3.00    2011    485    2011      2011    2011  
4.00     518    110     518       518     518  
        time     IVT     lat     lon    uIVT    vIVT     IWV  ivtdir      AR  \
rank                   