In [None]:
from __future__ import print_function, division,generators
import numpy as np
import numpy.ma as ma
import pandas as pd
import matplotlib.pyplot as plt
import math
import scipy as sci
import seaborn as sns
import os
import glob
import datetime as dt
from scipy.stats import norm as scipy_stats_norm
%matplotlib inline

###1. Read the station data downloaded from GHCN archive###
Data obtained from http://www.ncdc.noaa.gov/cdo-web/

In [None]:
def get_date(date_number):
    """
    Convert a GHCN ``DATE`` value (e.g. the int64 ``19500131``) to a datetime.

    Parameters
    ----------
    date_number : int or str
        Date encoded as YYYYMMDD. Only the first eight characters of its
        string form are read.

    Returns
    -------
    datetime.datetime
        Midnight on the encoded calendar day.

    Raises
    ------
    ValueError
        If the characters do not form a valid calendar date.
    """
    dstring = str(date_number)
    # ``pd.datetime`` was only a re-export of ``datetime.datetime`` and is not
    # available in current pandas, so use the stdlib class directly.
    return dt.datetime(int(dstring[0:4]), int(dstring[4:6]), int(dstring[6:8]))

def get_df(fnm, var, no_missing = True):
    """
    Load one variable from a single-station GHCN CSV file as a dataframe.

    Parameters
    ----------
    fnm : str
        Path of the CSV file. It must contain STATION and DATE columns plus
        a column named ``var``.
    var : str
        Name of the data column to extract (e.g. PRCP, TMAX, TMIN). PRCP
        values are divided by 10 (the conversion to mm used in this notebook).
    no_missing : bool, optional
        When True (default), values that are not greater than -99 are
        replaced by NaN.

    Returns
    -------
    pandas.DataFrame
        A single column, named after the first STATION entry with its first
        six characters removed, indexed by the parsed DATE values.
    """
    df = pd.read_csv(fnm)
    # DATE holds YYYYMMDD numbers; parse the whole column in one call.
    # ``.values`` keeps the resulting index unnamed.
    date_text = df["DATE"].astype(str).str[:8]
    dt_indx = pd.to_datetime(date_text, format="%Y%m%d").values
    data_vals = df[var].values
    if var == 'PRCP':  # compare by value; `is` only tests object identity
        data_vals = data_vals / 10.  # This is to convert precip data to mm
    station = df["STATION"].iloc[0][6:]
    out = pd.DataFrame(data=data_vals, index=dt_indx, columns=[station])
    if no_missing:
        # -99 is used as a catch-all floor for GHCN missing-data flags
        return out.where(out > -99.)
    return out

def get_combined_df(fpth, var):
    """
    Combine one variable from every file matching a glob pattern.

    Parameters
    ----------
    fpth : str
        Glob pattern selecting the station files (e.g. "Data/*.csv").
    var : str
        Name of the data column to extract from each file.

    Returns
    -------
    pandas.DataFrame
        One column per file, joined side by side on the date index.

    Raises
    ------
    ValueError
        If no file matches ``fpth``.
    """
    # Sort so the column order does not depend on directory listing order.
    flist = sorted(glob.glob(fpth))
    if not flist:
        raise ValueError("No files match pattern {0!r}".format(fpth))
    frames = [get_df(fnm=f, var=var, no_missing=True) for f in flist]
    return pd.concat(frames, axis=1)

Call the `get_combined_df()` function to build one dataframe per variable from all the CSV files in a folder.

In [None]:
%%time
# Build one dataframe per variable (daily maximum temperature, daily minimum
# temperature, precipitation). Each call globs the folder and re-reads every
# matching CSV file.
df_tmax = get_combined_df(fpth="Data/*.csv",var="TMAX")
df_tmin = get_combined_df(fpth="Data/*.csv",var="TMIN")
df_prcp = get_combined_df(fpth="Data/*.csv",var="PRCP")

In [None]:
# Largest recorded value for each column of the precipitation frame
for station_id in df_prcp.columns:
    station_max = np.max(df_prcp[station_id])
    print(station_id, station_max)

Plot time series of precipitation for all stations, and also accumulate the data and plot the average rainfall.

In [None]:
# Example of masking and accessing data from stations...
#station = df_prcp.keys()[1]
#plt.plot(df_prcp[station].index,df_prcp[station],'.',alpha=0.5)
#plt.title("Station {0:s}".format(station))

In [None]:
#df_prcp.KE000063740[df_prcp.KE000063740 > 100]

###2. Time series plots###

Daily Mean and SEM values: Mean uncertainty is given by SEM, where:
$SEM = \frac{\sigma}{\sqrt{n-1}}$ 

In [None]:
def calc_SEM(data):
    """
    Calculate the standard error of the mean (SEM).

    The SEM is the population standard deviation divided by sqrt(n - 1),
    where n is the number of valid values.

    Parameters
    ----------
    data : array-like
        Numeric values (list, numpy array or pandas Series). NaN entries
        are ignored and do not count towards n.

    Returns
    -------
    float
        The SEM, or NaN when fewer than two valid values are available.
    """
    values = np.asarray(data, dtype=float)
    values = values[~np.isnan(values)]
    # With fewer than two points the denominator is zero or undefined.
    if values.size < 2:
        return np.nan
    return np.std(values) / np.sqrt(values.size - 1)


def gather_daily_stats(date, df, min_valid=-1., max_valid=500.):
    """
    Mean and SEM of the values recorded across all columns of ``df`` on one day.

    Parameters
    ----------
    date : index label
        Row label (day) of ``df`` to summarise.
    df : pandas.DataFrame
        Frame with one column per station.
    min_valid, max_valid : float, optional
        Exclusive bounds of the accepted range. Values outside it, and
        NaNs, are discarded. The defaults keep values between -1 and 500.

    Returns
    -------
    tuple of float
        ``(mean, SEM)`` of the accepted values, or ``(nan, nan)`` when
        fewer than two values are accepted.
    """
    # Read the row from the frame passed in, not from notebook globals.
    obs = np.asarray(df.loc[date], dtype=float)
    # Comparisons with NaN are False, so missing values drop out here too.
    obs = obs[(obs > min_valid) & (obs < max_valid)]

    if len(obs) < 2:
        return np.nan, np.nan
    return np.mean(obs), calc_SEM(obs)


In [None]:
#MAD based outlier calculation.
#def rej_Olier(data, thresh = 0.):
#    """
#    Calculate biweights of mean to reject outliers in df_prcp. No nan's 
#    should  also be in the input (numpy) array.
#    """
#    diff = np.abs(data - np.median(data))
#    mad = np.median(diff)   #median of the absolute deviation
#    mod_obs = diff/mad if mad else 0.
#    return data[mod_obs > thresh]

In [None]:
# Build the all-station daily mean series together with its SEM uncertainty
daily_stats = []
for day in df_prcp.index:
    daily_stats.append(gather_daily_stats(date=day, df=df_prcp))
means = np.array([pair[0] for pair in daily_stats])
sems = np.array([pair[1] for pair in daily_stats])
# Attach both series to the dataframe as new columns
df_prcp['Accumulated'] = pd.Series(means, index=df_prcp.index)
df_prcp['Acc_SEM'] = pd.Series(sems, index=df_prcp.index)

In [None]:
# Daily all-station mean precipitation with SEM error bars
daily_ts = plt.figure(dpi=72)
daily_ts.set_size_inches(15,5)      # Specify the figure size
ax1 = daily_ts.add_subplot(111)     # Add an axis frame object to the plot (i.e. a panel)

#ax1.plot(df_prcp.index, df_prcp.Accumulated,'.g',ms=2.0)

ax1.errorbar(df_prcp.index, df_prcp.Accumulated,
             yerr=df_prcp.Acc_SEM, alpha=0.25, fmt=',')
ax1.set_ylim(0,120)
# Give the limits as timestamps so they share the units of the date axis.
ax1.set_xlim(pd.Timestamp('1950-01-01'), pd.Timestamp('2015-12-31'))
ax1.set_title("Mean East African Precipitation")
ax1.set_ylabel("Precipitation (mm day$^{-1}$)")
ax1.set_xlabel("Year")
ax1.grid(True)
plt.show()  # show() does not take a Figure argument
#daily_ts.savefig('Daily_ts.pdf',dpi=300)

###3. Density plots###

In [None]:
# N.b. the KDE (kernel density estimate) is Gaussian - which is not true
# for precip data (log or power law data)...
mask = df_prcp.Accumulated > 0.0   # also drops NaN days (NaN > 0 is False)
daily_dp = plt.figure()
daily_dp.set_size_inches(12,5)
ax = daily_dp.add_subplot(122)

# Pass the axes explicitly instead of relying on the "current axes" state.
sns.distplot(df_prcp.Accumulated[mask],bins=100,norm_hist = True,kde=False,color = 'r',ax=ax)
sns.kdeplot(df_prcp.Accumulated[mask],shade=True,kernel='cos',cumulative=False,color='b',ax=ax)
leg1=ax.legend(['KDE','Accumulated mean'],prop={'size':11},
                numpoints=1,markerscale=5.,frameon=True,fancybox=True)

ax.set_xlim(0,25)
ax.set_title("East Africa Mean Precipitation")
ax.set_xlabel(r'Precip. (mm day$^{-1}$)')
ax.set_ylabel('Density (0-1)')

plt.show()  # show() does not take a Figure argument
#daily_dp.savefig('Densityplot.pdf',dpi=300)

In [None]:
#df_prcp.Accumulated[df_prcp.Accumulated>20]

In [None]:
sns.kdeplot # Hot tip - look in SEABORN for statistical plots and help...

Tasks:
1. find out why the later part of the data has high variability
2. make sure you are happy/add any logical restrictions to improve the data quality in Accumulated dataset
3. Calculate population statistics, histogram, density plots (PDF, CDF), and fits to the population. Try several fit approaches, and show which is best.
4. Use the CDF (or a percentile function) to determine the key (IQR, median, tails etc) of the population
5. (hard) Try to fit to the population. Recommend trying an nth-order polyfit using np.polyfit()
6. Use the statistical threshold values to define 'extreme' precipitation, and work out the:
  * frequency of extreme events,
  * duration (length) of extreme events,
  * magnitude (intensity) of extreme events
  
For task 6, you can plot these statistics as time dependent, or distributions, or something else...

In [None]:
#plt.hist(df_prcp.Accumulated[mask], bins=60)
#plt.show()

In [None]:
#Histogram
#source code from https://github.com/benlaken/Tanzania/blob/master/Precipitation_Tanzania.ipynb
hist_dp = plt.figure()
hist_dp.set_size_inches(5,5)          # Specify the output size
ax1 = hist_dp.add_subplot(211)        # Upper panel: probability density
ax2 = hist_dp.add_subplot(212)        # Lower panel: cumulative density

# the histogram of the data
# `density=True` is the supported spelling of the former `normed=True`.
ax1.set_title(r' Mean East African Precipitation')
n, bins, patches = ax1.hist(df_prcp.Accumulated[mask], 100, density=True, facecolor='blue', alpha=0.75,
                            histtype='stepfilled')
ax1.grid(True)
ax1.set_ylabel('Density')
n, bins, patches = ax2.hist(df_prcp.Accumulated[mask], 100, density=True, facecolor='blue', alpha=0.75,
                            histtype='stepfilled',cumulative=True)
ax2.set_xlabel(r'mm day$^{-1}$')
ax2.set_ylabel('Cumulative density')
ax2.grid(True)

plt.show()
#hist_dp.savefig('Density_plots.pdf',dpi=300)

In [None]:
   # define extreme quantiles
# Pull the masked accumulated series once, then report its extremes and median
masked_accumulated = df_prcp.Accumulated[mask]
percentileZero = min(masked_accumulated)
percentileHundred = max(masked_accumulated)

print('Min. precip', percentileZero)
print('Max. precip', percentileHundred)
print("Median", np.percentile(masked_accumulated, 50))

In [None]:
# Sorted values plotted against their rank expressed as a percentage
srtd = sorted(df_prcp.Accumulated[mask])
n_sorted = len(srtd)
percent = [rank/n_sorted * 100. for rank in range(n_sorted)]
plt.plot(percent, srtd)
plt.grid(True)

In [None]:
# Print the 90th percentile of the masked accumulated series, then the 10th
# percentile of the sorted list `srtd`.
print(np.percentile(df_prcp.Accumulated[mask],90))
print(np.percentile(srtd,10))

###4. Seasonality###

Calculate the DOY mean over the data-period (climatology).

In [None]:
# Day-of-year (DOY) climatology: mean and SEM of 'Accumulated' for each of
# the 366 possible calendar days. Position i holds DOY i + 1.
doy_mean = []
doy_sem = []

accumulated = df_prcp['Accumulated']
for doy in range(366):
    on_this_doy = df_prcp.index.dayofyear == doy + 1
    # Drop NaN days first so the SEM's sample size counts only real values.
    valid = accumulated[on_this_doy].dropna().values
    doy_mean.append(np.mean(valid) if valid.size else np.nan)
    doy_sem.append(calc_SEM(valid))

doy_mean = np.array(doy_mean)
doy_sem = np.array(doy_sem)

In [None]:
#Plot the seasonal climatology East Africa precip data
mnths= ['Jan','Feb','Mar','Apr','May','June','Jul','Aug','Sep','Oct','Nov','Dec']
#mrange = arange(12)

my_sclim = plt.figure(dpi=72)
my_sclim.set_size_inches(15,6)        # Specify the output size
ax1 = my_sclim.add_subplot(111)        # Add an axis frame object to the plot (i.e. a panel)

# x runs 0..365, i.e. DOY minus one, matching the positions in doy_mean
ax1.errorbar(range(366),doy_mean,xerr=None, yerr=doy_sem, alpha=0.8)
ax1.set_xlim(0,max(range(366)))
# Raw string: "\m" is not a valid escape sequence in a normal string literal.
ax1.set_title(r"East Africa DOY Mean ($\mu$) Rainfall")
ax1.set_ylabel("Precipitation (mm)")
ax1.set_xlabel("Day of Year (DOY)")
ax1.grid(True)
#my_sclim.savefig('My_SeasonalClimatology_plot.pdf',dpi=300)

Anomaly
  * Use the seasonal DOY mean to calculate deviations (anomaly) from the daily mean

In [None]:
# Step-by-step illustration of the anomaly calculation for three sample days
sample_days = df_prcp.index[5000:5003]
sample_rain = df_prcp.Accumulated[5000:5003]
for stamp, rain in zip(sample_days, sample_rain):
    climo = doy_mean[stamp.dayofyear -1]
    print('Day {0}, rainfall {1:3.2f}mm'.format(stamp.date(), rain))
    print('DOY is', stamp.dayofyear)
    print("DOY climo value is {0:3.2f}".format(climo))
    print("Daily anomaly is {0:3.2f}".format(rain - climo))
    print(np.isnan(rain))
    print("")

In [None]:
#---Create a seasonal deviation from climatology--
#Anomalies = Observation - Climatology
# Look up each day's climatological value by its (zero-based) day of year and
# subtract it in one vectorised step. NaN observations stay NaN.
doy_positions = np.asarray(df_prcp.index.dayofyear) - 1
prcp_anom = df_prcp.Accumulated.values - doy_mean[doy_positions]

In [None]:
df_prcp['Acc_anomaly'] = prcp_anom  #adding columns to the dataframe!

In [None]:
#plt.plot(df_prcp.index[prcp_anom > -999.],prcp_anom[prcp_anom > -999.],alpha=0.5)
#df_prcp

In [None]:
# ---plot the anomalized rainfall data with errors---
my_anom = plt.figure(dpi=72)
my_anom.set_size_inches(15,6)        # Specify the output size
ax1 = my_anom.add_subplot(111)        # Add an axis frame object to the plot (i.e. a panel)

ax1.errorbar(df_prcp['Acc_anomaly'].index,df_prcp['Acc_anomaly'],yerr=df_prcp['Acc_SEM'],xerr=None,alpha=0.5)
ax1.set_ylim(-10,100)
# Give the limits as timestamps so they share the units of the date axis.
ax1.set_xlim(pd.Timestamp('1953-01-01'), pd.Timestamp('2013-12-31'))
ax1.set_title(r'Deseasonalized Precipitation ($\delta$Precip.)')
ax1.set_ylabel(r'Anomalized Precip')
ax1.set_xlabel('Years')
ax1.grid(True)

#plt.legend(framealpha=0.9)
plt.show()  # show() does not take a Figure argument
#my_anom.savefig('EA anomalized.pdf',dpi=300)

In [None]:
# Overlay every daily 'Accumulated' value (red dots) on the DOY climatology
# curve, using a zero-based day of year on the x axis.
#doy_values = [doy.dayofyear - 1 for doy in df_prcp.index]
figx = plt.figure(dpi=72)
figx.set_size_inches(10,5)      # Specify the figure size
ax1 = figx.add_subplot(111)

#---Plot the seasonal climatology East Africa precip data---
ax1.errorbar(range(366),doy_mean,xerr=None, yerr=doy_sem, alpha=0.6, )
# Shift dayofyear by one so it lines up with the 0..365 positions used above
ax1.plot(df_prcp.index.dayofyear -1 ,df_prcp['Accumulated'],'.',ms=2.5,alpha=1.0,color='r')
plt.xlim(0,max(range(366)))
plt.title("")   # the title is left empty
plt.ylabel("Precipitation (mm)")
plt.xlabel("Day of Year (DOY)")
plt.grid(True)

###5. Extreme Precip Events ###

####Extreme events are defined by absolute thresholds set by SWFDP-RSMC-Nairobi####

In [None]:
#Extreme rainfall events
# Classes are half-open intervals (lower, upper] so a day with exactly
# 20, 30 or 50 mm belongs to one class instead of falling between them.
# NOTE(review): confirm which class the boundary values should belong to.
accumulated = df_prcp.Accumulated
high_risk = accumulated[accumulated > 50]
medium_risk = accumulated[(accumulated > 30) & (accumulated <= 50)]
low_risk = accumulated[(accumulated > 20) & (accumulated <= 30)]
no_risk = accumulated[accumulated <= 20]

In [None]:
# Daily mean precipitation coloured by flood-risk class
daily_floodrisk = plt.figure(dpi=72)
daily_floodrisk.set_size_inches(15,5)      # Specify the figure size
ax1 = daily_floodrisk.add_subplot(111)

ax1.plot(high_risk.index, high_risk,'ro',alpha=1.,ms=2)
ax1.plot(medium_risk.index, medium_risk,'bo',alpha=0.9,ms=2)
ax1.plot(low_risk.index, low_risk,'co',alpha=0.9,ms=2)
ax1.plot(no_risk.index, no_risk,'go',alpha=0.9,ms=2)
# Labels are matched to the four plotted lines in the order they were drawn
leg1=ax1.legend(['high risk','medium risk','low risk','no risk'],
                prop={'size':11},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
#plt.xlim('1950-01-01','2015-12-31')
ax1.set_title(r"Mean East African Precipitation")
ax1.set_ylabel(r"Precipitation (mm day$^{-1}$)")
ax1.set_xlabel("Year")
ax1.grid(True)
plt.show()  # show() does not take a Figure argument

#daily_floodrisk.savefig('Daily_floodrisk.pdf',dpi=300)

In [None]:
print(high_risk.index, df_prcp.index[df_prcp.Accumulated > 50])

In [None]:
#Frequency of extreme events
# For every year, count the valid days and how many of that year's days
# belong to the `no_risk` class.
# NOTE(review): the result name suggests days *at* risk were meant - confirm
# which class should be counted.
# NOTE(review): range() stops before the final year of the record - confirm
# that excluding it is intended.
floodrisk_freq = []
yr_day_count = []
years = []
for year in range(min(df_prcp.index.year),max(df_prcp.index.year)):
    tmp_yr_data = df_prcp["Accumulated"][df_prcp.index.year == year]  # pool data for each year
    n_valid = tmp_yr_data.count()
    yr_day_count.append(n_valid)
    years.append(year)
    if n_valid > 1:
        # `no_risk` holds rainfall values, not a boolean mask, so select by
        # the dates the two series have in common.
        floodrisk_freq.append(int(tmp_yr_data.index.isin(no_risk.index).sum()))
    else:
        floodrisk_freq.append(np.nan)

floodrisk_freq = np.array(floodrisk_freq)


In [None]:
#df_prcp.Accumulated[5000:5005]

#### Extreme events based on statistical values of daily anomalies and percentiles

In [None]:
# Percentile-based thresholds taken from the masked anomaly series
masked_anomaly = df_prcp['Acc_anomaly'][mask]
flood_threshold = np.percentile(masked_anomaly, 90)
drought_threshold = np.percentile(masked_anomaly, 10)
median_anomaly = np.percentile(masked_anomaly, 50)

print('90th percentile = ', flood_threshold)
print('10th percentile = ', drought_threshold)
print('50th percentile = ', median_anomaly)
#sns.distplot(df_prcp['Acc_anomaly'][mask])

In [None]:
# Distribution of the masked daily anomalies with the 10th/50th/90th
# percentiles marked as vertical lines.
my_dist = plt.figure()
my_dist.set_size_inches(15,5)               # Specify the output size
ax1 = my_dist.add_subplot(121)              # Add an axis frame object to the plot (i.e. a panel)

anomalies = df_prcp['Acc_anomaly'][mask]

# Every artist carries its own label: a positional label list is matched to
# artists in the axes' internal order, which is not the order written here.
sns.distplot(anomalies,bins=100, norm_hist=True, kde=False,
             label='observed anomalies', ax=ax1) # Filled bars
sns.kdeplot(anomalies,shade=False,kernel='gau',cumulative=False,color='r',lw=1.5,
            label='KDE', ax=ax1)
ax1.vlines(np.percentile(anomalies,90), 0.00, 0.2, colors='b',linestyle='--',lw=1.0,
           label='90th percentile')
ax1.vlines(np.percentile(anomalies,50), 0.00, 0.2, colors='b',lw=1.0,
           label='50th percentile') #Marker line of Median
ax1.vlines(np.percentile(anomalies,10), 0.00, 0.2, colors='b',linestyle='-.',lw=1.0,
           label='10th percentile')
leg1=ax1.legend(prop={'size':11},numpoints=1,markerscale=5.,frameon=True,fancybox=True)

ax1.grid(True)
ax1.set_ylabel(r'Density',fontsize=11)
ax1.set_xlabel('Accumulated precip. anomaly (mm)',fontsize=11)
ax1.set_xlim(-10, 40)
ax1.set_ylim(0.00, 0.2)


plt.show()  # show() does not take a Figure argument
# NOTE(review): this saves the *daily time-series* figure, not `my_dist`;
# it looks misplaced in this cell - confirm before relying on the PDF.
daily_ts.savefig('Daily_ts.pdf',dpi=300)

#my_dist.savefig("EA_Normalized_Percentile.pdf",dpi=300,transparent=True)

In [None]:
# Boolean masks over df_prcp: wet extremes, dry extremes, and either of them
anomaly = df_prcp['Acc_anomaly']
flood = anomaly > flood_threshold
drought = anomaly < drought_threshold
extremes = flood | drought

In [None]:
# All masked anomalies (faint) with the threshold-exceeding days in red
fig_threshold = plt.figure(dpi=72)
fig_threshold.set_size_inches(10,5)      # Specify the figure size
ax1 = fig_threshold.add_subplot(111)
ax1.scatter(df_prcp['Acc_anomaly'][mask].index, df_prcp['Acc_anomaly'][mask],
            alpha=0.1, marker='.')
ax1.scatter(df_prcp['Acc_anomaly'][extremes].index, df_prcp['Acc_anomaly'][extremes],
            alpha=0.3, marker='.', color='r')
ax1.set_title("Extreme rainfall based on threshold value detection")
ax1.set_ylabel("Precipitation anomaly (mm)")
ax1.set_xlabel("Year")
# Give the limits as timestamps so they share the units of the date axis.
ax1.set_xlim(pd.Timestamp('1953-01-01'), pd.Timestamp('1970-12-31'))
ax1.grid(True)
#fig_threshold.savefig('Extreme_Threshhold_plot.pdf',dpi=300)

Intensity, Duration and Frequency of extreme events based on defined statistical extreme threshold

In [None]:
# You can use groupby to querey your dataset 
#pd.groupby?

In [None]:
# or you can write hacks like this, to pull out data based on the index
#---Splitting the data into groups based on extreme threshold
for year in range(min(df_prcp.index.year),max(df_prcp.index.year)):
    # Anomalies flagged as flood / drought, restricted to the current year
    wet_extreme = df_prcp["Acc_anomaly"][flood][df_prcp["Acc_anomaly"][flood].index.year == year]
    dry_extreme = df_prcp["Acc_anomaly"][drought][df_prcp["Acc_anomaly"][drought].index.year == year]
    
    # Prints the year with its number of flood days, then the year with its
    # number of drought days
    print(year,len(wet_extreme), year,len(dry_extreme))
    break  # demonstration only: stop after the first year
# either way, do statistics on the frequency, intensity, and duration of flood and drought events
# e.g. a time-series. More distributions, etc. 

In [None]:
# Per-year frequency (day counts) and intensity (mean and SEM of the anomaly)
# of flood and drought days.
# NOTE(review): range() stops before the final year of the record - confirm
# that excluding it is intended.
flood_freq = []
drought_freq = []
yr_day_count = []
years = []
flood_mean = []
flood_sem = []
drought_mean = []
drought_sem = []

for year in range(min(df_prcp.index.year),max(df_prcp.index.year)):
    tmp_yr_data = df_prcp["Acc_anomaly"][df_prcp.index.year == year]  # pool data for each year
    n_valid = tmp_yr_data.count()
    yr_day_count.append(n_valid)
    years.append(year)
    if n_valid > 1:
        # Select each class once and reuse it for count, mean and SEM
        yr_flood = tmp_yr_data[flood]
        yr_drought = tmp_yr_data[drought]
        flood_freq.append(len(yr_flood))
        drought_freq.append(len(yr_drought))
        flood_mean.append(np.nanmean(yr_flood))
        flood_sem.append(calc_SEM(yr_flood))
        drought_mean.append(np.nanmean(yr_drought))
        drought_sem.append(calc_SEM(yr_drought))
    else:
        # Not enough data this year: keep every list the same length
        flood_freq.append(np.nan)
        drought_freq.append(np.nan)
        flood_mean.append(np.nan)
        flood_sem.append(np.nan)
        drought_mean.append(np.nan)
        drought_sem.append(np.nan)

flood_freq = np.array(flood_freq)
drought_freq = np.array(drought_freq)
yr_day_count = np.array(yr_day_count)
years = np.array(years)
flood_mean = np.array(flood_mean)
flood_sem = np.array(flood_sem)
drought_mean = np.array(drought_mean)
drought_sem = np.array(drought_sem)

In [None]:
#Intensity
my_int = plt.figure(dpi=72)
my_int.set_size_inches(15,5)        # Specify the output size
ax1 = my_int.add_subplot(121)
ax2 = my_int.add_subplot(122)

complete = yr_day_count > 350       # Masking years with many missing days

# "\\mu" keeps the backslash literal while "\n" remains a real line break.
ax1.errorbar(years[complete],flood_mean[complete],
             xerr=None, yerr=flood_sem[complete],color='b', alpha=1.)
ax1.set_title('Mean ($\\mu$) intensity of flood events in East Africa\n based on threshold value detection')
ax1.set_ylabel(r'Intensity')
ax1.set_xlabel(r'Year')
ax1.grid(True)

ax2.errorbar(years[complete],drought_mean[complete],
             xerr=None, yerr=drought_sem[complete], color='r', alpha=1.)
ax2.set_title(' Mean ($\\mu$)intensity of drought events in East Africa\n based on threshold value detection ')
ax2.set_xlabel(r"Years")
ax2.set_ylabel('Intensity')
ax2.grid(True)
#my_int.savefig('My_intensity_plot.pdf',dpi=300)

In [None]:
#Frequency
my_ = plt.figure(dpi=72)
my_.set_size_inches(15,5)        # Specify the output size
ax = my_.add_subplot(121)        # Left panel: raw counts per year
ax1 = my_.add_subplot(122)       # Right panel: counts divided by valid days

complete = yr_day_count > 350    # Masking years with many missing days

ax.plot(years[complete],flood_freq[complete], 'bd', alpha=0.8)
ax.plot(years[complete],drought_freq[complete], 'rd',alpha=0.8)
leg=ax.legend(['Floods','Drought',],prop={'size':10},numpoints=1,markerscale=1.,
                frameon=True,fancybox=True)

ax.set_ylim(0,100)
ax.set_title('Integer count of Extreme precip events\n based on threshold value detection')
ax.set_ylabel(r'Number of Extreme events (counts)')
ax.set_xlabel('Years')
ax.grid(True)


ax1.plot(years[complete], flood_freq[complete]/
        yr_day_count[complete], 'bd', alpha=1.)
ax1.plot(years[complete],drought_freq[complete]/
        yr_day_count[complete],'rd', alpha=1.)
leg=ax1.legend(['Floods','Drought',],prop={'size':10},numpoints=1,markerscale=1.,
                frameon=True,fancybox=True)

#ax.set_ylim(0,100)
ax1.set_title('Extreme precip events\n based on threshold value detection')
# This panel shows counts normalised by the number of valid days, not counts.
ax1.set_ylabel(r'Fraction of days with extreme events')
ax1.set_xlabel('Years')
ax1.grid(True)

plt.show()  # show() does not take a Figure argument
#my_.savefig('My_Frequency_plot.pdf',dpi=300)

Duration
  * If you want to inspect or do operations on time differences, use the timedelta type from the pandas / datetime packages.

In [None]:
# Gap between the first two drought days of the year currently held in
# `tmp_yr_data` (left over from whichever yearly loop ran last).
drought_days = tmp_yr_data[drought].index
if len(drought_days) >= 2:
    test = drought_days[1] - drought_days[0]
    print("difference in days between first and second drought:",test.days)
else:
    print("fewer than two drought days in this year")

In [None]:
#Duration
# For each year, collect the number of days between consecutive flood days.
# NOTE(review): range() stops before the final year of the record - confirm
# that excluding it is intended.
flood_time = []
drought_time = []
index = []   # one DatetimeIndex per year; kept as a list because years differ in length

for year in range(min(df_prcp.index.year),max(df_prcp.index.year)):
    tmp_yr_data = df_prcp["Acc_anomaly"][df_prcp.index.year == year]
    index.append(tmp_yr_data.index)
    #print(tmp_yr_data.index)
    if tmp_yr_data.count() > 1:
        flood_days = tmp_yr_data[flood].index
        # Pair every flood day with the one before it. The first flood day of
        # a year has no predecessor and therefore produces no gap.
        for previous, current in zip(flood_days[:-1], flood_days[1:]):
            flood_time.append((current - previous).days)
            #drought_time.append((tmp_yr_data[drought].index[n] - tmp_yr_data[drought].index[n-1]))
    else:
        flood_time.append(np.nan)
        #drought_time.append(np.nan)

flood_time = np.array(flood_time)
#drought_time = np.array(drought_time)

In [None]:
# Use a dedicated name: `mask` is the boolean selection over df_prcp that
# other cells rely on and must not be overwritten here.
positive_gaps = flood_time > 0.0
#plt.plot(flood_time.index,flood_time[positive_gaps],'.r',ms=2.0,alpha=0.75)
plt.plot(flood_time[positive_gaps])

#### extreme event based on cumulative statistical values (boxcar approach)####

In [None]:
# Type of boxcar function
# Two-day centred rolling sum. `Series.rolling(...).sum()` is the supported
# replacement for the removed top-level `pd.rolling_sum`.
test = df_prcp["Accumulated"].rolling(window=2, min_periods=2, center=True).sum()
plt.plot(test.index,test,'.r',ms=2.0,alpha=0.75)

In [None]:
# For the first few windows, print the closing date and the number of days
# back to the window's first row.
window_size = 2
for n, date in enumerate(df_prcp.index[window_size - 1:20]):
    # `date` is row n + window_size - 1, so its window starts at row n.
    # (Row n - 1 would wrap to the end of the index when n is 0.)
    print(date.date(), (date - df_prcp.index[n]).days)

In [None]:
# Days selected by `mask` whose mean precipitation exceeds 25
heavy_days = df_prcp['Accumulated'][(mask) & (df_prcp['Accumulated'] > 25)]
plt.plot(heavy_days.index, heavy_days, 'bd', alpha=.5, ms=2.)

In [None]:
#whos

In [None]:
#corr_matrix = df_prcp
#corr_matrix.corr(method='pearson', min_periods=1)