#### This python notebook looks at the relationship between stages of the Monsoon (burst) and Extreme Events

In [18]:
import pandas as pd
import numpy as np
import xarray as xr
from netCDF4 import Dataset as nc, num2date, MFDataset as mc
import os
from matplotlib import pyplot as plt
import plotly.plotly as py
import plotly.graph_objs as go
import seaborn as sns
import cufflinks as cf
cf.set_config_file(offline=False, world_readable=True, theme='ggplot')
sns.set_style("whitegrid")
from DataFrame import *
from datetime import datetime

Define the path's where data is storde

In [2]:
largeScaleFile = os.path.join(os.environ['HOME'], 'Data', 'Extremes', 'CPOL', 'CPOL-large-scale_forcing.pkl')
largeScaleNc = os.path.join(os.environ['HOME'], 'Data', 'Extremes', 'CPOL', 'CPOL-large-scale_forcing.nc')
CPOL = os.path.join(os.environ['HOME'], 'Data', 'Extremes', 'CPOL', 'CPOL_area_avg_precip.pkl')
BurstF = os.path.join(os.environ['HOME'], 'Data', 'Extremes', 'CPOL', 'CPOL_burst.pkl')

In [3]:
cpolFrame = pd.read_pickle(largeScaleFile)
burst10m = pd.read_pickle(BurstF)
cpol10m = pd.read_pickle(CPOL)['all'] / 6.
cpol1h = cpol10m.groupby(pd.TimeGrouper('1h')).sum()
cpol6h = cpol1h.groupby(pd.TimeGrouper('6h')).mean()
burst6h = burst10m.groupby(pd.TimeGrouper('6h')).mean()

In [4]:
idx = burst6h.burst[burst6h.burst == 2].index

In [5]:
event = pd.Series(np.zeros(len(cpolFrame)),index = cpolFrame.index)
event.loc[cpolFrame['CPOL_rain'] > cpolFrame['CPOL_rain'].quantile(90/100.)] = 1.
extreme = cpol6h.quantile(90/100.)
#event.loc[data['CPOL_rain'] > data['CPOL_rain'].quantile(95/100.)] = 2.
cpolFrame['Event'] = event

In [6]:
priorBurst = cpol6h[idx].dropna()
burstPeriod = burst6h.dropna().index
normalExtremes = cpol6h[burstPeriod].loc[cpol6h[burstPeriod]>= extreme]
burstExtremes = priorBurst.loc[priorBurst >= extreme]

In [9]:
len(burstExtremes)/len(normalExtremes) * 100.

12.844036697247708

In [7]:
len(priorBurst)/float(len(burstPeriod)) * 100.

8.851674641148326

In [87]:
dataDir = os.path.join(os.getenv('HOME'),'Data','Darwin','netcdf')
files = []
for d in cpol10m.index.date:
    if d.month >= 6:
        D = '%04i%04i' %(d.year,d.year+1)
    else:
        D = '%04i%04i' %(d.year-1,d.year)
    files.append(os.path.join(dataDir,D,d.strftime('CPOL_RADAR_ESTIMATED_RAIN_RATE_%Y%m%d_level2.nc')))
os.system('cdo mergetime %s %s' %(' '.join(files), os.path.join(dataDir,'CPOL-rain.nc')))
    

32512

In [12]:
dates = normalExtremes.index.date

In [14]:
len(dates)

109

In [16]:
dataDir = os.path.join(os.getenv('HOME'),'Data','Darwin','netcdf')
files = []
for d in dates:
    if d.month >= 6:
        D = '%04i%04i' %(d.year,d.year+1)
    else:
        D = '%04i%04i' %(d.year-1,d.year)
    files.append(os.path.join(dataDir,D,d.strftime('CPOL_RADAR_ESTIMATED_RAIN_RATE_%Y%m%d_level2.nc')))

In [25]:
os.system('cdo mergetime %s %s' %(' '.join(files), os.path.join(dataDir,'ExtremesP90.rain')))

0

In [58]:
f = nc(os.path.join(dataDir,'ExtremesP90.rain'))
rr = np.ma.masked_invalid(f.variables['radar_estimated_rain_rate'][:])

In [59]:
rr = rr/6.

In [60]:
lon, lat = f.variables['longitude'][0,:], f.variables['latitude'][:,0]
time = pd.DatetimeIndex(num2date(f.variables['time'][:],f.variables['time'].units))

In [61]:
RRdata = xr.DataArray(rr, coords={'time':time,'lon': lon,'lat':lat}, dims=('time','lat','lon'))

In [85]:
RR6H = RRdata.resample(time='3H').mean('time')

In [86]:
RR6H['time']

<xarray.DataArray 'time' (time: 34896)>
array(['1999-01-16T00:00:00.000000000', '1999-01-16T03:00:00.000000000',
       '1999-01-16T06:00:00.000000000', ..., '2010-12-25T15:00:00.000000000',
       '2010-12-25T18:00:00.000000000', '2010-12-25T21:00:00.000000000'],
      dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 1999-01-16 1999-01-16T03:00:00 ...