In [2]:
%matplotlib inline

import pandas as pd
import numpy as np
import datetime
import seaborn as sns
from matplotlib import pyplot as plt

# Display the result of every top-level expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


In [3]:
# Load the daily weather summaries for 2012 (one JSON file per day) into a
# single frame with one row per date.
#
# Each day's scalar summary fields are collected as a plain dict and the frame
# is built once after the loop. Appending with pd.concat inside the loop copies
# every previously loaded row on each iteration.
start_date = datetime.date(year = 2012, month = 1, day = 1)
end_date = datetime.date(year = 2013, month = 1, day = 1)  # exclusive upper bound

record_dates = []
daily_records = []
date = start_date
while date < end_date:
    nextday_df = pd.read_json('data/weather/Theewaterskloof_{0}.json'.format(date.strftime('%Y%m%d')))
    daily_summary = nextday_df['history']['dailysummary'][0]
    # Nested (dict) values are skipped so every column holds scalar values.
    daily_records.append({key: value for key, value in daily_summary.items()
                          if not isinstance(value, dict)})
    record_dates.append(date)
    date = date + datetime.timedelta(days = 1)

# Rows are labelled with the datetime.date of each file; columns are the union
# of the keys seen across all days.
weather_data = pd.DataFrame(daily_records, index = record_dates)

In [7]:
def _to_numeric_if_possible(column):
    """Return `column` converted by pd.to_numeric, or unchanged when it cannot be parsed.

    Equivalent to the deprecated `pd.to_numeric(column, errors='ignore')`.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column

# Use a DatetimeIndex and convert every column that parses cleanly to a numeric dtype.
weather_data.index = pd.to_datetime(weather_data.index)
weather_data = weather_data.apply(_to_numeric_if_possible)

# Drop columns that contain no observations at all.
is_all_null = weather_data.isnull().all()
not_all_null = is_all_null[~is_all_null]
weather_data = weather_data[not_all_null.index]

# Collected after the drop so that num_var only names columns that still exist
# in weather_data (an all-null column converts to float and would otherwise be
# listed here even though it has just been removed).
num_var = weather_data.select_dtypes(include=[np.number]).columns

In [37]:
# Read the dam-level CSV and keep only the Theewaterskloof rows, using the
# 'date' column (parsed to datetimes) as the index.
dam_levels = pd.read_csv('data/Dam-levels-clean-20120101-20171206.csv')
tw_dam_levels = (
    dam_levels
    .loc[lambda levels: levels['dam_name'] == 'Theewaterskloof']
    .set_index('date')
)
tw_dam_levels.index = pd.to_datetime(tw_dam_levels.index)

In [40]:
# Preview both inputs, then join them on their indexes (merge defaults to an
# inner join, so only index values present in both frames are kept).
weather_data.head()
tw_dam_levels.head()
data = tw_dam_levels.merge(weather_data, left_index = True, right_index = True)
data.head()

Unnamed: 0,fog,rain,snow,hail,thunder,tornado,meantempm,meantempi,meandewptm,meandewpti,...,maxvism,maxvisi,minvism,minvisi,gdegreedays,heatingdegreedays,coolingdegreedays,precipm,precipi,precipsource
2012-01-01,0,0,0,0,0,0,18,65,14,56,...,25.0,16.0,10.0,6.2,15,0,0,0.0,0.0,3Or6HourObs
2012-01-02,0,1,0,0,0,0,20,68,17,63,...,20.0,12.0,2.0,1.2,18,0,4,0.2,0.01,Precip6GroupSynop
2012-01-03,0,1,0,0,0,0,20,70,14,57,...,25.0,16.0,10.0,6.0,20,0,4,0.0,0.0,Precip6GroupSynop
2012-01-04,0,0,0,0,0,0,18,65,12,54,...,25.0,16.0,10.0,6.2,15,0,0,0.0,0.0,3Or6HourObs
2012-01-05,0,0,0,0,0,0,22,71,14,58,...,25.0,16.0,10.0,6.2,21,0,6,0.0,0.0,3Or6HourObs


Unnamed: 0_level_0,dam_name,height_m,storage_ml,current_%,last year_%
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-01-01,Theewaterskloof,24.83,357963.0,74.5,
2012-01-02,Theewaterskloof,24.8,356677.0,74.3,
2012-01-03,Theewaterskloof,24.77,355394.0,74.0,
2012-01-04,Theewaterskloof,24.73,353687.0,73.7,
2012-01-05,Theewaterskloof,24.67,351135.0,73.1,


Unnamed: 0,dam_name,height_m,storage_ml,current_%,last year_%,fog,rain,snow,hail,thunder,...,maxvism,maxvisi,minvism,minvisi,gdegreedays,heatingdegreedays,coolingdegreedays,precipm,precipi,precipsource
2012-01-01,Theewaterskloof,24.83,357963.0,74.5,,0,0,0,0,0,...,25.0,16.0,10.0,6.2,15,0,0,0.0,0.0,3Or6HourObs
2012-01-02,Theewaterskloof,24.8,356677.0,74.3,,0,1,0,0,0,...,20.0,12.0,2.0,1.2,18,0,4,0.2,0.01,Precip6GroupSynop
2012-01-03,Theewaterskloof,24.77,355394.0,74.0,,0,1,0,0,0,...,25.0,16.0,10.0,6.0,20,0,4,0.0,0.0,Precip6GroupSynop
2012-01-04,Theewaterskloof,24.73,353687.0,73.7,,0,0,0,0,0,...,25.0,16.0,10.0,6.2,15,0,0,0.0,0.0,3Or6HourObs
2012-01-05,Theewaterskloof,24.67,351135.0,73.1,,0,0,0,0,0,...,25.0,16.0,10.0,6.2,21,0,6,0.0,0.0,3Or6HourObs


In [None]:
def multi_plots(df):
    """Plot every column of `df` in its own panel, stacked in a single figure.

    The figure is 10 wide and 5 tall per column, each panel is titled with its
    column name, and the figure is shown with plt.show(). Nothing is returned.
    """
    n_panels = len(df.columns)
    fig = plt.figure(figsize = (10, 5 * n_panels))

    # Subplot positions are 1-based, hence start = 1.
    for position, col in enumerate(df.columns, start = 1):
        panel = fig.add_subplot(n_panels, 1, position)
        panel.set_title(col)
        panel.plot(df[col])
    plt.show()

In [None]:
# Create a 12x12 figure before the heatmap helper is defined and called below;
# presumably so the heatmap drawn at the end of this cell uses this size — TODO confirm.
fig = plt.figure(figsize = (12,12))
def filtered_heatmap(data, y, k = 10):
    """Draw an annotated heatmap of the `k` variables most correlated with `y`.

    Parameters
    ----------
    data : DataFrame
        Frame whose pairwise column correlations are computed with `data.corr()`.
    y : column label
        Column the other variables are ranked against.
    k : int, default 10
        Number of variables shown: the `k` largest correlations with `y`
        (fewer if `data` has fewer columns).

    The heatmap is shown with plt.show(); nothing is returned.
    """
    corrmat = data.corr()
    cols = corrmat.nlargest(k, y)[y].index
    # Take the sub-matrix from the correlations computed above rather than
    # recomputing it from raw values: the ranking and the displayed numbers
    # then come from the same calculation, and a column with missing values
    # does not turn its whole row/column into NaN.
    cm = corrmat.loc[cols, cols].values
    sns.set(font_scale = 1.25)
    sns.heatmap(cm, cbar = True, annot = True, square = True, fmt = '.2f',
                annot_kws = {'size': 10}, yticklabels = cols.values, xticklabels = cols.values)
    plt.show()
    
# Heatmap of the 10 variables most correlated with 'precipm'.
# NOTE(review): num_weather_data is not defined in any cell visible above — confirm where it is created.
filtered_heatmap(num_weather_data, 'precipm', 10)

In [None]:
# Reshape to long format: 'date' is kept as the identifier and every other
# column becomes a ('variable', 'value') pair.
weather_long = num_weather_data.melt(id_vars = 'date')
weather_long.head()

In [None]:
# One panel per variable (3 per row, independent axes) showing the distribution of its values.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases — consider histplot/displot.
grid = (
    sns.FacetGrid(weather_long, col = "variable", col_wrap = 3, sharex = False, sharey = False)
    .map(sns.distplot, "value")
)

In [None]:
# One panel per variable (3 per row, independent axes) with its values drawn as a line in row order.
grid = (
    sns.FacetGrid(weather_long, col = "variable", col_wrap = 3, sharex = False, sharey = False)
    .map(plt.plot, "value")
)

In [None]:
# Three stacked panels: storage change, precipitation, and the
# 'rolling3_lead1_precipm' series.
# NOTE(review): 'diff_storage_ml' is only created in a later cell and
# 'rolling3_lead1_precipm' is not created in any visible cell — confirm
# the intended execution order.
fig, (ax1, ax2, ax3) = plt.subplots(nrows = 3, ncols = 1, figsize = (15, 15))

ax1.plot(theewaterskloof['diff_storage_ml'])
ax2.plot(theewaterskloof['precipm'])
ax3.plot(theewaterskloof['rolling3_lead1_precipm'])

In [None]:
# Plot storage and 'rolling3_lead1_precipm' against the frame's index via two_scales.
# NOTE(review): two_scales is not defined in the visible cells; presumably it draws the two
# series on separate y-axes and returns both axes ('r'/'b' look like line colours) — confirm.
fig, ax = plt.subplots()
ax1, ax2 = two_scales(ax, theewaterskloof.index, theewaterskloof['storage_ml'], theewaterskloof['rolling3_lead1_precipm'], 'r', 'b')


In [None]:
# Correlation between stored volume and precipitation, followed by a scatter
# of the two with the y-axis limited to 0-20.
stored_volume, precip = theewaterskloof['storage_ml'], theewaterskloof['precipm']
stored_volume.corr(precip)
plt.scatter(stored_volume, precip)
plt.ylim([0, 20])

In [None]:
# Correlation between the row-to-row storage change and precipitation, then a scatter of the two.
storage_change, precip = theewaterskloof['diff_storage_ml'], theewaterskloof['precipm']
storage_change.corr(precip)
plt.figure(figsize = (10, 7))
plt.scatter(storage_change, precip)

In [None]:
# Derived columns: row-to-row change in storage, shifted precipitation, and
# two-row rolling precipitation sums.
# NOTE(review): a positive shift moves earlier rows down, so the 'lead*'
# columns hold precipitation from 1-3 rows earlier (a lag, despite the name).
# NOTE(review): earlier cells already read 'diff_storage_ml'; this cell has to
# run before them on a fresh kernel.
precip = theewaterskloof['precipm']
theewaterskloof['diff_storage_ml'] = theewaterskloof['storage_ml'].diff()
theewaterskloof['lead1_precipm'] = precip.shift(1)
theewaterskloof['lead2_precipm'] = precip.shift(2)
theewaterskloof['lead3_precipm'] = precip.shift(3)
theewaterskloof['rolling2_lead1_precipm'] = precip.shift(1).rolling(2).sum()
theewaterskloof['rolling2_precipm'] = precip.rolling(2).sum()


In [None]:
# Correlation between the storage change and the two-row rolling sum of
# shifted precipitation, then a scatter of the two.
storage_change = theewaterskloof['diff_storage_ml']
rolling_precip = theewaterskloof['rolling2_lead1_precipm']
storage_change.corr(rolling_precip)
plt.figure(figsize = (10, 7))
plt.scatter(storage_change, rolling_precip)

In [None]:
# Keep the rows from months 6-9 on which storage increased, then repeat the
# storage-change vs rolling-precipitation correlation and scatter on that subset.
# NOTE(review): this reads a 'date' column, so theewaterskloof presumably has
# the dates as a regular column rather than only as its index — confirm.
theewaterskloof['month'] = theewaterskloof['date'].dt.month
in_winter_months = theewaterskloof['month'].between(6, 9)
storage_increased = theewaterskloof['diff_storage_ml'] > 0
theewaterskloof_winter = theewaterskloof.loc[in_winter_months & storage_increased]
theewaterskloof_winter['diff_storage_ml'].corr(theewaterskloof_winter['rolling2_lead1_precipm'])
plt.figure(figsize = (10, 7))
plt.scatter(theewaterskloof_winter['diff_storage_ml'], theewaterskloof_winter['rolling2_lead1_precipm'])

In [None]:
# Histogram (30 bins) of precipitation over the filtered winter rows.
theewaterskloof_winter['precipm'].hist(bins = 30)