In [108]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from dateutil.relativedelta import relativedelta
from scipy.special import expit
from scipy.optimize import fmin, minimize
import glob as glob
import matplotlib
matplotlib.style.use('ggplot')

# .py
%load_ext autoreload
%autoreload 2
import weather_alphas, weather_costs, model, model_01, validate, prediction, temp, ws_csv

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [109]:
def split_data(data):
    train_mask = (data.index < "2011")
    train = data[train_mask]
    test_mask = (data.index > "2011")
    test = data[test_mask]
    return train, test

In [110]:
all_weather = pd.read_csv("weather01-16.csv")

In [111]:
cm_weather = all_weather[all_weather.stn_name == "Chiang Mai"]
cm_weather.index = pd.DatetimeIndex(cm_weather.date)
mask = (cm_weather.index > "2003") & (cm_weather.index < "2016")
cm_weather = cm_weather[mask]

provinces_df = pd.read_csv("all-dengues.csv")
provinces_df.index = pd.DatetimeIndex(provinces_df.date)
provinces_df = provinces_df.drop(['date','date.1'],axis=1)
cm_dengues = provinces_df[provinces_df['จังหวัด'] == 'ChiangMai'].resample('W').size()
cm_dengues_df = pd.DataFrame(cm_dengues,columns=['cases'])

In [112]:
cm_avg_weather = cm_weather[['avgrh','dday','meantemp']].resample('W').mean()
cm_avg_weather['rain'] = cm_weather[['rain']].resample('W').sum() # cumulative rainfall
cm_dengues_weather = pd.concat([cm_avg_weather,cm_dengues_df[:-52]],axis=1)

cm_dengues_weather_split = split_data(cm_dengues_weather)
cm_dengues_train, cm_dengues_test = cm_dengues_weather_split[0], cm_dengues_weather_split[1]

In [113]:
def make_line(start_week,real,predictions,title,x_axis,y_axis,
              real_legend='Real',predict_legend='Prediction'):
    plt.figure(figsize=(12,6))
    plt.plot(range(len(real)),real,'-r',label=real_legend)
    plt.plot(range(start_week,len(predictions)+start_week),predictions,'-k',label=predict_legend,alpha=0.4)
    plt.axvline(start_week)
    plt.text((start_week*2)+10,300,'Prediction Start',horizontalalignment='center',verticalalignment='center')
    plt.xlabel(x_axis)
    plt.ylabel(y_axis)
    plt.legend()
    plt.title(title)
    
def plot_all_predictions(LAG, real, province, nweeks_to_predict, all_predictions):
    
    plt.figure(figsize=(13,6))
    plt.plot(range(len(real)),real,label='real')
    graph_title = "%s's Dengue Prediction with Mean Temperature"%province
    plt.title(graph_title)
    for i in range(len(all_predictions)):
        cur_week_guess = nweeks_to_predict[i]
        start_predict_week = LAG+(cur_week_guess-1)
        predicted_cases = all_predictions[i]
        x_weeks = range(start_predict_week,len(predicted_cases)+start_predict_week)
        line_label = "%d week guess"%cur_week_guess
        plt.plot(x_weeks,predicted_cases,label=line_label,alpha=0.8)
    
    seperate_train_test(cm_dengues_df.cases,cm_dengues_train)
    plt.axvline(LAG)
    plt.text((LAG*2)+10,300,'Prediction Start',horizontalalignment='center',verticalalignment='center')
    plt.xlabel("# week starting 2003")
    plt.ylabel("Dengue cases")
    plt.legend()
    plt.savefig("16-lags-16-weeks-9-temp-wks-chiangmai-00.png")
    
def seperate_train_test(all_data,train_data):
    plt.axvspan(0, len(train_data), color='blue', alpha=0.1)
    plt.text((len(train_data)/2)-25,700,'Train Period',size=16)
    plt.axvline(len(train_data)+1,linewidth=0.5, color='green')
    plt.text(len(train_data)+20,700,'Test Period',size=16)
    plt.axvspan(len(train_data)+1, len(all_data), color='green', alpha=0.1)

In [118]:
import multiprocessing as mp
from multiprocessing import Process, Queue

# Define an output queue
output = Queue()

# def validate

# def compute_ret(temp):
#     for i in range(int(1e5)): pass
#     return temp/2.


ws_csv = np.array(pd.read_csv("best-for-18-lags-ws.csv").T)[0][:18]
    
def validation(LAG, TEMPERATURE_WEEKS, RAIN_WEEKS, train, real, output):
                                    # get_alphas takes 4 args, if theres no input csv
    alphas = weather_alphas.get_alphas(LAG, TEMPERATURE_WEEKS, RAIN_WEEKS, train, ws_csv)
                                    # get_predictions takes 6 args
    predictions_to_validate = prediction.get_predictions(LAG, TEMPERATURE_WEEKS, RAIN_WEEKS, real, alphas.x, "CM")
                                    # get_validations takes 3 argsg
    validated_result = validate.get_validations(LAG, predictions_to_validate, real)
    output.put((LAG,validated_result,alphas.x)) # add in ws as well alphas.x


In [119]:
%%time
# Setup a list of processes that we want to run
train = cm_dengues_train[:52]
real = cm_dengues_weather
processes = [Process(target=validation, args=(weeks, 9, 10, train, real, output)) for weeks in range(15,25)]

# Run processes
for p in processes:
    p.start()

# Exit the completed processes
for p in processes:
    p.join()

# Get process results from the output queue
results = [output.get() for p in processes]

print(results)

39
39
ws_csv
40
40
[  2.40242300e+00   5.11488495e-03   3.47680068e-01   2.55411662e-03
   0.00000000e+00   2.36221708e-01   1.17393365e-01   1.75482967e-08
   4.16553910e-08   0.00000000e+00   0.00000000e+00   2.56447636e-12
   1.52404082e-06   1.91165572e-07   1.31389414e-07   8.13331251e-08
   3.92673460e-07   5.22904629e-03]41
ws_csv

41


Process Process-39:


ws_csv


Traceback (most recent call last):


[  2.40242300e+00   5.11488495e-03   3.47680068e-01   2.55411662e-03
   0.00000000e+00   2.36221708e-01   1.17393365e-01   1.75482967e-08
   4.16553910e-08   0.00000000e+00   0.00000000e+00   2.56447636e-12
   1.52404082e-06   1.91165572e-07   1.31389414e-07   8.13331251e-08
   3.92673460e-07   5.22904629e-03][  2.40242300e+00   5.11488495e-03   3.47680068e-01   2.55411662e-03
   0.00000000e+00   2.36221708e-01   1.17393365e-01   1.75482967e-08
   4.16553910e-08   0.00000000e+00   0.00000000e+00   2.56447636e-12
   1.52404082e-06   1.91165572e-07   1.31389414e-07   8.13331251e-08
   3.92673460e-07   5.22904629e-03]



Process Process-40:
Process Process-41:


42


  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap


43
42


Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
Traceback (most recent call last):


44


    self.run()


43


  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap


ws_csv
ws_csv


    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()


45
[  2.40242300e+00   5.11488495e-03   3.47680068e-01   2.55411662e-03
   0.00000000e+00   2.36221708e-01   1.17393365e-01   1.75482967e-08
   4.16553910e-08   0.00000000e+00   0.00000000e+00   2.56447636e-12
   1.52404082e-06   1.91165572e-07   1.31389414e-07   8.13331251e-08
   3.92673460e-07   5.22904629e-03][  2.40242300e+00   5.11488495e-03   3.47680068e-01   2.55411662e-03
   0.00000000e+00   2.36221708e-01   1.17393365e-01   1.75482967e-08
   4.16553910e-08   0.00000000e+00   0.00000000e+00   2.56447636e-12
   1.52404082e-06   1.91165572e-07   1.31389414e-07   8.13331251e-08
   3.92673460e-07   5.22904629e-03]44


    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run


45
ws_csv
46


  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run





    self._target(*self._args, **self._kwargs)





  File "<ipython-input-118-b63f210ce297>", line 18, in validation


ws_csv


    self._target(*self._args, **self._kwargs)
  File "<ipython-input-118-b63f210ce297>", line 18, in validation
Process Process-42:
    alphas = weather_alphas.get_alphas(LAG, TEMPERATURE_WEEKS, RAIN_WEEKS, train, ws_csv)


ws_csv
47


Process Process-43:


46


Traceback (most recent call last):


[  2.40242300e+00   5.11488495e-03   3.47680068e-01   2.55411662e-03
   0.00000000e+00   2.36221708e-01   1.17393365e-01   1.75482967e-08
   4.16553910e-08   0.00000000e+00   0.00000000e+00   2.56447636e-12
   1.52404082e-06   1.91165572e-07   1.31389414e-07   8.13331251e-08
   3.92673460e-07   5.22904629e-03]

Traceback (most recent call last):


47


  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap


[  2.40242300e+00   5.11488495e-03   3.47680068e-01   2.55411662e-03
   0.00000000e+00   2.36221708e-01   1.17393365e-01   1.75482967e-08
   4.16553910e-08   0.00000000e+00   0.00000000e+00   2.56447636e-12
   1.52404082e-06   1.91165572e-07   1.31389414e-07   8.13331251e-08
   3.92673460e-07   5.22904629e-03]

    alphas = weather_alphas.get_alphas(LAG, TEMPERATURE_WEEKS, RAIN_WEEKS, train, ws_csv)
  File "<ipython-input-118-b63f210ce297>", line 18, in validation
  File "weather_alphas.py", line 73, in get_alphas
    alphas = weather_alphas.get_alphas(LAG, TEMPERATURE_WEEKS, RAIN_WEEKS, train, ws_csv)
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap


48


  File "weather_alphas.py", line 73, in get_alphas




[  2.40242300e+00   5.11488495e-03   3.47680068e-01   2.55411662e-03
   0.00000000e+00   2.36221708e-01   1.17393365e-01   1.75482967e-08
   4.16553910e-08   0.00000000e+00   0.00000000e+00   2.56447636e-12
   1.52404082e-06   1.91165572e-07   1.31389414e-07   8.13331251e-08
   3.92673460e-07   5.22904629e-03]ws_csv


    prev_ws = ws_csv.ws_helper(LAG, ws_csv)
  File "weather_alphas.py", line 73, in get_alphas
Process Process-44:
AttributeError: 'numpy.ndarray' object has no attribute 'ws_helper'
    prev_ws = ws_csv.ws_helper(LAG, ws_csv)


48


    prev_ws = ws_csv.ws_helper(LAG, ws_csv)





AttributeError: 'numpy.ndarray' object has no attribute 'ws_helper'


[  2.40242300e+00   5.11488495e-03   3.47680068e-01   2.55411662e-03
   0.00000000e+00   2.36221708e-01   1.17393365e-01   1.75482967e-08
   4.16553910e-08   0.00000000e+00   0.00000000e+00   2.56447636e-12
   1.52404082e-06   1.91165572e-07   1.31389414e-07   8.13331251e-08
   3.92673460e-07   5.22904629e-03]

Traceback (most recent call last):
Process Process-45:
Process Process-46:
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap





Traceback (most recent call last):
Traceback (most recent call last):
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
AttributeError: 'numpy.ndarray' object has no attribute 'ws_helper'
    self.run()
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run


ws_csv
[  2.40242300e+00   5.11488495e-03   3.47680068e-01   2.55411662e-03
   0.00000000e+00   2.36221708e-01   1.17393365e-01   1.75482967e-08
   4.16553910e-08   0.00000000e+00   0.00000000e+00   2.56447636e-12
   1.52404082e-06   1.91165572e-07   1.31389414e-07   8.13331251e-08
   3.92673460e-07   5.22904629e-03]


Process Process-48:
Traceback (most recent call last):
    self._target(*self._args, **self._kwargs)
Process Process-47:
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
Traceback (most recent call last):
  File "<ipython-input-118-b63f210ce297>", line 18, in validation
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
    alphas = weather_alphas.get_alphas(LAG, TEMPERATURE_WEEKS, RAIN_WEEKS, train, ws_csv)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-118-b63f210ce297>", line 18, in validation
  File "/usr/lib/pyth

KeyboardInterrupt: 

In [54]:
def get_best(validations):
    idx = None
    best_so_far = validations[0][1][-1]
    print best_so_far
    for i in range(len(validations)):
        cur_validation = validations[i][1]
#         print cur_validation
        if cur_validation[0] <= best_so_far:
            best_so_far = cur_validation[0]
            idx = i
    return validations[idx]
        
    
best_param = get_best(results)

4748.1532393


In [103]:
# pd.DataFrame(best_param[2]).to_csv("best-for-18-lags-ws.csv",index=False)

lg = [16,17,18,20,22]
a = np.array(pd.read_csv("best-for-18-lags-ws.csv").T)[0][:18]

# for i in lg:
#     if i == len(a):
#         arr = a
#         print len(arr)
#     elif i < len(a):
#         arr = a[:-(18-i)]
#         print len(arr)
#     else:
#         mean_val = 1/float(i)
#         arr = np.append(a,[mean_val]*(i-len(a)))
#         print arr
        
def ws_helper(LAG, ws):
    n_ws = len(ws)
    if LAG == n_ws:
        arr = ws
    elif LAG < n_ws:
        arr = ws[:-(n_ws - LAG)]
    else:
        tail = LAG - n_ws
        mean_val = 1/(float(LAG)*tail)
        arr = np.append(ws,[mean_val]*tail)
    return arr

ws_helper(20,a)

0.15833092825972878

In [69]:
%%time
[compute_ret(temp.get_alphas(i,9,10)) for i in range(16,26)]

NameError: name 'compute_ret' is not defined