In [3]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from dateutil.relativedelta import relativedelta
from scipy.special import expit
from scipy.optimize import fmin, minimize
import glob as glob
import matplotlib
matplotlib.style.use('ggplot')

# .py
%load_ext autoreload
%autoreload 2
import weather_all_alphas, weather_all_costs, prediction_humidity, model_02, validate, dengue_weather

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
def seperate_train_test(all_data,train_data):
    plt.axvspan(0, len(train_data), color='blue', alpha=0.1)
    plt.text((len(train_data)/2)-25,700,'Train Period',size=16)
    plt.axvline(len(train_data)+1,linewidth=0.5, color='green')
    plt.text(len(train_data)+20,700,'Test Period',size=16)
    plt.axvspan(len(train_data)+1, len(all_data), color='green', alpha=0.1)

def plot_all_predictions(LAG, real, train, province, nweeks_to_predict, all_predictions):
    
    plt.figure(figsize=(13,6))
    plt.plot(range(len(real)),real.cases,label='real')
    graph_title = "%s's Dengue Prediction with Weather Data"%province
    plt.title(graph_title)
    for i in range(len(all_predictions)):
        cur_week_guess = nweeks_to_predict[i]
        start_predict_week = LAG+(cur_week_guess-1)
        predicted_cases = all_predictions[i]
        x_weeks = range(start_predict_week,len(predicted_cases)+start_predict_week)
        line_label = "%d week guess"%cur_week_guess
        plt.plot(x_weeks,predicted_cases,label=line_label,alpha=0.8)
    
    seperate_train_test(real,train)
    plt.axvline(LAG)
    plt.text((LAG*2)+10,300,'Prediction Start',horizontalalignment='center',verticalalignment='center')
    plt.xlabel("# week starting 2003")
    plt.ylabel("Dengue cases")
    plt.legend()
#     plt.savefig("16-lags-16-weeks-9-temp-wks-chiangmai-00.png")
    
def show_predictions(params, real, train, province, ws):
    LAG, TEMPERATURE_WEEKS, RAIN_WEEKS, AVGRH_WEEKS = params[0], params[1], params[2], params[3]
    
    predictions_to_plot = prediction_humidity.get_predictions(
        LAG, 
        TEMPERATURE_WEEKS,
        RAIN_WEEKS,
        AVGRH_WEEKS,
        real,
        ws,
        province)
    
    plot_all_predictions(LAG, 
                         real,
                         train,
                         province,
                         [1,2,4,8,16],
                         predictions_to_plot)


In [169]:
all_weather = pd.read_csv("weather01-16.csv")
all_weather.stn_name = all_weather.stn_name.apply(dengue_weather.remove_space)
all_dengue = pd.read_csv("all-dengues.csv")
all_dengue = all_dengue.rename(columns = {'จังหวัด' : 'province'})

In [173]:
                       # province_dengue: province_weather
provinces_to_predict = {"Bangkok":"BangkokMetropolis",
                        "PrachuapKhiriKhan":"PrachuapKhiriKhan",
                        "PhraNakhonSiAyutthaya":"AyuttayaAgromet", 
                        "Songkhla":"PhatthalungAgromet", 
                        "Ratchaburi":"Ratchaburi", 
                        "ChonBuri":"ChonBuri", 
                        "NongKhai":"NongKhai",
                        "SakonNakhon":"SakonNakhonAgromet",
                        "ChiangMai":"ChiangMai",
                        "ChiangRai":"ChiangRai"}

province_data = {}
for province_for_dengue in provinces_to_predict:
    province_for_weather = provinces_to_predict.get(province_for_dengue)
    print province_for_dengue, province_for_weather
    
    province_data_to_predict = dengue_weather.get_dengue_weather_by_province(
                                                                    province_for_dengue, 
                                                                    province_for_weather, 
                                                                    all_dengue, 
                                                                    all_weather)
    province_data[province_for_dengue] = province_data_to_predict
    
# province data is dictionary of province to its train and real data, idx 0 and 1
# ex --> province_data.get('Bangkok')[0]

Bangkok BangkokMetropolis
Ratchaburi Ratchaburi
PhraNakhonSiAyutthaya AyuttayaAgromet
NongKhai NongKhai
SakonNakhon SakonNakhonAgromet
PrachuapKhiriKhan PrachuapKhiriKhan
Songkhla PhatthalungAgromet
ChiangMai ChiangMai
ChonBuri ChonBuri
ChiangRai ChiangRai


In [172]:
province_data

{'Bangkok': (                avgrh       dday   meantemp   rain  cases
  date                                                     
  2003-01-05  64.000000   3.500000  28.750000    0.0    224
  2003-01-12  64.714286   9.000000  27.228571    0.0    274
  2003-01-19  70.285714  16.000000  26.457143    0.0    183
  2003-01-26  70.285714  23.000000  28.100000    0.0    194
  2003-02-02  65.000000  21.142857  28.942857    0.0    181
  2003-02-09  60.000000   6.000000  27.742857    0.0    162
  2003-02-16  75.571429  13.000000  29.914286    0.4    147
  2003-02-23  74.857143  20.000000  30.214286    0.0    161
  2003-03-02  74.428571  19.000000  30.585714    0.3    178
  2003-03-09  73.428571   6.000000  30.757143   17.2    149
  2003-03-16  72.142857  13.000000  28.900000   28.4    136
  2003-03-23  75.285714  20.000000  30.271429   50.4    102
  2003-03-30  77.571429  27.000000  29.100000   38.6     95
  2003-04-06  69.857143   7.428571  31.100000    0.0    120
  2003-04-13  72.571429  10.0

In [151]:
province_data_for_prediction = []
for province in province_data:
    province_train = province_data.get(province)[0]
    province_test = province_data.get(province)[1]
    province_data_for_prediction.append((province,province_train,province_test))
    

In [152]:
print province_data_for_prediction[0][1].head()
print province_data_for_prediction[1][1].head()

                avgrh       dday   meantemp  rain  cases
date                                                    
2003-01-05  64.000000   3.500000  28.750000   0.0    224
2003-01-12  64.714286   9.000000  27.228571   0.0    274
2003-01-19  70.285714  16.000000  26.457143   0.0    183
2003-01-26  70.285714  23.000000  28.100000   0.0    194
2003-02-02  65.000000  21.142857  28.942857   0.0    181
                avgrh       dday   meantemp  rain  cases
date                                                    
2003-01-05  64.000000   3.500000  28.750000   0.0     24
2003-01-12  64.714286   9.000000  27.228571   0.0     35
2003-01-19  70.285714  16.000000  26.457143   0.0     27
2003-01-26  70.285714  23.000000  28.100000   0.0     18
2003-02-02  65.000000  21.142857  28.942857   0.0     11


In [157]:
def province_prediction(data_for_prediction):
    
    province = data_for_prediction[0]
    """change here to full data"""
    train = data_for_prediction[1][:52]
    real = data_for_prediction[2][:104]
    
    print province
    print 
    print train.head()
    print 
    print real.head()
    
    LAG, TEMPERATURE_WEEKS, RAIN_WEEKS, AVGRH_WEEKS = 19, 9, 15, 1
                                    # get_alphas takes 4 args, if theres no input csv
        
    alphas = weather_all_alphas.get_alphas(
        LAG, 
        TEMPERATURE_WEEKS, 
        RAIN_WEEKS, 
        AVGRH_WEEKS,
        train, 
        ws_csv)
    
    print "done get_alphas"
                                    # get_predictions takes 6 args
    predictions_to_validate = prediction_humidity.get_predictions(
        LAG, 
        TEMPERATURE_WEEKS, 
        RAIN_WEEKS, 
        AVGRH_WEEKS, 
        real, 
        alphas.x, 
        province)
    
    print "done get_predictions"
                                    # get_validations takes 3 argsg
    validated_result = validate.get_validations(
        LAG, 
        predictions_to_validate, 
        real)
    
    print "done get_validations"
    
    return province, predictions_to_validate, validated_result, alphas.x


In [158]:
%%time
from multiprocessing import Pool

result_list = []
def log_result(result):
    # This is called whenever foo_pool(i) returns a result.
    # result_list is modified only by the main process, not the pool workers.
    result_list.append(result)

def apply_async_with_callback():
    pool = Pool(processes=3)
    for data_for_prediction in province_data_for_prediction:
        pool.apply_async(province_prediction, args = (data_for_prediction, ), callback = log_result)
    pool.close()
    pool.join()
    print(result_list)

if __name__ == '__main__':
    apply_async_with_callback()

Bangkok

Nonthaburi

                avgrh       dday   meantemp  rain  cases
date                                                    
2003-01-05  64.000000   3.500000  28.750000   0.0    224
2003-01-12  64.714286   9.000000  27.228571   0.0    274
2003-01-19  70.285714  16.000000  26.457143   0.0    183
2003-01-26  70.285714  23.000000  28.100000   0.0    194
2003-02-02  65.000000  21.142857  28.942857   0.0    181                avgrh       dday   meantemp  rain  cases
date                                                    
2003-01-05  64.000000   3.500000  28.750000   0.0     24
2003-01-12  64.714286   9.000000  27.228571   0.0     35
2003-01-19  70.285714  16.000000  26.457143   0.0     27
2003-01-26  70.285714  23.000000  28.100000   0.0     18
2003-02-02  65.000000  21.142857  28.942857   0.0     11



                avgrh       dday   meantemp  rain  cases
date                                                    
2003-01-05  64.000000   3.500000  28.750000   0.0     24
2003-01-

In [168]:
result_list[1][2]

[261.87315037892012,
 395.12464940558061,
 776.8943285664194,
 1566.5795581692028,
 3077.6258498651578]