# Global Model Testing (v1.0)

Casey A Graff

November 3rd, 2017

This version uses the updated $X_{cluster}$ data format.

In [None]:
# Absolute location of the local fire_prediction checkout (note the trailing slash);
# the source and data directories are derived from it by plain concatenation.
REP_DIR = "/home/cagraff/Documents/dev/fire_prediction/"
SRC_DIR, DATA_DIR = REP_DIR + 'src/', REP_DIR + 'data/'

# Load system-wide packages
import os
import sys
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import datetime as dt
import bisect
# Default figure size (width, height) applied to every figure created after this point
plt.rcParams['figure.figsize'] = [15,7]
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

# Load project packages
# The working directory is switched to SRC_DIR before the project imports below;
# presumably they are resolved relative to the current directory — confirm.
os.chdir(SRC_DIR)
from features.loaders import load_integrated_df
from features.loaders import load_fire_cube
import models.poisson_regression as pr
import models.linear_regression as lr
import models.regional_summation_regression as rsr
import models.evaluation as ev
from models import metrics
from helper import date_util as du
from helper import df_util as dfu
from helper import preprocessing as pp
from features import generate_grid as gg

In [None]:
sys.path.append(SRC_DIR+'features')

# Pickled inputs, all located under DATA_DIR. The 1k/2k/3k integrated frames are
# the ones later passed to prep() with t_k = 1/2/3 respectively.
path_1k = os.path.join(DATA_DIR, 'interim/integrated/fire_weather/fire_weather_integrated_gfs_4_modis_5km_10days_1400_1k_alaska_2007-2016.pkl')
path_2k = os.path.join(DATA_DIR, 'interim/integrated/fire_weather/fire_weather_integrated_gfs_4_modis_5km_10days_1400_2k_alaska_2007-2016.pkl')
path_3k = os.path.join(DATA_DIR, 'interim/integrated/fire_weather/fire_weather_integrated_gfs_4_modis_5km_10days_1400_3k_alaska_2007-2016.pkl')
fire_cube_path = os.path.join(DATA_DIR, 'interim/modis/fire_cube/fire_cube_modis_alaska_2007_2016.pkl')

int_5km_10days_14_1k_df = load_integrated_df(path_1k)
int_5km_10days_14_2k_df = load_integrated_df(path_2k)
int_5km_10days_14_3k_df = load_integrated_df(path_3k)
# The 4k and 5k frames are currently disabled:
#int_5km_10days_14_4k_df = load_integrated_df(os.path.join(DATA_DIR, 'interim/integrated/fire_weather/fire_weather_integrated_gfs_4_modis_5km_10days_1400_4k_alaska_2007-2016.pkl'))
#int_5km_10days_14_5k_df = load_integrated_df(os.path.join(DATA_DIR, 'interim/integrated/fire_weather/fire_weather_integrated_gfs_4_modis_5km_10days_1400_5k_alaska_2007-2016.pkl'))

# Detection cube together with the dates that index it
fire_cube, fire_cube_dates = load_fire_cube(fire_cube_path)


In [None]:
# Preview the leading rows of the 1k integrated frame (displayed as the cell output)
int_5km_10days_14_1k_df[:5]

In [None]:
# Re-execute the project modules used by the helpers below so that source edits
# made since they were first imported take effect without restarting the kernel.
for module in (ev, gg, pp):
    reload(module)
def prep(X, t_k, years=None):
    """Standardize the weather covariates, tag rows with their year and add the autoregressive column.

    Args:
        X: Integrated fire/weather DataFrame. It needs a `date_local` column
            whose values expose a `.year` attribute, plus the four weather
            columns standardized below.
        t_k: Forecast horizon, forwarded to `pp.add_autoregressive_col`.
        years: Optional collection of years to keep (list, tuple, set, numpy
            array, pandas Series, ...). `None` or an empty collection keeps
            every row.

    Returns:
        The result of `pp.add_autoregressive_col` on the prepared frame.
    """
    X = pp.standardize_covariates(X, ['temperature', 'humidity', 'wind', 'rain'])

    # Build a real list: map() is only eager on Python 2, a list works everywhere.
    X = X.assign(year=[date.year for date in X.date_local])

    # Use len() instead of truthiness so array-likes (numpy arrays, Series) do not
    # raise "truth value is ambiguous"; an empty collection still means "no filter".
    if years is not None and len(years) > 0:
        X = X[X.year.isin(years)]

    return pp.add_autoregressive_col(X, t_k)

def train_poisson(X_t, t_k, covariates):
    """Cross-validate a Poisson regression model over years.

    Args:
        X_t: Prepared frame handed to `ev.cross_validation_years`.
        t_k: Forecast horizon passed to the model constructor.
        covariates: Covariate names passed to the model constructor.

    Returns:
        Tuple `(preds, info, years)`: the first and second element of every
        cross-validation result, collected into two parallel lists, plus the
        years object returned by `ev.cross_validation_years`.
    """
    model = pr.PoissonRegressionModel(t_k=t_k, covariates=covariates)
    fold_results, years = ev.cross_validation_years(model, X_t)

    preds, info = [], []
    for fold in fold_results:
        preds.append(fold[0])
        info.append(fold[1])

    return preds, info, years

def cluster_targets(res_info, X):
    """Look up the observed target for every predicted (date, cluster) pair.

    Args:
        res_info: One sequence per fold. Each element is indexable, with the
            local date at position 0 and the cluster id at position 3.
        X: DataFrame with `date_local`, `cluster_id` and `num_det_target`
            columns.

    Returns:
        A list with one float array per fold, ordered like `res_info`; entry
        `i` of an array is the `num_det_target` of the first row of `X`
        matching the fold's i-th (date, cluster) pair.
    """
    def first_match_target(entry):
        # Rows are selected by exact date and cluster id; the first hit is used.
        rows = X[(X.date_local == entry[0]) & (X.cluster_id == entry[3])]
        return rows.iloc[0].num_det_target

    return [
        np.array([first_match_target(entry) for entry in fold_info], dtype=float)
        for fold_info in res_info
    ]

def pred_to_grid(preds, info):
    """Convert per-fold predictions into grids via `gg.gen_grid_predictions`.

    Args:
        preds: Per-fold predictions.
        info: Per-fold info, parallel to `preds`.

    Returns:
        Tuple `(grids, dates_all)`: two parallel lists holding, for each fold,
        the grid and the dates returned by `gg.gen_grid_predictions`.
    """
    per_fold = [
        gg.gen_grid_predictions(fold_pred, fold_info)
        for fold_pred, fold_info in zip(preds, info)
    ]
    grids = [grid for grid, _ in per_fold]
    dates_all = [dates for _, dates in per_fold]

    return grids, dates_all

def grid_targets(dates_all, fire_cube, fire_cube_dates, t_k):
    """Cut per-fold target grids out of the fire cube, shifted t_k steps ahead.

    Args:
        dates_all: One date sequence per fold; only the first and last entry
            of each sequence are used.
        fire_cube: 3-d array whose last axis is indexed like `fire_cube_dates`.
        fire_cube_dates: Sorted dates, searched with `bisect`.
        t_k: Number of steps along the last axis to shift the targets by.

    Returns:
        A list with one 3-d array per fold. Each array keeps the length of its
        date window; the final `t_k` slices are zero-filled.
    """
    # TODO: Only need the dates from predictions
    targets = []
    for dates in dates_all:
        first = bisect.bisect_left(fire_cube_dates, dates[0])
        last = bisect.bisect_left(fire_cube_dates, dates[-1])
        window = fire_cube[:, :, first:last + 1]

        # Pad t_k all-zero slices at the end, then drop the first t_k slices:
        # position d of the result holds the cube value from t_k steps later.
        rows, cols = np.shape(window)[:2]
        padded = np.concatenate((window, np.zeros((rows, cols, t_k))), axis=2)
        targets.append(padded[:, :, t_k:])

    return targets

In [None]:
# TODO: Move this to fire_weather_integration
# One prepared frame per forecast horizon; X_pp[t_k-1] holds the frame for horizon t_k.
# Each source frame is copied before it is handed to prep().
X_pp = [
    prep(int_5km_10days_14_1k_df.copy(), 1),
    prep(int_5km_10days_14_2k_df.copy(), 2),
    prep(int_5km_10days_14_3k_df.copy(), 3),
]

In [None]:
def plot(results, t_k_arr, metric):
    """Plot one metric curve per model against the forecast day.

    Args:
        results: Mapping with 'auto', 'temp_humid' and 'all' entries. Each
            entry is a sequence with one item per element of `t_k_arr`; every
            item is unpacked into `metric` as positional arguments.
        t_k_arr: x positions of the curves. It must support `t_k_arr + 1`
            (e.g. a numpy array).
        metric: Callable scoring one item; its `__name__` labels the y axis.
    """
    # (results key, matplotlib format string, legend label), in drawing order
    series = [
        #('baseline_mean', "yv--", "Baseline (Mean)"),
        #('baseline_median', "cv--", "Baseline (Median)"),
        #('baseline_prev', "kv--", "Baseline (Previous)"),
        ('auto', "gs--", "Autoregression"),
        ('temp_humid', "r^--", "Temp/hum"),
        ('all', "bo--", "All weather"),
    ]
    for key, fmt, label in series:
        scores = [metric(*item) for item in results[key]]
        plt.plot(t_k_arr, scores, fmt, label=label, linewidth=2)

    plt.rcParams.update({'font.size': 14})
    # Legend is anchored just outside the upper-right corner of the axes
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.xlabel("Day of forecast (k)")
    # NOTE(review): ticks sit at t_k_arr+1 while the curves are drawn at t_k_arr —
    # confirm this one-step offset is intended.
    plt.xticks(t_k_arr+1)
    plt.ylabel(metric.__name__)
    

In [None]:
t_k_arr = np.arange(1, 4)

results = defaultdict(list)

reload(ev)
reload(rsr)

# (results key, covariate names) for each model variant, in training order
model_specs = (
    ('auto', ()),
    ('temp_humid', ('temperature', 'humidity')),
    ('all', ('temperature', 'humidity', 'wind', 'rain')),
)

for t_k in t_k_arr:
    print('Starting %d' % t_k)
    X_t_k = X_pp[t_k-1]  # X_pp is 0-based, horizons here are 1-based
    for key, covariates in model_specs:
        # Every model receives its own fresh covariate list
        results[key].append(train_poisson(X_t_k, t_k, list(covariates)))

In [None]:
def evaluate_cluster(results, X_pp, t_k_arr):
    """Pair cluster-level targets with every model's predictions, per horizon.

    Args:
        results: Mapping of model name -> list indexed by `t_k - 1`; each item
            is indexed as `item[0]` (per-fold predictions) and `item[1]`
            (per-fold info). Targets are always looked up with the 'auto'
            entry's info.
        X_pp: Prepared frames, indexed by `t_k - 1`.
        t_k_arr: Horizons to evaluate.

    Returns:
        defaultdict of model name -> list with one array per horizon. Each
        array stacks the (targets, predictions) pair of every fold and joins
        the folds along axis 1, so index 0 along the first axis is the targets
        and index 1 the predictions (assumes each fold's predictions have the
        same shape as its targets — confirm).
    """
    stacked = defaultdict(list)
    for t_k in t_k_arr:
        idx = t_k - 1
        fold_info = results['auto'][idx][1]
        fold_targets = cluster_targets(fold_info, X_pp[idx])

        for name in results:
            fold_preds = results[name][idx][0]
            pairs = list(zip(fold_targets, fold_preds))
            stacked[name].append(np.concatenate(pairs, axis=1))

    return stacked

def evaluate_grid(results, fire_cube, fire_cube_dates, t_k_arr):
    """Pair gridded targets with every model's gridded predictions, per horizon.

    Args:
        results: Mapping of model name -> list indexed by `t_k - 1`; each item
            is indexed as `item[0]` (per-fold predictions) and `item[1]`
            (per-fold info).
        fire_cube: Detection cube, forwarded to `grid_targets`.
        fire_cube_dates: Dates indexing the cube, forwarded to `grid_targets`.
        t_k_arr: Horizons to evaluate.

    Returns:
        defaultdict of model name -> list with one item per horizon; each item
        is a two-element list `[targets_flat, predictions_flat]`.
    """
    results_new = defaultdict(list)
    for t_k in t_k_arr:
        for k in results:
            entry = results[k][t_k-1]
            predict_g, dates_all = pred_to_grid(entry[0], entry[1])

            # Targets are derived from the dates of the predictions they are
            # compared with. Previously the dates were seeded from
            # results['auto'][0] and then overwritten inside this loop, so later
            # horizons were scored against targets cut for another horizon's dates.
            targets_g = grid_targets(dates_all, fire_cube, fire_cube_dates, t_k)

            # Each (target, prediction) pair stacks into one 4-d array; folds are
            # joined along the last (date) axis. Assumes the prediction grids
            # have the same shape as the target grids — confirm.
            stacked = np.concatenate(list(zip(targets_g, predict_g)), axis=3)
            results_new[k].append([half.flatten() for half in stacked])

    return results_new

In [None]:
# Debug: shape of the first fold's info for the 'auto' model, at whatever t_k
# was left behind by the most recently run loop.
first_fold_info = results['auto'][t_k-1][1][0]
print(np.shape(first_fold_info))

In [None]:
# Cluster-level evaluation: pair each model's predictions with the cluster targets
results_new = evaluate_cluster(results, X_pp, t_k_arr)
plot(results_new, t_k_arr, metrics.mean_absolute_error)
# Show the first figure now so the second metric is drawn on a fresh one
plt.show()
plot(results_new, t_k_arr, metrics.root_mean_squared_error)

In [None]:
# Grid-level evaluation: pair each model's gridded predictions with the fire cube
results_new = evaluate_grid(results, fire_cube, fire_cube_dates, t_k_arr)
plot(results_new, t_k_arr, metrics.mean_absolute_error)
# Show the first figure now so the second metric is drawn on a fresh one
plt.show()
plot(results_new, t_k_arr, metrics.root_mean_squared_error)

In [None]:
# Regional-model experiment built on rsr.RegionalSummationModel.
# NOTE(review): `pp2`, `train_regional` and `X_p` are not defined anywhere in the
# visible part of this notebook, so this cell fails as shown — confirm whether
# it is stale or whether the definitions live elsewhere.
# Horizons are 0-based here (the model is built with t_k+1); this rebinds the
# 1-based t_k_arr and the results mapping used by the cells above.
t_k_arr = np.arange(0, 3)

results = defaultdict(list)

# Pick up any edits made to the evaluation / regional model modules
reload(ev)
reload(rsr)

for t_k in t_k_arr:
    print 'Starting %d' % t_k
    X_ = pp2(int_5km_10days_14_1k_df, t_k)
    X_region = rsr.RegionalSummationModel(t_k+1, None, None, None).build_regional_data(X_)
    print 'Mean Daily Det %f' % np.mean(X_region.num_det)
    
    # Baselines: each entry is a (num_det_target, prediction) tuple, where the
    # prediction is the mean, the median, or the num_det column itself.
    results['baseline_mean'].append((X_region.num_det_target, np.mean(X_region.num_det)))
    results['baseline_median'].append((X_region.num_det_target, np.median(X_region.num_det)))
    results['baseline_prev'].append((X_region.num_det_target, X_region.num_det))

    # Same three covariate sets as the Poisson experiment: none, temp/humidity, all weather
    results['auto'].append(train_regional(X_p[t_k], t_k, [], X_))
    
    results['temp_humid'].append(train_regional(X_p[t_k], t_k, ['temperature', 'humidity'], X_))

    results['all'].append(train_regional(X_p[t_k], t_k, ['temperature', 'humidity', 'wind', 'rain'], X_))
    