# Global Model Testing (v1.0)

Casey A Graff

October 27th, 2017

Development and initial exploration of results from the global model.

In [None]:
# Absolute path to the repository checkout (machine-specific; adjust when
# running elsewhere). The trailing slash matters because the sub-directories
# below are built by plain string concatenation.
REP_DIR = "/home/cagraff/Documents/dev/fire_prediction/"
# Directory that holds the project's Python packages.
SRC_DIR = REP_DIR + 'src/'
# Directory that holds the project's data files.
DATA_DIR = REP_DIR + 'data/'

# Load system-wide packages
import os
import sys
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from collections import defaultdict
import datetime as dt
# Default figure size (width, height) for every plot in this notebook.
plt.rcParams['figure.figsize'] = [15,7]
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load project packages
# Switch the working directory to SRC_DIR before importing the project
# packages below (presumably so they resolve from the current directory --
# confirm if the import mechanism changes).
os.chdir(SRC_DIR)
from features.loaders import load_integrated_df
import models.poisson_regression as pr
import models.linear_regression as lr
import models.regional_summation_regression as rsr
import models.evaluation as ev
from models import metrics
from helper import date_util as du
from helper import df_util as dfu

In [None]:
# Load the integrated fire/weather DataFrame from its pickle under DATA_DIR.
int_5km_10days_14_df = load_integrated_df(
    os.path.join(
        DATA_DIR,
        'interim/integrated/fire_weather/fire_weather_integrated_gfs_modis_5km_10days_1400_alaska_2007-2016.pkl'
    )
)

In [None]:
# Display the first five rows of the loaded frame.
int_5km_10days_14_df[:5]

In [None]:
def pp(X, t_k, years=None):
    """Prepare cluster-level data for training, dropping each cluster's early days.

    Steps, in order:

    1. Add a ``year`` column derived from ``date_local``.
    2. Standardize the weather covariates to zero mean and unit variance
       (statistics are computed over all rows, before any year filtering).
    3. Optionally keep only the rows whose year is in ``years``.
    4. For every ``cluster_id``, drop the rows dated earlier than
       ``t_k + 1`` days after that cluster's first ``date_local``.
    5. Add the autoregressive column through
       ``PoissonRegressionModel.add_autoregressive_col(..., t_k + 1)``.

    Args:
        X: DataFrame with at least the columns ``temperature``, ``humidity``,
            ``wind``, ``rain``, ``date_local`` and ``cluster_id``. The
            caller's frame is left untouched.
        t_k: Forecast offset handed to ``PoissonRegressionModel``; the lag
            used for filtering and for the autoregressive column is
            ``t_k + 1``.
        years: Optional collection of years to keep. ``None`` or an empty
            collection keeps every year.

    Returns:
        The value returned by ``add_autoregressive_col`` for the filtered
        frame.
    """
    # assign() returns a new frame, so every later column write lands on our
    # own copy and never on the caller's data.
    X = X.assign(year=[d.year for d in X.date_local])

    # Standardize weather: z-score, i.e. divide by the standard deviation
    # (dividing by the variance does not yield unit-variance columns).
    for cov in ['temperature', 'humidity', 'wind', 'rain']:
        X[cov] = (X[cov] - X[cov].mean()) / X[cov].std(ddof=0)

    # Filter years (explicit length test so array-like input also works)
    if years is not None and len(years) > 0:
        X = X[X.year.isin(years)]

    # Filter out predicting before fire started: a row is usable only once
    # t_k + 1 days have passed since the first date seen for its cluster.
    lead = du.INC_ONE_DAY * (t_k + 1)
    legit = pd.Series(False, index=X.index)
    for _, clust_df in X.groupby('cluster_id', sort=False):
        legit_day = np.min(clust_df.date_local) + lead
        legit.loc[clust_df[clust_df.date_local >= legit_day].index] = True

    X_legit = X[legit]

    return pr.PoissonRegressionModel(t_k, []).add_autoregressive_col(X_legit, t_k + 1)

def pp2(X, t_k, years=None):
    """Prepare cluster-level data without dropping each cluster's early days.

    Adds a ``year`` column, standardizes the weather covariates to zero mean
    and unit variance (statistics computed over all rows), optionally keeps
    only the rows whose year is in ``years``, and adds the autoregressive
    column through
    ``PoissonRegressionModel.add_autoregressive_col(..., t_k + 1)``.

    Args:
        X: DataFrame with at least the columns ``temperature``, ``humidity``,
            ``wind``, ``rain`` and ``date_local``. The caller's frame is left
            untouched, so the same frame can safely be passed repeatedly.
        t_k: Forecast offset handed to ``PoissonRegressionModel``; the lag
            used for the autoregressive column is ``t_k + 1``.
        years: Optional collection of years to keep. ``None`` or an empty
            collection keeps every year.

    Returns:
        The value returned by ``add_autoregressive_col`` for the prepared
        frame.
    """
    # assign() returns a new frame; writing the standardized columns into it
    # keeps the caller's data intact. Mutating the input would re-scale an
    # already-scaled frame whenever the same object is passed in again.
    X = X.assign(year=[d.year for d in X.date_local])

    # Standardize weather: z-score, i.e. divide by the standard deviation
    # (dividing by the variance does not yield unit-variance columns).
    for cov in ['temperature', 'humidity', 'wind', 'rain']:
        X[cov] = (X[cov] - X[cov].mean()) / X[cov].std(ddof=0)

    # Filter years (explicit length test so array-like input also works)
    if years is not None and len(years) > 0:
        X = X[X.year.isin(years)]

    return pr.PoissonRegressionModel(t_k, []).add_autoregressive_col(X, t_k + 1)
        
def train_poisson(X_t, t_k):
    """Cross-validate a covariate-free Poisson regression model by year.

    Builds a ``PoissonRegressionModel`` with no covariates, runs
    ``ev.cross_validation_years`` on ``X_t`` and joins the per-fold results
    along axis 1.

    Returns:
        A one-element list holding the concatenated results.
    """
    model = pr.PoissonRegressionModel(t_k=t_k, covariates=[])

    # The second value returned by the cross-validation is not needed here.
    fold_results, _ = ev.cross_validation_years(model, X_t)

    return [np.concatenate(fold_results, axis=1)]

def train_regional(X_t, t_k, covariates=None, y2=None):
    """Cross-validate a regional summation model by year.

    A ``PoissonRegressionModel`` is used as the cluster model inside a
    ``RegionalSummationModel`` (no ignition model). The model is evaluated
    with ``ev.cross_validation_years``, its ``ignition_bias`` is printed,
    and the per-fold results are joined along axis 1.

    Args:
        X_t: Training data passed to ``ev.cross_validation_years``.
        t_k: Forecast offset given to both models.
        covariates: Covariate names given to both models. ``None`` (the
            default) means no covariates.
        y2: Forwarded unchanged as the third argument of
            ``ev.cross_validation_years``.

    Returns:
        The per-fold results concatenated along axis 1.
    """
    # A fresh list per call: a shared mutable default could leak state
    # between calls if either model stores or modifies it.
    if covariates is None:
        covariates = []

    prm = pr.PoissonRegressionModel(t_k=t_k, covariates=covariates)
    rsm = rsr.RegionalSummationModel(t_k=t_k, covariates=covariates, cluster_model=prm, ignition_model=None)

    results, _ = ev.cross_validation_years(rsm, X_t, y2)

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(rsm.ignition_bias)

    return np.concatenate(results, axis=1)

In [None]:
# One preprocessed frame per t_k in 0..4; every call receives its own copy of
# the integrated frame.
X_p = [pp(int_5km_10days_14_df.copy(), t_k) for t_k in range(5)]

In [None]:
t_k_arr = np.arange(0, 4)

results = defaultdict(list)

# Pick up any edits made to the project modules since they were imported.
reload(ev)
reload(rsr)

# (results key, covariates) for each regional model variant, in run order.
covariate_sets = [
    ('auto', []),
    ('temp_humid', ['temperature', 'humidity']),
    ('all', ['temperature', 'humidity', 'wind', 'rain']),
]

for t_k in t_k_arr:
    print('Starting %d' % t_k)
    # Pass a copy, as the pp() calls do: pp2 standardizes the frame it is
    # given, and re-using the shared frame would re-scale already-scaled
    # weather columns on every iteration.
    X_ = pp2(int_5km_10days_14_df.copy(), t_k)
    X_daily = rsr.RegionalSummationModel(None, None, None, None).build_regional_data(X_)
    print('Mean Daily Det %f' % np.mean(X_daily.num_det))

    # Baseline: pair num_det with num_det_prev from the regional data.
    results['baseline'].append((X_daily.num_det, X_daily.num_det_prev))

    for key, covs in covariate_sets:
        # list(covs) hands each call its own list, as the literal arguments did.
        results[key].append(train_regional(X_p[t_k], t_k, list(covs), X_))
    
def plot(results, t_k_arr, metric):
    """Plot ``metric`` against forecast day for the baseline and each model.

    ``results`` maps the keys ``'baseline'``, ``'auto'``, ``'temp_humid'``
    and ``'all'`` to sequences with one entry per value in ``t_k_arr``; each
    entry is unpacked into ``metric``. The x-axis shows ``t_k_arr + 1`` and
    the y-axis is labelled with ``metric.__name__``.
    """
    forecast_days = t_k_arr + 1

    # (results key, matplotlib format string, legend label), in draw order.
    series_specs = [
        ('baseline', "kv--", "Baseline"),
        ('auto', "gs--", "Autoregression"),
        ('temp_humid', "r^--", "Temp/hum"),
        ('all', "bo--", "All weather"),
    ]
    for key, fmt, label in series_specs:
        scores = [metric(*pair) for pair in results[key]]
        plt.plot(forecast_days, scores, fmt, label=label, linewidth=2)

    plt.rcParams.update({'font.size': 14})
    # Anchor the legend just outside the axes, to the right.
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.xlabel("Day of forecast (k)")
    plt.xticks(forecast_days)
    plt.ylabel(metric.__name__)
    
# Compare the baseline and the three regional models using mean absolute error.
plot(results, t_k_arr, metrics.mean_absolute_error)