# Fire and Weather Timing Open Question Exploration (v1.0)

Casey A Graff

August 11th, 2017

This notebook explores how fire detections and weather measurements relate to each other in time. A second open question is where to place the "cut-off" that decides whether a prediction belongs to day T or to day T+1.

In [None]:
# Absolute path of the local repository checkout (trailing slash included);
# the source and data directories are derived from it by plain concatenation.
REP_DIR = "/home/cagraff/Documents/dev/fire_prediction/"
SRC_DIR = ''.join([REP_DIR, 'src/'])
DATA_DIR = ''.join([REP_DIR, 'data/'])

# Load system-wide packages
import os
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

# Load project packages
# The working directory is switched to SRC_DIR before the import below --
# presumably so that the project-local `features` package can be resolved
# from the current directory; TODO confirm. Keep these two lines in order.
os.chdir(SRC_DIR)
from features.loaders import load_cluster_df
# Default figure size for plots that do not pass their own `figsize`.
plt.rcParams['figure.figsize'] = [15,7]

In [None]:
# Load data
# `cluster_df` is the table every later cell filters and plots; it is indexed
# like a pandas DataFrame below (presumably one -- confirm in features.loaders).
cluster_df = load_cluster_df(os.path.join(DATA_DIR, 'archived/cluster/clust_feat_df_5.pkl'))
# Show what iterating the table yields (column labels, if it is a DataFrame).
# A single parenthesized argument prints the same text on Python 2 and
# Python 3, whereas the bare `print x` statement only parses on Python 2.
print(list(cluster_df))

In [None]:
# Inclusive (first, last) day-of-year bounds and the matching array of days.
fire_season = (133, 242)
season_days = np.arange(fire_season[0], fire_season[1] + 1)

# Total number of detections for every cluster id from 0 up to AND INCLUDING
# the largest id present. The upper bound needs the +1: range() excludes its
# stop value, so without it the highest-numbered cluster was never counted.
cluster_counts = []
for cluster_id in range(int(max(cluster_df.cluster)) + 1):
    cluster_rows = cluster_df[cluster_df.cluster == cluster_id]
    cluster_counts.append((cluster_id, np.sum(cluster_rows.n_det)))

# Rank clusters by total detections, largest first.
cluster_counts.sort(key=lambda pair: pair[1], reverse=True)

# Print the number of clusters considered, then the top 20 that have a
# non-zero total. Formatting into one string keeps the output identical to the
# Python 2 `print a, b` statement while also being valid on Python 3.
top_nonzero = [pair for pair in cluster_counts if pair[1]][:20]
print('{} {}'.format(len(cluster_counts), top_nonzero))

In [None]:
def plot_cluster(cluster_id):
    """Draw one stacked figure of per-day series for a single cluster.

    Rows of the notebook-level ``cluster_df`` whose ``cluster`` value equals
    *cluster_id* are selected, and each listed column is plotted against
    ``dayofyear`` in its own subplot. Nothing is returned; the figure is
    created through pyplot.
    """
    rows = cluster_df[cluster_df.cluster == cluster_id]

    # (column name, matplotlib format string, subplot title). An empty format
    # string leaves matplotlib's default line style in place.
    panels = [
        ('n_det', 'r+', 'Num Det'),
        ('rain', '', 'Rain'),
        ('rain_del_2', '', 'Rain2'),
        ('humidity', '', 'Humidity'),
        ('wind', '', 'Wind'),
        ('temp', '', 'Temp (C)'),
    ]

    # One subplot row per panel, all in a single column.
    _, axes = plt.subplots(nrows=len(panels), ncols=1, figsize=(12, 10))
    plt.suptitle('Cluster #{}'.format(cluster_id))
    plt.tight_layout(pad=4)

    for ax, (column, fmt, heading) in zip(axes, panels):
        ax.plot(rows.dayofyear, rows[column], fmt)
        ax.set_title(heading)
        
    

# Plot three hand-picked clusters, in this order.
for cluster_id in (254, 244, 984):
    plot_cluster(cluster_id)


In [None]:
# Year whose rows are aggregated and plotted in this cell.
YEAR_SEL = 2008
# (first, last) day-of-year of the fire season; both ends are treated as
# inclusive by the filtering and the per-day loop below.
fire_season = (133,242)

def mean_with_nans(arr):
    """Return the mean of the non-NaN entries of *arr*.

    NaN entries are dropped before averaging. When nothing is left (empty
    input, or every entry is NaN) the result is ``np.nan``.
    """
    valid = []
    for value in arr:
        if np.isnan(value):
            continue
        valid.append(value)
    return np.mean(valid) if valid else np.nan

# A single parenthesized argument prints the same text on Python 2 and
# Python 3; the bare `print x` statement is a SyntaxError on Python 3.
print('Using Target Time %.2d00' % 8)

# Keep only rows from the selected year, then only days inside the fire
# season (both bounds inclusive).
sel_df = cluster_df[cluster_df.year==YEAR_SEL]
sel_df = sel_df[(sel_df.dayofyear >= fire_season[0]) & (sel_df.dayofyear <= fire_season[1])]

# Build one value per day of the season: the summed detection count and the
# NaN-ignoring mean of each weather column over all rows recorded that day.
# Days without any rows still get an entry, so every list lines up with `dates`.
dates, num_det, rain, humidity, wind, temp = [], [], [], [], [], []
for date in range(fire_season[0], fire_season[1] + 1):
    day_df = sel_df[sel_df.dayofyear == date]
    dates.append(date)
    num_det.append(np.sum(day_df.n_det))
    rain.append(mean_with_nans(day_df.rain))
    humidity.append(mean_with_nans(day_df.humidity))
    wind.append(mean_with_nans(day_df.wind))
    temp.append(mean_with_nans(day_df.temp))

# (series, matplotlib format string, subplot title). An empty format string
# leaves matplotlib's default line style in place.
data_types = [(num_det, 'r+', 'Num Det'), (rain, '', 'Rain'), (humidity, '', 'Humidity'),
              (wind, '', 'Wind'), (temp, '', 'Temp (C)')]

# Size the subplot grid from data_types so adding or removing a series cannot
# leave the row count out of step with the list.
fig, axes = plt.subplots(nrows=len(data_types), ncols=1, figsize=(12, 10))
plt.suptitle('All Detections (%d)' % YEAR_SEL)
plt.tight_layout(pad=4)

for ax, (values, form, title) in zip(axes, data_types):
    ax.plot(dates, values, form)
    ax.set_title(title)