# Fire and Weather Timing Open Question Exploration (v2.0)

Casey A Graff

August 31st, 2017

Exploring the temporal relationships between fire detections and weather measurements. Also of interest is when to "cut off" predictions for day T versus day T+1.

**v2 now using re-extracted data**

In [None]:
# Absolute path of the repository checkout; the source and data directories
# are derived from it by string concatenation, so the trailing slash matters.
REP_DIR = "/home/cagraff/Documents/dev/fire_prediction/"
SRC_DIR = REP_DIR + 'src/'
DATA_DIR = REP_DIR + 'data/'

# Load system-wide packages
import os
import sys
import numpy as np
from matplotlib import pyplot as plt
import datetime as dt
%matplotlib inline

# Load project packages
# The working directory is switched to SRC_DIR before the project imports
# (presumably so they resolve relative to the source tree -- confirm).
os.chdir(SRC_DIR)
from features.loaders import load_integrated_df
from helper import date_util as du
from visualization import timing as tplt
# Default figure size applied to plots created after this point
plt.rcParams['figure.figsize'] = [15,7]

In [None]:
# Load the integrated fire/weather data for the 0800 and 1400 target times
sys.path.append(SRC_DIR+'features')
path_0800 = os.path.join(DATA_DIR, 'interim/integrated/fire_weather/fire_weather_integrated_gfs_modis_5km_0800_alaska_2007-2016.pkl')
path_1400 = os.path.join(DATA_DIR, 'interim/integrated/fire_weather/fire_weather_integrated_gfs_modis_5km_1400_alaska_2007-2016.pkl')
int_0800_df = load_integrated_df(path_0800)
int_1400_df = load_integrated_df(path_1400)

In [None]:
# Bare expression: the notebook renders the 1400 data as this cell's output
int_1400_df

In [None]:
# Total number of detections per cluster id, sorted from most to fewest.
# The upper bound is inclusive (+1): range(max_id) alone would silently skip
# the cluster with the highest id.
max_cluster_id = int(max(int_1400_df.cluster_id))
cluster_counts = []
for cid in range(max_cluster_id + 1):
    sel_df = int_1400_df[int_1400_df.cluster_id==cid]
    cluster_counts.append((cid, np.sum(sel_df.num_det)))
cluster_counts.sort(reverse=True, key=lambda x: x[1])
# Every id in 0..max is counted here, including ids with no rows
print('Num clusters: %d' % len(cluster_counts))
# Show the 20 largest clusters, dropping those with a zero detection total
print([c for c in cluster_counts if c[1]][:20])

## Explore Timing of Detections and Weather Changes

### Daily Resolution Comparison
Using the integrated data we can compare the daily alignment for measuring weather at different times of day.

In [None]:
# Clusters to plot, followed by one (column name, line format, title) entry
# per series handed to tplt.plot_df (tuple meaning presumed from its use in
# the plotting code -- confirm against visualization.timing).
cluster_ids_to_plot = [198]

data_types = [
    ('num_det', 'r+', 'Num Det'),
    ('rain', '', 'Rain'),
    ('humidity', '', 'Humidity'),
    ('wind', '', 'Wind'),
    ('temperature', '', 'Temp (C)'),
]

print('Using Target Time %.2d00' % 8)
for cluster_id in cluster_ids_to_plot:
    # Keep only the rows that belong to the current cluster
    in_cluster = int_0800_df.cluster_id == cluster_id
    sel_df = int_0800_df[in_cluster]
    plot_title = 'Cluster #{}'.format(cluster_id)
    tplt.plot_df(sel_df, data_types, plot_title)

In [None]:
# Same per-cluster plot as for 0800, now using the 1400 data
print('Using Target Time %.2d00' % 14)
for cluster_id in cluster_ids_to_plot:
    # Keep only the rows that belong to the current cluster
    in_cluster = int_1400_df.cluster_id == cluster_id
    sel_df = int_1400_df[in_cluster]
    plot_title = 'Cluster #{}'.format(cluster_id)
    tplt.plot_df(sel_df, data_types, plot_title)

In [None]:
# Year under study and its fire-season window (May 14 to August 31)
YEAR_SEL = 2009
season_start = dt.date(YEAR_SEL, 5, 14)
season_end = dt.date(YEAR_SEL, 8, 31)
fire_season = (season_start, season_end)
# Every date produced by du.daterange for the window; the end bound is passed
# as the last day plus du.INC_ONE_DAY
season_dates = list(du.daterange(fire_season[0], fire_season[1] + du.INC_ONE_DAY))

def apply_with_nans(func, arr):
    """Apply ``func`` to the non-NaN entries of ``arr``.

    Entries for which ``np.isnan`` is true are dropped first. If nothing
    remains (``arr`` was empty or all-NaN), ``np.nan`` is returned and
    ``func`` is not called; otherwise the result of ``func`` on the list of
    remaining values is returned.
    """
    non_nan = [value for value in arr if not np.isnan(value)]
    return func(non_nan) if non_nan else np.nan

def get_day_df_vals(sel_df):
    """Aggregate detections and weather per day over the fire season.

    For each day produced by ``du.daterange`` over the module-level
    ``fire_season`` (end bound passed as the last day plus
    ``du.INC_ONE_DAY``), the rows of ``sel_df`` whose ``date_local`` equals
    that day are selected. ``num_det`` and ``rain`` are summed; ``humidity``,
    ``wind`` and ``temperature`` are averaged. NaN entries are ignored, and a
    day with no non-NaN values yields NaN (see ``apply_with_nans``).

    Returns a list of ``(values, line format, title)`` tuples, one per
    series, in the order: detections, rain, humidity, wind, temperature.
    Each ``values`` list has one entry per day.
    """
    # The original also built a day-of-year list that was never used or
    # returned; callers pass their own dates to the plotting function.
    num_det, rain, humidity, wind, temp = [], [], [], [], []
    for date in du.daterange(fire_season[0], fire_season[1]+du.INC_ONE_DAY):
        day_df = sel_df[sel_df.date_local==date]
        num_det.append(apply_with_nans(np.sum, day_df.num_det))
        rain.append(apply_with_nans(np.sum, day_df.rain))
        humidity.append(apply_with_nans(np.mean, day_df.humidity))
        wind.append(apply_with_nans(np.mean, day_df.wind))
        temp.append(apply_with_nans(np.mean, day_df.temperature))

    return [
        (num_det, 'r+', 'Num Det'),
        (rain, '', 'Rain'),
        (humidity, '', 'Humidity'),
        (wind, '', 'Wind'),
        (temp, '', 'Temp (C)'),
    ]

def make_weather_plot(data_types, dates):
    """Draw one stacked subplot per series, all against the same dates.

    data_types: sequence of ``(values, line format, title)`` tuples; each
        ``values`` is plotted against ``dates`` with the given format string
        and the subplot is titled ``title``.
    dates: x-axis values shared by every subplot.

    The figure title uses the module-level ``YEAR_SEL``.
    """
    # squeeze=False always yields a 2-D array of axes; with the default, a
    # single series returns a bare Axes object and indexing it would fail.
    fig, axes = plt.subplots(nrows=len(data_types), ncols=1, figsize=(12,10),
                             squeeze=False)
    plt.suptitle('All Detections (%d)' % YEAR_SEL)
    plt.tight_layout(pad=4)

    # Single column, so take column 0 of the axes grid
    for ax, (values, form, title) in zip(axes[:, 0], data_types):
        ax.plot(dates, values, form)
        ax.set_title(title)

# Season-wide daily aggregates for the 0800 data
print('Using Target Time %.2d00' % 8)
on_or_after_start = int_0800_df.date_local >= fire_season[0]
on_or_before_end = int_0800_df.date_local <= fire_season[1]
sel_df = int_0800_df[on_or_after_start & on_or_before_end]
data_types = get_day_df_vals(sel_df)
make_weather_plot(data_types, season_dates)

In [None]:
# Season-wide daily aggregates for the 1400 data
print('Using Target Time %.2d00' % 14)
on_or_after_start = int_1400_df.date_local >= fire_season[0]
on_or_before_end = int_1400_df.date_local <= fire_season[1]
sel_df = int_1400_df[on_or_after_start & on_or_before_end]
data_types = get_day_df_vals(sel_df)
make_weather_plot(data_types, season_dates)