In [None]:
import cPickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from time import time
os.chdir(u'/Users/zbutler/research/fire_prediction')
from data import data
from geometry.grid_conversion import ak_bb, fairbanks_lat_lon, get_latlon_xy_fxns
from prediction.fire_clustering import cluster_over_time_with_merging, FIRE_SEASON
%matplotlib inline

In [None]:
# Load the modis dataset and the gfs dictionary, then record the first and
# last year present in modis (later cells iterate over this year range).
modis = data.load_modis()
gfs_dict = data.load_gfs_dict()
min_year, max_year = modis.year.min(), modis.year.max()

In [None]:
# Add daymonth to standard modis dataset and save it
from carpentry.get_feat_df import add_daymonth
modis = add_daymonth(modis)
# HIGHEST_PROTOCOL is a binary pickle format, so the file must be opened in
# binary mode ("wb"); text mode can corrupt the stream on some platforms.
with open("data/ak_fires.pkl", "wb") as fout:
    cPickle.dump(modis, fout, protocol=cPickle.HIGHEST_PROTOCOL)
# Reload through the standard loader and preview the first rows.
# NOTE(review): presumably load_modis reads the file written above -- confirm.
modis = data.load_modis()
modis.iloc[0:10]

In [None]:
# Sweep a range of candidate clustering thresholds. For every threshold the
# clustering is run separately on each year of modis, and we keep:
#   n_clust_arr[k][y] -> number of distinct cluster labels for threshes[k], year min_year + y
#   n_merge_arr[k][y] -> len() of the merge dict returned for that same run
threshes = [2.5, 5, 10, 20, 50, 100]
n_clust_arr = []
n_merge_arr = []
for thresh in threshes:
    tic = time()
    yearly_clusters, yearly_merges = [], []
    for yr in xrange(min_year, max_year+1):
        fires_in_year = modis[modis.year==yr]
        yr_clust_df, yr_merge_dict = cluster_over_time_with_merging(fires_in_year, thresh)
        yearly_clusters.append(len(yr_clust_df.cluster.unique()))
        yearly_merges.append(len(yr_merge_dict))
    elapsed = time() - tic
    # Report wall-clock cost of this threshold so slow settings are visible
    print("%f thresh took %f sec" % (thresh, elapsed))
    n_clust_arr.append(yearly_clusters)
    n_merge_arr.append(yearly_merges)

In [None]:
# Draw two figures from the threshold sweep, one line per threshold:
# first the yearly cluster counts, then the yearly merge counts.
marker_arr = ['b--', 'k--', 'r--', 'g--', 'y--', 'b-', 'r-']
year_axis = np.arange(min_year, max_year+1)
figure_specs = [
    (n_clust_arr, "Number of clusters", "pics/n_clusts_by_year.png"),
    (n_merge_arr, "Number of merged fires this fire season", "pics/n_merges_by_year.png"),
]
for counts_by_thresh, y_label, out_path in figure_specs:
    plt.figure(figsize=(10,7))
    for i, thresh in enumerate(threshes):
        plt.plot(year_axis, counts_by_thresh[i], marker_arr[i], label="Thresh %.2f" % thresh)
    # Put the legend outside the axes, to the right of the plot
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    ax = plt.gca()
    # Show years as plain numbers rather than an offset from a base value
    ax.get_xaxis().get_major_formatter().set_useOffset(False)
    plt.xlabel("Year")
    plt.ylabel(y_label)
    plt.savefig(out_path, bbox_inches='tight')
    plt.show()
    plt.close()

In [None]:
# Get the cluster data set we want
# Reload the clustering module first so that edits made to it on disk are
# picked up without restarting the kernel, then re-import the names we use.
import prediction.fire_clustering as fc
reload(fc)
from prediction.fire_clustering import cluster_over_time_with_merging, FIRE_SEASON

# Clustering threshold used for the full (all-years) run
tau_f = 10.
clust_df, merge_dict_dict = cluster_over_time_with_merging(modis, tau_f)
# Report the frame's dimensions (rows, columns), not its full contents
print("Big clust df size: " + str(clust_df.shape))
# HIGHEST_PROTOCOL is a binary pickle format, so both files are opened in
# binary mode ("wb").
with open("data/merge_dict_dict.pkl", "wb") as fpkl:
    cPickle.dump(merge_dict_dict, fpkl, protocol=cPickle.HIGHEST_PROTOCOL)
with open("data/clust_df.pkl", "wb") as fpkl:
    cPickle.dump(clust_df, fpkl, protocol=cPickle.HIGHEST_PROTOCOL)
clust_df.iloc[0:10]

In [None]:
# Now, make some plots comparing the number of fires in new clusters vs fires in preexisting fires
years = range(min_year, max_year+1)
days = range(FIRE_SEASON[0], FIRE_SEASON[1]+1)
# Both matrices are indexed [year - min_year, day - FIRE_SEASON[0]].
# A row of clust_df counts as "new" when its dayofyear equals the earliest
# dayofyear of its cluster within that year, and as "preexisting" otherwise.
new_fires_mat = np.zeros((len(years), len(days)))
preex_fires_mat = np.zeros((len(years), len(days)))

for i,year in enumerate(years):
    annual_fires = clust_df[clust_df.year == year]
    # Earliest dayofyear of every cluster this year, found in one grouped pass
    # instead of re-filtering the frame once per cluster.
    clust2startday = annual_fires.groupby("cluster")["dayofyear"].min().to_dict()
    for j,day in enumerate(days):
        daily_fires = annual_fires[annual_fires.dayofyear == day]
        counts = daily_fires.cluster.value_counts()
        for clust, count in zip(counts.index, np.array(counts)):
            # i and j are exactly year - min_year and day - days[0]
            if day == clust2startday[clust]:
                new_fires_mat[i,j] += count
            else:
                preex_fires_mat[i,j] += count

# First, a simple plot by year
plt.figure(figsize=(10,7))
plt.plot(years, np.sum(new_fires_mat, axis=1), 'rs', label="New fires")
plt.plot(years, np.sum(preex_fires_mat, axis=1), 'bs', label="Preexisting fires")
ax = plt.gca()
ax.get_xaxis().get_major_formatter().set_useOffset(False)
plt.xlabel("Year")
plt.ylabel("Number of fires")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.savefig("pics/yearly_new_vs_preex.png", bbox_inches='tight')
plt.show()
plt.close()

# Second, totals per day of the season, summed over all years
plt.figure(figsize=(10,7))
plt.plot(days, np.sum(new_fires_mat, axis=0), 'r--', label="New fires")
plt.plot(days, np.sum(preex_fires_mat, axis=0), 'b--', label="Preexisting fires")
plt.xlabel("Day of season")
plt.ylabel("Number of fires (across all years)")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.savefig("pics/daily_new_vs_preex.png", bbox_inches='tight')
plt.show()
plt.close()

# Third, the daily breakdown for one single year.
focus_year = 2013
# A year below min_year would give a negative row index and silently plot a
# different year's data, so fail loudly instead.
if not (min_year <= focus_year <= max_year):
    raise ValueError("focus year %d is outside the data range %d-%d"
                     % (focus_year, min_year, max_year))
focus_row = focus_year - min_year
plt.figure(figsize=(10,7))
plt.plot(days, new_fires_mat[focus_row,:], 'r--', label="New fires")
plt.plot(days, preex_fires_mat[focus_row,:], 'b--', label="Preexisting fires")
plt.xlabel("Day of season")
plt.ylabel("Number of fires (%d)" % focus_year)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.savefig("pics/%d_new_vs_preex.png" % focus_year, bbox_inches='tight')
plt.show()
# Close the figure like every other figure in this notebook
plt.close()

In [None]:
# Load the cluster feature frame and preview its first ten rows.
# NOTE(review): this loads clust_thresh=5. while the clustering above was run
# with tau_f = 10. -- confirm the mismatch is intentional.
clust_feat_df = data.load_clust_feat_df(clust_thresh=5.)
clust_feat_df.head(10)