In [None]:
import os

# Repository root. Set the FIRE_PREDICTION_DIR environment variable to run the
# notebook from another checkout; the original developer path stays the default.
# Joining with '' guarantees a trailing separator whatever the override looks like.
REP_DIR = os.path.join(
    os.environ.get('FIRE_PREDICTION_DIR') or '/home/cagraff/Documents/dev/fire_prediction/',
    '')
# Project source tree (made the working directory before the project imports).
SRC_DIR = os.path.join(REP_DIR, 'src', '')
# Root of the data files loaded by the cells below.
DATA_DIR = os.path.join(REP_DIR, 'data', '')

# Load system-wide packages
import os
from matplotlib import pyplot as plt
import numpy as np
from scipy.stats import cumfreq
from scipy.stats.stats import pearsonr
import datetime as dt
import pytz
import time
from matplotlib import animation, rc
from IPython.display import HTML
import math
#%matplotlib inline

# Load project packages
os.chdir(SRC_DIR)
from features.loaders import load_cluster_df
from helper import date_util as du
from visualization.mapping import make_map
from helper.geometry import get_default_bounding_box, LatLonBoundingBox

In [None]:
# Load data: one cluster-id frame per clustering variant (the variant is encoded
# in each file name).
cluster_id_5km_3day_f_df = load_cluster_df(
    os.path.join(DATA_DIR, 'interim/modis/fire_cluster/fire_cluster_ids_5km_3days_forwards_modis_alaska_2007-2016.pkl')
)
cluster_id_5km_3day_df = load_cluster_df(
    os.path.join(DATA_DIR, 'interim/modis/fire_cluster/fire_cluster_ids_5km_3day_modis_alaska_2007-2016.pkl')
)
cluster_id_5km_df = load_cluster_df(
    os.path.join(DATA_DIR, 'interim/modis/fire_cluster/fire_cluster_ids_5km_modis_alaska_2007-2016.pkl')
)
cluster_id_5km_10day_df = load_cluster_df(
    os.path.join(DATA_DIR, 'interim/modis/fire_cluster/fire_cluster_ids_5km_10days_modis_alaska_2007-2016.pkl')
)

In [None]:
# Show the 10-day clustering frame as the cell output.
cluster_id_5km_10day_df

In [None]:
# Tally detections per cluster id for the 5 km clustering.
# Assumes load_cluster_df returned a pandas DataFrame -- TODO confirm.
# One value_counts() pass replaces a full boolean-mask scan of the frame per id.
id_sizes = cluster_id_5km_df.cluster_id.value_counts()
size_by_id = dict(zip(id_sizes.index.tolist(), id_sizes.values.tolist()))

# range() excludes its stop value, so add 1 to tally the highest cluster id too.
max_id = int(max(cluster_id_5km_df.cluster_id))
cluster_counts = [(i, size_by_id.get(i, 0)) for i in range(max_id + 1)]

# Largest clusters first; the sort is stable, so ties stay in ascending id order.
cluster_counts.sort(reverse=True, key=lambda x: x[1])

# "Num clusters" counts every id in 0..max_id, including ids with no rows.
# Single-argument print(...) behaves identically under Python 2 and Python 3.
print('Num clusters: %d' % len(cluster_counts))
print([c for c in cluster_counts if c[1]][:20])

In [None]:
# Tally detections per cluster id for the 5 km / 3 day clustering.
# Assumes load_cluster_df returned a pandas DataFrame -- TODO confirm.
# One value_counts() pass replaces a full boolean-mask scan of the frame per id.
id_sizes = cluster_id_5km_3day_df.cluster_id.value_counts()
size_by_id = dict(zip(id_sizes.index.tolist(), id_sizes.values.tolist()))

# range() excludes its stop value, so add 1 to tally the highest cluster id too.
max_id = int(max(cluster_id_5km_3day_df.cluster_id))
cluster_counts = [(i, size_by_id.get(i, 0)) for i in range(max_id + 1)]

# Largest clusters first; the sort is stable, so ties stay in ascending id order.
cluster_counts.sort(reverse=True, key=lambda x: x[1])

# "Num clusters" counts every id in 0..max_id, including ids with no rows.
# Single-argument print(...) behaves identically under Python 2 and Python 3.
print('Num clusters: %d' % len(cluster_counts))
print([c for c in cluster_counts if c[1]][:20])

In [None]:
# Tally detections per cluster id for the 5 km / 10 day clustering.
# Assumes load_cluster_df returned a pandas DataFrame -- TODO confirm.
# One value_counts() pass replaces a full boolean-mask scan of the frame per id.
id_sizes = cluster_id_5km_10day_df.cluster_id.value_counts()
size_by_id = dict(zip(id_sizes.index.tolist(), id_sizes.values.tolist()))

# range() excludes its stop value, so add 1 to tally the highest cluster id too.
max_id = int(max(cluster_id_5km_10day_df.cluster_id))
cluster_counts = [(i, size_by_id.get(i, 0)) for i in range(max_id + 1)]

# Largest clusters first; the sort is stable, so ties stay in ascending id order.
cluster_counts.sort(reverse=True, key=lambda x: x[1])

# "Num clusters" counts every id in 0..max_id, including ids with no rows.
# Single-argument print(...) behaves identically under Python 2 and Python 3.
print('Num clusters: %d' % len(cluster_counts))
print([c for c in cluster_counts if c[1]][:20])

In [None]:
# Tally detections per cluster id for the 5 km / 3 day "forwards" clustering.
# Assumes load_cluster_df returned a pandas DataFrame -- TODO confirm.
# One value_counts() pass replaces a full boolean-mask scan of the frame per id.
id_sizes = cluster_id_5km_3day_f_df.cluster_id.value_counts()
size_by_id = dict(zip(id_sizes.index.tolist(), id_sizes.values.tolist()))

# range() excludes its stop value, so add 1 to tally the highest cluster id too.
max_id = int(max(cluster_id_5km_3day_f_df.cluster_id))
cluster_counts = [(i, size_by_id.get(i, 0)) for i in range(max_id + 1)]

# Largest clusters first; the sort is stable, so ties stay in ascending id order.
cluster_counts.sort(reverse=True, key=lambda x: x[1])

# "Num clusters" counts every id in 0..max_id, including ids with no rows.
# Single-argument print(...) behaves identically under Python 2 and Python 3.
print('Num clusters: %d' % len(cluster_counts))
print([c for c in cluster_counts if c[1]][:20])

In [None]:
# Default bounding box from helper.geometry (presumably Alaska, going by the name -- confirm).
# NOTE(review): not referenced by any other cell visible here.
ALASKA_BB = get_default_bounding_box()

def animate_map_latlon(df, bb, dates):
    """Animate the detections in `df` frame by frame on a map covering `bb`.

    Frame i shows rows whose `date_local` equals dates[i] in red and rows
    whose `date_local` is earlier than dates[i] in blue. The title shows the
    date and its day of year.

    Args:
        df: table with `date_local`, `lat` and `lon` columns.
        bb: bounding box passed straight to make_map.
        dates: indexable sequence of dates, one animation frame per entry.

    Returns:
        The matplotlib FuncAnimation driving the figure.
    """
    fig = plt.figure(figsize=(10,15))

    mp = make_map(bb)
    mp.shadedrelief()

    earlier_pts = mp.scatter([], [], 30, latlon=True, marker='o', color='b', alpha=.7)
    current_pts = mp.scatter([], [], 30, latlon=True, marker='o', color='r', alpha=.7)

    def projected_offsets(rows):
        # Project lon/lat through the map object and pack the result as an
        # explicit (N, 2) array. Unlike a bare zip(...), this is a real array
        # on Python 3 as well and keeps its 2-column shape when `rows` is empty.
        xs, ys = mp(list(rows.lon), list(rows.lat))
        return np.column_stack((xs, ys))

    def init():
        # A (0, 2) array clears the markers while keeping the offsets 2-D.
        current_pts.set_offsets(np.empty((0, 2)))
        earlier_pts.set_offsets(np.empty((0, 2)))
        return current_pts, earlier_pts

    def animate(i):
        date = dates[i]

        plt.title('Date %s (day %d)' % (str(date), du.dayofyear_from_datetime(date)))

        current_pts.set_offsets(projected_offsets(df[df.date_local==date]))
        earlier_pts.set_offsets(projected_offsets(df[df.date_local<date]))

        return current_pts, earlier_pts

    # call the animator. blit=True means only re-draw the parts that have changed.
    anim = animation.FuncAnimation(fig, animate, init_func=init,
                                   frames=len(dates), interval=1000, blit=True)

    return anim

def round_up_to_nearest_half(x):
    """Return the smallest multiple of 0.5 that is greater than or equal to x."""
    doubled = x * 2
    return math.ceil(doubled) / 2

In [None]:
CLUSTER_ID = 198

# Rows of the 5 km clustering that belong to the chosen cluster.
sel_df = cluster_id_5km_df[cluster_id_5km_df.cluster_id == CLUSTER_ID]

# One animation frame per value produced by du.daterange over the cluster's date span.
date_start = np.min(sel_df.date_local)
date_end = np.max(sel_df.date_local)
dates = list(du.daterange(date_start, date_end))

# Map extent: the cluster's lat/lon range widened by 1 on each side, with every
# edge then rounded up to a multiple of 0.5.
lat_min, lat_max = np.min(sel_df.lat), np.max(sel_df.lat)
lon_min, lon_max = np.min(sel_df.lon), np.max(sel_df.lon)
bb = LatLonBoundingBox(
    round_up_to_nearest_half(lat_min - 1),
    round_up_to_nearest_half(lat_max + 1),
    round_up_to_nearest_half(lon_min - 1),
    round_up_to_nearest_half(lon_max + 1),
)

anim = animate_map_latlon(sel_df, bb, dates)

HTML(anim.to_html5_video())

In [None]:
CLUSTER_ID = 301

# Rows of the 5 km / 3 day clustering that belong to the chosen cluster.
sel_df = cluster_id_5km_3day_df[cluster_id_5km_3day_df.cluster_id == CLUSTER_ID]

# One animation frame per value produced by du.daterange over the cluster's date span.
date_start = np.min(sel_df.date_local)
date_end = np.max(sel_df.date_local)
dates = list(du.daterange(date_start, date_end))

lat_min, lat_max = np.min(sel_df.lat), np.max(sel_df.lat)
lon_min, lon_max = np.min(sel_df.lon), np.max(sel_df.lon)
# NOTE(review): the bounding-box rebuild below is disabled, so `bb` is whatever an
# earlier cell left in the kernel and the bounds computed above go unused.
# Confirm the shared viewport is intentional; otherwise re-enable the line.
#bb = LatLonBoundingBox(round_up_to_nearest_half(lat_min-1), round_up_to_nearest_half(lat_max+1), round_up_to_nearest_half(lon_min-1), round_up_to_nearest_half(lon_max+1))

anim = animate_map_latlon(sel_df, bb, dates)

HTML(anim.to_html5_video())

In [None]:
CLUSTER_ID = 381

# Rows of the 5 km / 10 day clustering that belong to the chosen cluster.
sel_df = cluster_id_5km_10day_df[cluster_id_5km_10day_df.cluster_id == CLUSTER_ID]

# One animation frame per value produced by du.daterange over the cluster's date span.
date_start = np.min(sel_df.date_local)
date_end = np.max(sel_df.date_local)
dates = list(du.daterange(date_start, date_end))

lat_min, lat_max = np.min(sel_df.lat), np.max(sel_df.lat)
lon_min, lon_max = np.min(sel_df.lon), np.max(sel_df.lon)
# NOTE(review): the bounding-box rebuild below is disabled, so `bb` is whatever an
# earlier cell left in the kernel and the bounds computed above go unused.
# Confirm the shared viewport is intentional; otherwise re-enable the line.
#bb = LatLonBoundingBox(round_up_to_nearest_half(lat_min-1), round_up_to_nearest_half(lat_max+1), round_up_to_nearest_half(lon_min-1), round_up_to_nearest_half(lon_max+1))

anim = animate_map_latlon(sel_df, bb, dates)

HTML(anim.to_html5_video())

In [None]:
CLUSTER_ID = 381

# Rows of the 5 km / 3 day "forwards" clustering that belong to the chosen cluster.
sel_df = cluster_id_5km_3day_f_df[cluster_id_5km_3day_f_df.cluster_id == CLUSTER_ID]

# One animation frame per value produced by du.daterange over the cluster's date span.
date_start = np.min(sel_df.date_local)
date_end = np.max(sel_df.date_local)
dates = list(du.daterange(date_start, date_end))

lat_min, lat_max = np.min(sel_df.lat), np.max(sel_df.lat)
lon_min, lon_max = np.min(sel_df.lon), np.max(sel_df.lon)
# NOTE(review): the bounding-box rebuild below is disabled, so `bb` is whatever an
# earlier cell left in the kernel and the bounds computed above go unused.
# Confirm the shared viewport is intentional; otherwise re-enable the line.
#bb = LatLonBoundingBox(round_up_to_nearest_half(lat_min-1), round_up_to_nearest_half(lat_max+1), round_up_to_nearest_half(lon_min-1), round_up_to_nearest_half(lon_max+1))

anim = animate_map_latlon(sel_df, bb, dates)

HTML(anim.to_html5_video())

In [None]:
CLUSTER_ID = 374

# Rows of the 5 km / 3 day "forwards" clustering that belong to the chosen cluster.
sel_df = cluster_id_5km_3day_f_df[cluster_id_5km_3day_f_df.cluster_id == CLUSTER_ID]

# One animation frame per value produced by du.daterange over the cluster's date span.
date_start = np.min(sel_df.date_local)
date_end = np.max(sel_df.date_local)
dates = list(du.daterange(date_start, date_end))

lat_min, lat_max = np.min(sel_df.lat), np.max(sel_df.lat)
lon_min, lon_max = np.min(sel_df.lon), np.max(sel_df.lon)
# NOTE(review): the bounding-box rebuild below is disabled, so `bb` is whatever an
# earlier cell left in the kernel and the bounds computed above go unused.
# Confirm the shared viewport is intentional; otherwise re-enable the line.
#bb = LatLonBoundingBox(round_up_to_nearest_half(lat_min-1), round_up_to_nearest_half(lat_max+1), round_up_to_nearest_half(lon_min-1), round_up_to_nearest_half(lon_max+1))

anim = animate_map_latlon(sel_df, bb, dates)

HTML(anim.to_html5_video())

## Compare clusters to previous clustering

In [None]:
# Load the archived cluster frame so the earlier clustering can be compared with the new ones.
cluster_id_df_old = load_cluster_df(os.path.join(DATA_DIR, 'archived/cluster/clust_df_5.pkl'))

In [None]:
# Show the archived frame as the cell output.
cluster_id_df_old

In [None]:
def animate_map_latlon_old(df, bb, dates):
    """Animate the detections in `df` by day of year on a map covering `bb`.

    Frame i shows, in blue, the rows whose `dayofyear` equals dates[i]; the
    title shows that day number.

    Args:
        df: table with `dayofyear`, `lat` and `lon` columns.
        bb: bounding box passed straight to make_map.
        dates: indexable sequence of day-of-year numbers, one frame per entry.

    Returns:
        The matplotlib FuncAnimation driving the figure.
    """
    fig = plt.figure(figsize=(10,15))

    mp = make_map(bb)
    mp.shadedrelief()

    day_pts = mp.scatter([], [], 30, latlon=True, marker='o', color='b')

    def init():
        # A (0, 2) array clears the markers while keeping the offsets 2-D.
        day_pts.set_offsets(np.empty((0, 2)))
        return day_pts,

    def animate(i):
        dayofyear = dates[i]

        plt.title('Day %d' % (dayofyear))

        rows = df[df.dayofyear==dayofyear]

        # Pack the projected coordinates as an explicit (N, 2) array. Unlike a
        # bare zip(...), this is a real array on Python 3 as well and keeps its
        # 2-column shape on days with no rows.
        xs, ys = mp(list(rows.lon), list(rows.lat))
        day_pts.set_offsets(np.column_stack((xs, ys)))

        return day_pts,

    # call the animator. blit=True means only re-draw the parts that have changed.
    anim = animation.FuncAnimation(fig, animate, init_func=init,
                                   frames=len(dates), interval=1000, blit=True)

    return anim

In [None]:
CLUSTER_ID = 244

# Rows of the archived clustering that belong to the chosen cluster
# (this frame names the id column `cluster`, not `cluster_id`).
sel_df = cluster_id_df_old[cluster_id_df_old.cluster==CLUSTER_ID]

# Frames are day-of-year numbers. range() excludes its stop value, so add 1:
# otherwise the cluster's final day is never drawn and a single-day cluster
# produces an animation with zero frames.
date_start, date_end = np.min(sel_df.dayofyear), np.max(sel_df.dayofyear)
dates = range(date_start, date_end + 1)

# Map extent: the cluster's lat/lon range widened by 1 on each side, with every
# edge then rounded up to a multiple of 0.5.
lat_min, lat_max, lon_min, lon_max = np.min(sel_df.lat), np.max(sel_df.lat), np.min(sel_df.lon), np.max(sel_df.lon)
bb = LatLonBoundingBox(round_up_to_nearest_half(lat_min-1), round_up_to_nearest_half(lat_max+1), round_up_to_nearest_half(lon_min-1), round_up_to_nearest_half(lon_max+1))

anim = animate_map_latlon_old(sel_df, bb, dates)

HTML(anim.to_html5_video())