In [1]:
import calendar

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from descartes import PolygonPatch
from matplotlib.ticker import FormatStrFormatter
from scipy.interpolate import griddata

%matplotlib inline

In [2]:
# --- Input files (paths are relative to the notebook's working directory) ---
training_data_file = '../data/training_data/2005-2015_training_data.csv'
box_training_data_file = '../data/training_data/2005-2015_training_data_box.csv'
weather_data_box = '../data/NOAA_weather_data/2005-2015_california_box_all.csv'
states_shapefile = '../data/spatial_data/cb_2018_us_state_500k.shp'

# --- Bounds and spacing of the regular lon/lat grid used for interpolation ---
LAT_START, LAT_END = 31.52, 43.0
LON_START, LON_END = -125.48, -113.131
GRID_SPACING = 0.1

# --- Heatmap settings ---
heatmap_contour_levels = 50
heatmap_time_subset = '2015-11-23 18:00:00'

# --- Values handed to fig.subplots_adjust() by every plotting cell ---
left, right = 0.125, 0.65  # left / right side of the subplots of the figure
bottom, top = 0.1, 0.9     # bottom / top of the subplots of the figure
wspace = 0.2               # width reserved for blank space between subplots
hspace = 0.3               # height reserved for white space between subplots

# --- Shape and size of the multi-panel figures ---
fig_rows, fig_cols = 4, 3
plot_height, plot_width = 25, 30

# One row per plotted variable: (column name, panel title, axis label).
# Keeping the three attributes side by side guarantees the derived lists
# below always stay the same length and in the same order.
_weather_variables = (
    ('air.sfc', 'Surface air temperature', 'Temp. (K)'),
    ('air.2m', 'Air temperature at 2 meters', 'Temp (K)'),
    ('apcp', 'Accumulated precipitation', 'Precipitation (in)'),
    ('pres.sfc', 'Surface pressure', 'Pressure (kPa)'),
    ('vwnd.10m', 'North-south component of wind', 'Windspeed (mph)'),
    ('uwnd.10m', 'East-west component of wind', 'Windspeed (mph)'),
    ('rhum.2m', 'Relative humidity', 'Humidity (%)'),
    ('veg', 'Vegetation', 'Coverage (%)'),
    ('dpt.2m', 'Dew point temperature', 'Temp. (K)'),
    ('lat', 'Latitude', 'Latitude'),
    ('lon', 'Longitude', 'Longitude'),
)

data_types = [column for column, _, _ in _weather_variables]
plot_titles = [panel_title for _, panel_title, _ in _weather_variables]
weather_variable_labels = [axis_label for _, _, axis_label in _weather_variables]

# (row, col) subplot index for each variable, filled left-to-right, top-to-bottom.
plot_locations = [divmod(position, fig_cols) for position in range(len(_weather_variables))]

def get_california_polygon(shapefile):
    """Read a states shapefile and return only the California row(s).

    Parameters
    ----------
    shapefile : path of the file handed to ``gpd.read_file``; it must provide
        a ``NAME`` column.

    Returns
    -------
    The subset of the loaded table whose ``NAME`` equals ``'California'``
    (original index labels are preserved).
    """
    states = gpd.read_file(shapefile)
    is_california = states['NAME'] == 'California'
    return states[is_california]

def regularize_grid(data, data_type):
    """Interpolate scattered observations of one variable onto the regular grid.

    Parameters
    ----------
    data : table with ``'lon'`` and ``'lat'`` columns plus the ``data_type`` column.
    data_type : name of the column whose values are interpolated.

    Returns
    -------
    (xi, yi, zi) : 2-D longitude and latitude meshes built from the module-level
        LON_*/LAT_* bounds and GRID_SPACING, and the cubic-interpolated values
        on that mesh as returned by ``scipy.interpolate.griddata``.
    """
    # Scattered sample coordinates and the values observed there.
    sample_lon = data['lon']
    sample_lat = data['lat']
    sample_values = data[data_type]

    # Regular target mesh spanning the configured bounding box.
    lon_axis = np.arange(LON_START, LON_END, GRID_SPACING)
    lat_axis = np.arange(LAT_START, LAT_END, GRID_SPACING)
    grid_lon, grid_lat = np.meshgrid(lon_axis, lat_axis)

    grid_values = griddata(
        (sample_lon, sample_lat),
        sample_values,
        (grid_lon, grid_lat),
        method='cubic',
    )
    return grid_lon, grid_lat, grid_values

def calculate_frac_ignitions(data_type, num_bins):
    """Compute the fraction of observations with an ignition per value bin.

    Reads the module-level ``data`` (all observations) and ``ignition``
    (observations with an ignition) tables.

    Parameters
    ----------
    data_type : name of the column to bin.
    num_bins : number of equal-width bins spanning the column's min..max.

    Returns
    -------
    (fraction_ignitions, real_bin_nums)
        ``fraction_ignitions`` is a Series indexed by bin interval, in
        ascending bin order, holding ignition count / total count (NaN for
        bins with no observations). ``real_bin_nums`` is ``range(n_bins)``,
        so position ``k`` always refers to the k-th lowest bin.
    """
    observations = data[data_type]
    # Series.min/max skip NaN; the builtin min()/max() give order-dependent
    # results when NaN is present.
    min_val = observations.min()
    max_val = observations.max()

    # Ask for the number of bins directly. Passing a computed float `freq`
    # lets rounding in interval_range shorten the range by one bin, which
    # silently drops the largest values.
    # NOTE: intervals are right-closed (pandas default), so observations equal
    # to the minimum fall outside the first bin.
    bins = pd.interval_range(start=min_val, end=max_val, periods=num_bins)

    # sort=False keeps the counts in bin order; the default sorts by frequency,
    # which would scramble the bins relative to real_bin_nums.
    ignition_counts = pd.cut(ignition[data_type], bins=bins).value_counts(sort=False)
    total_counts = pd.cut(observations, bins=bins).value_counts(sort=False)

    fraction_ignitions = ignition_counts / total_counts
    real_bin_nums = range(len(fraction_ignitions))
    return fraction_ignitions, real_bin_nums

def CDF_plot(plot_location, data, data_type, title, xlabel, ylabel):
    """Draw the empirical cumulative distribution of one column on a subplot.

    Parameters
    ----------
    plot_location : index into the module-level ``ax`` array of subplots.
    data : table containing the ``data_type`` column.
    data_type : name of the column to summarise.
    title, xlabel, ylabel : text for the panel title and axis labels.

    Returns
    -------
    The module-level ``ax`` container (unchanged interface). Callers should not
    assign it back into ``ax[plot_location]``.
    """
    # np.histogram raises on NaN input, so drop missing values first.
    observations = data[data_type].dropna()
    counts, edges = np.histogram(observations, bins=40)

    total = counts.sum()
    if total:
        cumulative = np.cumsum(counts) / total
    else:
        # No valid observations: draw a flat line instead of dividing by zero.
        cumulative = np.zeros(len(counts))

    panel = ax[plot_location]
    # cumulative[i] counts everything up to the RIGHT edge of bin i, so it is
    # plotted against edges[1:]; using the left edges shifts the curve by one bin.
    panel.plot(edges[1:], cumulative)
    panel.tick_params(labelsize=12)
    panel.set_title(title, fontsize=18)
    panel.set_xlabel(xlabel, fontsize=14)
    panel.set_ylabel(ylabel, fontsize=15)
    panel.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
    panel.set_ylim([-0.05, 1.05])

    return ax

def map_plot(plot_location, california_land_mass, xi, yi, zi, title, num_contour_levels):
    """Draw a filled-contour map with the California outline on one subplot.

    Parameters
    ----------
    plot_location : index into the module-level ``ax`` array of subplots.
    california_land_mass : geometry handed to ``PolygonPatch`` for the outline.
    xi, yi, zi : gridded coordinates and values passed to ``contourf``.
    title : panel title.
    num_contour_levels : number of contour levels passed to ``contourf``.

    Returns
    -------
    The module-level ``ax`` container.
    """
    panel = ax[plot_location]

    # Unfilled black outline, drawn with zorder=2 so it sits above the contours.
    outline = PolygonPatch(california_land_mass, fc='none', ec='black', lw='2', zorder=2)
    panel.add_patch(outline)

    panel.contourf(xi, yi, zi, num_contour_levels, cmap='viridis')
    panel.set_title(title, fontsize=18)
    panel.axis('scaled')
    return ax

def boxplot(plot_location, no_ignition, ignition, data_type, title, xlabel, ylabel):
    """Draw side-by-side box plots of one column for no-ignition vs ignition rows.

    Parameters
    ----------
    plot_location : index into the module-level ``ax`` array of subplots.
    no_ignition, ignition : tables that both contain the ``data_type`` column.
    data_type : name of the column to compare.
    title, xlabel, ylabel : text for the panel title and axis labels.

    Returns
    -------
    The module-level ``ax`` container.
    """
    panel = ax[plot_location]

    # Order matters: first box is labelled 'no', second 'yes'.
    groups = [frame[data_type] for frame in (no_ignition, ignition)]

    panel.boxplot(groups, widths=0.6, patch_artist=True)
    panel.tick_params(labelsize=12)
    panel.set_title(title, fontsize=18)
    panel.set_xlabel(xlabel, fontsize=14)
    panel.set_ylabel(ylabel, fontsize=15)
    panel.set_xticklabels(['no', 'yes'])

    return ax
    
def binned_scatterplot(plot_location, data_type, title, xlabel, ylabel, num_bins):
    """Scatter the per-bin ignition fraction of one column on a subplot.

    The fractions and bin positions come from ``calculate_frac_ignitions``.
    X tick marks are removed, and the y-range is fixed to [-0.05, 0.125].

    Parameters
    ----------
    plot_location : index into the module-level ``ax`` array of subplots.
    data_type : name of the column to bin.
    title, xlabel, ylabel : text for the panel title and axis labels.
    num_bins : number of bins forwarded to ``calculate_frac_ignitions``.

    Returns
    -------
    The module-level ``ax`` container.
    """
    fractions, bin_positions = calculate_frac_ignitions(data_type, num_bins)
    panel = ax[plot_location]

    panel.plot(bin_positions, fractions, 'o')
    panel.tick_params(labelsize=12)
    panel.set_title(title, fontsize=18)
    panel.set_xlabel(xlabel, fontsize=14)
    # Bin positions are ordinal only, so the tick values are hidden.
    panel.set_xticks([])
    panel.set_ylabel(ylabel, fontsize=15)
    panel.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
    panel.set_ylim([-0.05, 0.125])

    return ax

In [3]:
# Load the training table and its bounding-box counterpart.
data = pd.read_csv(training_data_file, low_memory=False)
box_data = pd.read_csv(box_training_data_file, low_memory=False)

# Month number -> abbreviated month name (key 0 maps to '').
d = dict(enumerate(calendar.month_abbr))

data['weather_bin_time'] = pd.to_datetime(data['weather_bin_time'])
# Scale pressure by 1/1000 (the plots label this column in kPa;
# presumably the raw values are in Pa - TODO confirm).
data['pres.sfc'] = data['pres.sfc'] / 1000
data['abs.vwnd.10m'] = data['vwnd.10m'].abs()
data['abs.uwnd.10m'] = data['uwnd.10m'].abs()
# Assign the result back: chained `fillna(..., inplace=True)` on a column
# selection is not guaranteed to modify the parent frame.
data['ignition'] = data['ignition'].fillna(0)
# Parse box_data's own timestamps. The original assigned them into `data`,
# which replaced data's timestamps with index-aligned values from a
# different table.
box_data['weather_bin_time'] = pd.to_datetime(box_data['weather_bin_time'])
data['weather_bin_month'] = data['weather_bin_time'].dt.month.map(d)
data['weather_bin_year'] = data['weather_bin_time'].dt.year

fires_per_bin = box_data.groupby(['lat', 'lon'], as_index=False)['ignition'].sum()
fires_per_month = data.groupby(['weather_bin_month', 'weather_bin_year'], as_index=False)['ignition'].sum()

# Need to divide by 8 here because currently each fire is assigned to every
# 3 hr bin on the day of its discovery.
fires_per_month['ignition'] = fires_per_month['ignition'] / 8

ignition = data[data['ignition'] == 1]
no_ignition = data[data['ignition'] == 0]

california = get_california_polygon(states_shapefile)
# Take the (only) California row by position rather than by the hard-coded
# index label 16, which depends on the row order of one specific shapefile.
multipoly = california['geometry'].iloc[0]
# Use the largest part as the land mass for the outline. Multi-part
# geometries are iterated through `.geoms` (integer indexing was removed in
# Shapely 2); a single-part geometry is used as is.
california_land_mass = max(getattr(multipoly, 'geoms', [multipoly]), key=lambda part: part.area)

KeyboardInterrupt: 

In [None]:
# Three-panel overview: map of total fires, fires by month, fires by year.
fig, ax = plt.subplots(1, 3, figsize=(30, 7))
fig.subplots_adjust(left=left, bottom=bottom, right=right, top=top, wspace=wspace, hspace=hspace)

# --- Panel 0: total fires per spatial bin, interpolated onto the regular grid ---
x = fires_per_bin['lon']
y = fires_per_bin['lat']
z = fires_per_bin['ignition']

# target grid to interpolate to
xi = np.arange(LON_START, LON_END, GRID_SPACING)
yi = np.arange(LAT_START, LAT_END, GRID_SPACING)
xi, yi = np.meshgrid(xi, yi)

# interpolate
zi = griddata((x, y), z, (xi, yi), method='linear')

ax[0].add_patch(PolygonPatch(california_land_mass, fc='none', ec='black', lw='2', zorder=2))
ax[0].contourf(xi, yi, zi, heatmap_contour_levels, cmap='viridis')
ax[0].set_title('Total fires', fontsize=18)

# --- Panel 1: distribution (across years) of the fire count in each month ---
# calendar.month_abbr[0] is the empty string, so skip it. These are the same
# names the load cell used to build 'weather_bin_month'.
month_labels = list(calendar.month_abbr)[1:]
plot_data = [
    fires_per_month.loc[fires_per_month['weather_bin_month'] == month, 'ignition']
    for month in month_labels
]

ax[1].boxplot(plot_data, widths=0.6, patch_artist=True)
ax[1].tick_params(labelsize=12)
ax[1].set_title('Fires per Month', fontsize=18)
ax[1].set_xlabel('Month', fontsize=14)
ax[1].set_ylabel('Count', fontsize=15)
ax[1].set_xticklabels(month_labels)

# --- Panel 2: distribution (across months) of the fire count in each year ---
# 'weather_bin_year' comes from `.dt.year` and is numeric, so compare against
# integers; comparing against strings such as '2005' matches nothing.
year_values = list(range(2005, 2016))
plot_data = [
    fires_per_month.loc[fires_per_month['weather_bin_year'] == year, 'ignition']
    for year in year_values
]

# Draw on the third axes; the original reused ax[1], overplotting the month
# panel and leaving the third panel empty.
ax[2].boxplot(plot_data, widths=0.6, patch_artist=True)
ax[2].tick_params(labelsize=12)
ax[2].set_title('Fires per Year', fontsize=18)
ax[2].set_xlabel('Year', fontsize=14)
ax[2].set_ylabel('Count', fontsize=15)
ax[2].set_xticklabels([str(year) for year in year_values])

plt.show()

In [None]:
# Grid of cumulative-distribution plots, one panel per weather variable.
xlabels = weather_variable_labels
ylabels = ['Cumulative fraction observations'] * len(xlabels)

# Figure shape and size come from the configuration cell.
fig, ax = plt.subplots(fig_rows, fig_cols, figsize=(plot_width, plot_height))
fig.subplots_adjust(left=left, bottom=bottom, right=right, top=top, wspace=wspace, hspace=hspace)

# CDF_plot draws directly on ax[plot_location] and returns the whole `ax`
# array, so its result must NOT be assigned back into a single slot (that
# replaced the Axes object with the array itself).
for plot_location, data_type, title, xlabel, ylabel in zip(
        plot_locations, data_types, plot_titles, xlabels, ylabels):
    CDF_plot(plot_location, data, data_type, title, xlabel, ylabel)

plt.show()

In [None]:
# Heatmaps of every weather variable for one time bin. The bin is selected by
# `heatmap_time_subset` from the configuration cell; it is not redefined here
# so that the configured value is the one actually used.

# NOTE: this rebinds `box_data` (previously the box training data) to the
# raw weather table.
box_data = pd.read_csv(weather_data_box)
box_data_one_hour = box_data[box_data['time'] == heatmap_time_subset]
# Missing values are replaced with 0 before interpolation.
box_data_one_hour = box_data_one_hour.fillna(0)

# Figure shape and size come from the configuration cell.
fig, ax = plt.subplots(fig_rows, fig_cols, figsize=(plot_width, plot_height))
fig.subplots_adjust(left=left, bottom=bottom, right=right, top=top, wspace=wspace, hspace=hspace)

# map_plot draws directly on ax[plot_location] and returns the whole `ax`
# array, so its result must NOT be assigned back into a single slot.
for plot_location, data_type, title in zip(plot_locations, data_types, plot_titles):
    xi, yi, zi = regularize_grid(box_data_one_hour, data_type)
    map_plot(plot_location, california_land_mass, xi, yi, zi, title, heatmap_contour_levels)

plt.show()

In [None]:
# Grid of box plots comparing each weather variable with and without ignition.
ylabels = weather_variable_labels
xlabels = ['Fire ignition'] * len(ylabels)

# Figure shape and size come from the configuration cell.
fig, ax = plt.subplots(fig_rows, fig_cols, figsize=(plot_width, plot_height))
fig.subplots_adjust(left=left, bottom=bottom, right=right, top=top, wspace=wspace, hspace=hspace)

# boxplot draws directly on ax[plot_location] and returns the whole `ax`
# array, so its result must NOT be assigned back into a single slot.
for plot_location, data_type, title, xlabel, ylabel in zip(
        plot_locations, data_types, plot_titles, xlabels, ylabels):
    boxplot(plot_location, no_ignition, ignition, data_type, title, xlabel, ylabel)

plt.show()

In [None]:
# Grid of binned scatter plots: ignition fraction per value bin of each variable.
xlabels = weather_variable_labels
ylabels = ['Ignition probability'] * len(xlabels)

num_bins = 11

# Figure shape and size come from the configuration cell.
fig, ax = plt.subplots(fig_rows, fig_cols, figsize=(plot_width, plot_height))
fig.subplots_adjust(left=left, bottom=bottom, right=right, top=top, wspace=wspace, hspace=hspace)

# (A stray `def make_boxplots():` header with no indented body used to sit
# here and made the whole cell a syntax error.)
# binned_scatterplot draws directly on ax[plot_location] and returns the whole
# `ax` array, so its result must NOT be assigned back into a single slot.
for plot_location, data_type, title, xlabel, ylabel in zip(
        plot_locations, data_types, plot_titles, xlabels, ylabels):
    binned_scatterplot(plot_location, data_type, title, xlabel, ylabel, num_bins)

plt.show()

In [None]:
# Pairwise correlation of the numeric columns, shown as a lower-triangle heatmap.
# Restrict to numeric columns explicitly: `data` also holds string and
# datetime columns, which recent pandas refuses to correlate.
correlation_matrix_all = data.select_dtypes(include='number').corr()

# Mask the upper triangle (including the diagonal) so each pair appears once.
# The builtin `bool` replaces the removed `np.bool` alias, and the mask is
# built from the correlation matrix itself (the original referenced an
# undefined name `corr`).
mask = np.zeros_like(correlation_matrix_all, dtype=bool)
mask[np.triu_indices_from(mask)] = True

sns.set(style="white")
cmap = 'viridis'

fig, ax = plt.subplots(figsize=(10, 10))

# Pass ax explicitly so the heatmap is tied to the figure created above.
sns.heatmap(correlation_matrix_all, mask=mask, cmap=cmap, vmax=.3, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)

plt.show()