<a href="https://colab.research.google.com/github/eliwagnercode/SmokeyBear/blob/main/CaliforniaWildfire_IgnitionPoint_GIS_Attributes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Packages

In [None]:
# @title Installations
!pip install geopandas
!pip install geemap
!pip install click --upgrade
!pip install earthengine-api
!pip install tensorflow
!pip install tensorflow_decision_forests

In [2]:
# @title Mount personal Google Drive
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# @title Import Packages
import ee # Import Google Earth Engine
ee.Authenticate() # Trigger the authentication flow.
ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com') # Initialize the library.
import geemap

# import logging
# import multiprocessing
# import requests
# import shutil
# from retry import retry

import math, numpy as np, pandas as pd
from tqdm import tqdm, trange # used for progress bars
import geopandas as gpd
from shapely import wkt
from datetime import datetime
import time

# ML
import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import tensorflow_decision_forests as tfdf
# from geopy.geocoders import Nominatim

# Raw Data Acquisition - Historical Wildfire Ignition Points

In [None]:
# @title # Download archive from US Forest Service
!wget https://www.fs.usda.gov/rds/archive/products/RDS-2013-0009.6/RDS-2013-0009.6_SQLITE.zip

# Unarchive data
!unzip /content/RDS-2013-0009.6_SQLITE.zip

In [None]:
# @title # Extract data from SQLite database

import sqlite3
conn = sqlite3.connect('/content/Data/FPA_FOD_20221014.sqlite')
cur = conn.cursor()
cur.execute("PRAGMA table_info('Fires')")
sql_cols = cur.fetchall()
df_cols = []
for col in sql_cols:
  df_cols.append(col[1])
cur.execute("select * from 'Fires'")
sql_rows = cur.fetchall()
df_rows = []
for row in sql_rows:
  df_rows.append(list(row))

# Convert to Pandas DataFrame
import pandas as pd
gdf_USFires = pd.DataFrame(data=df_rows,columns=df_cols)

# Drop columns and improve readability
gdf_USFires = gdf_USFires[
    ['FPA_ID','LONGITUDE','LATITUDE','FIRE_SIZE','FIRE_SIZE_CLASS',
     'FIRE_YEAR','DISCOVERY_DATE','STATE']
    ].rename(
    columns={
    'LONGITUDE':'lon',
    'LATITUDE':'lat',
    'FIRE_SIZE':'fireSize',
    'FIRE_SIZE_CLASS':'fireClass',
    'FIRE_YEAR':'fireYear',
    'DISCOVERY_DATE':'fireDate',
    'STATE':'stateName'}
    ).drop_duplicates('FPA_ID')
gdf_USFires

In [7]:
# @title # Convert DataFrame to GeoDataFrame

import geopandas as gpd # !pip install geopandas

geom = gpd.points_from_xy(
    gdf_USFires['lon'], gdf_USFires['lat']
)
gdf_USFires = gpd.GeoDataFrame(
    gdf_USFires,
    geometry = geom,
    crs='EPSG:4326'
    )[['geometry','FPA_ID','fireSize','fireClass',
       'fireYear','fireDate','stateName']]
gdf_USFires

In [None]:
# @title # Create subset for California
gdf_CAFires = gdf_USFires[gdf_USFires['stateName'] == 'CA']
gdf_CAFires

In [44]:
# @title # Download clean data for easier loading

def gdf_to_csv(gdf,filename=None,timestamp=True):
  filename = str(filename)
  if timestamp == True:
    today = datetime.now().strftime('%Y-%m-%d')
    path = f'/content/{today}_{filename}'
  else:
    path = f'/content/{filename}'
  with open(path, 'w', encoding = 'utf-8-sig') as f:
    gdf.to_csv(f)

filename = 'USFires_IgnitionPoints.csv'
gdf_to_csv(gdf_USFires,filename,timestamp=False)

filename = 'CAFires_IgnitionPoints.csv'
gdf_to_csv(gdf_CAFires,filename,timestamp=False)

# Data Acquisition - Ignition Point GIS Attributes

In [18]:
# @title Create DRY functions
def csv_to_gdf(filepath):
  gdf = pd.read_csv(filepath)
  gdf = gdf.drop(gdf.columns[0],axis=1)
  gdf['geometry'] = gdf['geometry'].apply(wkt.loads) # Convert geometry string to WKT geometry object
  gdf = gpd.GeoDataFrame(gdf,geometry='geometry').set_crs(epsg=4326) # Convert df to gdf
  return gdf

def gdf_to_csv(gdf,filename=None,timestamp=True):
  filename = str(filename)
  if timestamp == True:
    today = datetime.now().strftime('%Y-%m-%d')
    path = f'/content/{filename}_{today}'
  else:
    path = f'/content/{filename}'
  with open(path, 'w', encoding = 'utf-8-sig') as f:
    gdf.to_csv(f)

def trim_gdf(gdf,index):
  gdf = gdf.drop(index).reset_index(drop=True)
  return gdf

def split_gdf(gdf,splitSize=1000): # GEE cancels queries amassing > 5000 elements
  split_gdf_list = np.array_split(gdf.copy(),
                           range(0,len(gdf),splitSize)
                           )[1:] # first element returns empty gdf
  return split_gdf_list

def get_results(gdf,functionToMap):
  start_time = time.time()
  split_gdf_list = split_gdf(gdf)
  gdf_allResults = gpd.GeoDataFrame()
  for gdf in tqdm(split_gdf_list,
                  total = len(split_gdf_list),
                  desc = "Downloading and extracting data"):
    featCol = geemap.geopandas_to_ee(gdf)
    resultCol = featCol.map(functionToMap,opt_dropNulls=True)
    gdf_results = geemap.ee_to_geopandas(resultCol)
    gdf_results = gdf_results.set_crs('EPSG:4326')
    gdf_allResults = gpd.GeoDataFrame(
        pd.concat([gdf_allResults.copy(),gdf_results.copy()],
                  ignore_index=True),
                  crs='EPSG:4326')
  print(
      '\n' + str(round((time.time()-start_time)/60,3))
      +' minutes total download time')
  return gdf_allResults

In [None]:
# @title Load Main GeoDataFrame (gdf_CAFires)
filepath = 'https://raw.githubusercontent.com/eliwagnercode/SmokeyBear/main/CAFires_IgnitionPoints.csv'
gdf_CAFires = csv_to_gdf(filepath).drop(columns='stateName')
gdf_CAFires

In [None]:
# @title Topography

def get_topo(feat):
  elevation_img = ee.Image('USGS/SRTMGL1_003').select('elevation')
  slope_img = ee.Terrain.slope(ee.Image('USGS/SRTMGL1_003')).select('slope')
  aspect_img = ee.Terrain.aspect(ee.Image('USGS/SRTMGL1_003')).select('aspect')
  elevation = elevation_img.sample(feat.geometry(),scale=10).first().get('elevation')
  slope = slope_img.sample(feat.geometry(),scale=10).first().get('slope')
  aspect = aspect_img.sample(feat.geometry(),scale=10).first().get('aspect')
  return feat.set({'elevation':elevation,
                   'slope':slope,
                   'aspect':aspect})

gdf_topo = gdf_CAFires.copy()
gdf_topo = get_results(gdf_topo,get_topo)
filename = 'gdf_topo.csv'
gdf_to_csv(gdf_topo)
gdf_topo

In [None]:
# @title Vegetation Index
def get_ndvi(feat):
  f_date = ee.Date(feat.get('fireDate'))
  i_date = f_date.advance(-1,'year')
  img = ee.ImageCollection("MODIS/061/MOD13A1")\
    .filterDate(i_date,f_date)\
    .filterBounds(feat.geometry())\
    .select('NDVI')\
    .mean()
  ndvi = img.sample(feat.geometry(),scale=10).first().get('NDVI')
  return feat.set({'NDVI':ndvi})

gdf_ndvi = gdf_CAFires.copy()
gdf_ndvi = get_results(gdf_ndvi,get_topo)
filename = 'gdf_ndvi.csv'
gdf_to_csv(gdf_ndvi)
gdf_ndvi

In [None]:
# @title Solar Radiation Annual Mean
def get_solarRadAnnual(feat):
  f_date = ee.Date(feat.get('fireDate'))
  i_date = f_date.advance(-1,'year')
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR') \
    .filterDate(i_date,f_date)\
    .filterBounds(feat.geometry())\
    .select('surface_net_solar_radiation_sum') # (J/m^2)
  img = imgCol.mean()
  solarRadAnnual = img.sample(feat.geometry(),scale=10)\
    .first()\
    .get('surface_net_solar_radiation_sum')
  return feat.set({'solarRadAnnual':solarRadAnnual})

gdf_solarRadAnnual = gdf_CAFires.copy()
gdf_solarRadAnnual = get_results(gdf_solarRadAnnual,get_topo)
filename = 'gdf_solarRadAnnual.csv'
gdf_to_csv(gdf_solarRadAnnual)
gdf_solarRadAnnual

In [None]:
# @title Solar Radiation Monthly Mean
def get_solarRadMonthly(feat):
  f_date = ee.Date(feat.get('fireDate'))
  i_date = f_date.advance(-1,'month')
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR') \
    .filterDate(i_date,f_date)\
    .filterBounds(feat.geometry())\
    .select('surface_net_solar_radiation_sum') # (J/m^2)
  img = imgCol.mean()
  solarRadMonthly = img.sample(feat.geometry(),scale=10)\
    .first()\
    .get('surface_net_solar_radiation_sum')
  return feat.set({'solarRadMonthly':solarRadMonthly})

gdf_solarRadMonthly = gdf_CAFires.copy()
gdf_solarRadMonthly = get_results(gdf_solarRadMonthly,get_topo)
filename = 'gdf_solarRadMonthly.csv'
gdf_to_csv(gdf_solarRadMonthly)
gdf_solarRadMonthly

In [None]:
# @title Solar Radiation Daily
def get_solarRadDaily(feat):
  date = ee.Date(feat.get('fireDate'))
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR') \
    .filterDate(date) \
    .select('surface_net_solar_radiation_sum') # (J/m^2)
  img = imgCol.mean()
  solarRadDaily = img.sample(feat.geometry(),scale=10).first().get('surface_net_solar_radiation_sum')
  return feat.set({'solarRadDaily':solarRadDaily})

gdf_solarRadDaily = gdf_CAFires.copy()
gdf_solarRadDaily = get_results(gdf_solarRadDaily,get_topo)
filename = 'gdf_solarRadDaily.csv'
gdf_to_csv(gdf_solarRadDaily)
gdf_solarRadDaily

In [None]:
# @title Air Temp Annual Mean

def get_airTempAnnual(feat):
  f_date = ee.Date(feat.get('fireDate'))
  i_date = f_date.advance(-1,'year')
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR') \
    .filterDate(i_date,f_date) \
    .filterBounds(feat.geometry())\
    .select("temperature_2m") # (°K)
  img = imgCol.mean()
  airTempAnnual = img.sample(feat.geometry(),scale=10).first().get('temperature_2m')
  return feat.set({'airTempAnnual':airTempAnnual})

gdf_airTempAnnual = gdf_CAFires.copy()
gdf_airTempAnnual = get_results(gdf_airTempAnnual,get_topo)
filename = 'gdf_airTempAnnual.csv'
gdf_to_csv(gdf_airTempAnnual)
gdf_airTempAnnual

In [None]:
# @title Air Temp Monthly Mean

def get_airTempMonthly(feat):
  f_date = ee.Date(feat.get('fireDate'))
  i_date = f_date.advance(-1,'month')
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR') \
    .filterDate(i_date,f_date) \
    .filterBounds(feat.geometry())\
    .select("temperature_2m") # (°K)
  img = imgCol.mean()
  airTempMonthly = img.sample(feat.geometry(),scale=10).first().get('temperature_2m')
  return feat.set({'airTempMonthly':airTempMonthly})

gdf_airTempMonthly = gdf_CAFires.copy()
gdf_airTempMonthly = get_results(gdf_airTempMonthly,get_topo)
filename = 'gdf_airTempMonthly.csv'
gdf_to_csv(gdf_airTempMonthly)
gdf_airTempMonthly

In [None]:
# @title Air Temp Daily

def get_airTempDaily(feat):
  date = ee.Date(feat.get('fireDate'))
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR') \
    .filterDate(date) \
    .select("temperature_2m") # (°K)
  img = imgCol.mean()
  airTempDaily = img.sample(feat.geometry(),scale=10).first().get('temperature_2m')
  return feat.set({'airTempDaily':airTempDaily})

def split_gdf(gdf,splitSize=5000): # GEE cancels queries amassing > 5000 elements
  split_gdf_list = np.array_split(gdf.copy(),
                           range(0,len(gdf),splitSize)
                           )[1:] # first element returns empty gdf
  return split_gdf_list

gdf_airTempDaily = gdf_CAFires.copy()
gdf_airTempDaily = get_results(gdf_airTempDaily,get_topo)
filename = 'gdf_airTempDaily.csv'
gdf_to_csv(gdf_airTempDaily)
gdf_airTempDaily

In [None]:
# @title Land Cover Type
# Create function with without Python syntax to be executed server-side
def get_landCover(feat):
  f_date = ee.Date(feat.get('fireDate')).advance(1,'day')
  i_date = f_date.advance(-1,'year')
  imgCol = ee.ImageCollection('MODIS/006/MCD12Q1')\
    .filterBounds(feat.geometry())\
    .filterDate(i_date,f_date)\
    .select('LC_Type1')
  img = imgCol.first()
  landCover = img.sample(feat.geometry(),scale=10).first().get('LC_Type1')
  return feat.set({'landCover':landCover})

gdf_landCover = get_results(gdf_CAFires_2001_2015,get_landCover)
gdf_to_csv(gdf_landCover,'landCover')
gdf_landCover

gdf_landCover = gdf_CAFires.copy()
gdf_landCover = get_results(gdf_landCover,get_topo)
filename = 'gdf_landCover.csv'
gdf_to_csv(gdf_landCover)
gdf_landCover

In [None]:
# @title Land Surface Temp Daily
def get_LST_Daily(feat):
  date = ee.Date(feat.get('fireDate'))
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR')\
    .filterBounds(feat.geometry())\
    .filterDate(date)\
    .select('skin_temperature') # (°K)
  img = imgCol.mean()
  LST_Daily = img.sample(feat.geometry(),scale=10).first().get('skin_temperature')
  return feat.set({'LST_Daily':LST_Daily})

gdf_LST_Daily = gdf_CAFires.copy()
gdf_LST_Daily = get_results(gdf_LST_Daily,get_topo)
filename = 'gdf_LST_Daily.csv'
gdf_to_csv(gdf_LST_Daily)
gdf_LST_Daily

In [None]:
# @title Land Surface Temp Monthly Mean
def get_LST_Monthly(feat):
  f_date = ee.Date(feat.get('fireDate'))
  i_date = f_date.advance(-1,'month')
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR')\
    .filterBounds(feat.geometry())\
    .filterDate(i_date,f_date)\
    .select('skin_temperature') # (°K)
  img = imgCol.mean()
  LST_Monthly = img.sample(feat.geometry(),scale=10).first().get('skin_temperature')
  return feat.set({'LST_Monthly':LST_Monthly})

gdf_LST_Monthly = gdf_CAFires.copy()
gdf_LST_Monthly = get_results(gdf_LST_Monthly,get_topo)
filename = 'gdf_LST_Monthly.csv'
gdf_to_csv(gdf_LST_Monthly)
gdf_LST_Monthly

In [None]:
# @title Land Surface Temp Annual Mean
def get_LST_Annual(feat):
  f_date = ee.Date(feat.get('fireDate'))
  i_date = f_date.advance(-1,'year')
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR')\
    .filterBounds(feat.geometry())\
    .filterDate(i_date,f_date)\
    .select('skin_temperature') # (°K)
  img = imgCol.mean()
  LST_Annual = img.sample(feat.geometry(),scale=10).first().get('skin_temperature')
  return feat.set({'LST_Annual':LST_Annual})

gdf_LST_Annual = gdf_CAFires.copy()
gdf_LST_Annual = get_results(gdf_LST_Annual,get_topo)
filename = 'gdf_LST_Annual.csv'
gdf_to_csv(gdf_LST_Annual)
gdf_LST_Annual

In [None]:
# @title Precipitation Annual Sum
def get_precipAnnual(feat):
  f_date = ee.Date(feat.get('fireDate'))
  i_date = f_date.advance(-1,'year')
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR')\
    .filterBounds(feat.geometry())\
    .filterDate(i_date,f_date) \
    .select('total_precipitation_sum')
  precipAnnual = imgCol.aggregate_sum('total_precipitation_sum')
  return feat.set({'precipAnnual':precipAnnual})

gdf_precipAnnual = gdf_CAFires.copy()
gdf_precipAnnual = get_results(gdf_precipAnnual,get_topo)
filename = 'gdf_precipAnnual.csv'
gdf_to_csv(gdf_precipAnnual)
gdf_precipAnnual

In [None]:
# @title Standardized Precipitation Index
def get_SPI(feat):
  f_date = ee.Date(feat.get('fireDate'))
  i_date = f_date.advance(-1,'month')
  img = ee.ImageCollection("GRIDMET/DROUGHT") \
    .filterBounds(feat.geometry())\
    .filterDate(i_date,f_date) \
    .select('spi1y')\
    .mean()
  SPI = img.sample(feat.geometry(),scale=10).first().get('spi1y')
  return feat.set({'SPI':SPI})

gdf_SPI = gdf_CAFires.copy()
gdf_SPI = get_results(gdf_SPI,get_topo)
filename = 'gdf_SPI.csv'
gdf_to_csv(gdf_SPI)
gdf_SPI

In [None]:
ee.ImageCollection("GRIDMET/DROUGHT") # @title Precipitation Daily
def get_precipDaily(feat):
  date = ee.Date(feat.get('fireDate'))
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR') \
    .filterDate(date) \
    .select('total_precipitation_sum') # meters
  img = imgCol.mean()
  precipDaily = img.sample(feat.geometry(),scale=10).first().get('total_precipitation_sum')
  return feat.set({'precipDaily':precipDaily})

gdf_precipDaily = get_results(gdf_CAFires_trim,get_precipDaily)

gdf_precipDaily = gdf_CAFires.copy()
gdf_precipDaily = get_results(gdf_precipDaily,get_topo)
filename = 'gdf_precipDaily.csv'
gdf_to_csv(gdf_precipDaily)
gdf_precipDaily

In [None]:
# @title Wind Speed/Direction Daily
def get_windDaily(feat):
  date = ee.Date(feat.get('fireDate'))
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR')\
    .filterBounds(feat.geometry())\
    .filterDate(date) \
    .select('u_component_of_wind_10m','v_component_of_wind_10m')
  img = imgCol.mean()
  wind_u = img.sample(feat.geometry(),scale=10).first().get('u_component_of_wind_10m')
  wind_v = img.sample(feat.geometry(),scale=10).first().get('v_component_of_wind_10m')
  return feat.set({'wind_u_Daily':wind_u,'wind_v_Daily':wind_v})

gdf_windDaily = get_results(gdf_CAFires_trim,get_windDaily)

# Convert vectors to speed and direction
windSpeedDaily = []
windDirectionDaily = []
wind_u_Daily = gdf_windDaily['wind_u_Daily'].to_list()
wind_v_Daily = gdf_windDaily['wind_u_Daily'].to_list()
for u, v in zip(wind_u_Daily,wind_v_Daily):
  windSpeedDaily.append(math.sqrt(u**2 + v**2))
  windDirectionDaily.append(np.arctan2(v,u))
gdf_windDaily['windSpeedDaily'] = windSpeedDaily  * 180 / math.pi
gdf_windDaily['windDirectionDaily'] = windDirectionDaily  * 180 / math.pi

gdf_windDaily = gdf_CAFires.copy()
gdf_windDaily = get_results(gdf_windDaily,get_topo)
filename = 'gdf_windDaily.csv'
gdf_to_csv(gdf_windDaily)
gdf_windDaily

In [None]:
# @title Wind Speed/Direction Monthly Mean
# https://disc.gsfc.nasa.gov/information/data-in-action?title=Derive%20Wind%20Speed%20and%20Direction%20With%20MERRA-2%20Wind%20Components
def get_windMonthly(feat):
  f_date = ee.Date(feat.get('fireDate'))
  i_date = f_date.advance(-1,'month')
  imgCol = ee.ImageCollection('ECMWF/ERA5_LAND/MONTHLY_AGGR')\
    .filterBounds(feat.geometry())\
    .filterDate(i_date,f_date) \
    .select('u_component_of_wind_10m','v_component_of_wind_10m')
  img = imgCol.mean()
  wind_u = img.sample(feat.geometry(),scale=10).first().get('u_component_of_wind_10m')
  wind_v = img.sample(feat.geometry(),scale=10).first().get('v_component_of_wind_10m')
  return feat.set({'wind_u_Monthly':wind_u,'wind_v_Monthly':wind_v})

gdf_windMonthly = get_results(gdf_CAFires_trim,get_windMonthly)

# Convert vectors to speed and direction
windSpeedMonthly = []
windDirectionMonthly = []
wind_u_Monthly = gdf_windMonthly['wind_u_Monthly'].to_list()
wind_v_Monthly = gdf_windMonthly['wind_u_Monthly'].to_list()
for u, v in zip(wind_u_Monthly,wind_v_Monthly):
  windSpeedMonthly.append(math.sqrt(u**2 + v**2))
  windDirectionMonthly.append(np.arctan2(v,u))
gdf_windMonthly['windSpeedMonthly'] = windSpeedMonthly * 180 / math.pi
gdf_windMonthly['windDirectionMonthly'] = windDirectionMonthly * 180 / math.pi

gdf_windMonthly = gdf_CAFires.copy()
gdf_windMonthly = get_results(gdf_windMonthly,get_topo)
filename = 'gdf_windMonthly.csv'
gdf_to_csv(gdf_windMonthly)
gdf_windMonthly

In [None]:
# @title WeekendBool (Fri/Sat/Sun)
weekendBool = []
dow_list = pd.to_datetime(gdf_CAFires['fireDate']).dt.dayofweek.to_list()
for dow in dow_list:
  if dow in [4,5,6]:
    weekendBool.append(1)
  else:
    weekendBool.append(0)
gdf_CAFires['weekendBool'] = weekendBool

# Merge all DataFrames on 'FPA_ID' to combine attributes

In [None]:
attribute_gdf_list = [
    gdf_topo,
    gdf_ndvi,
    gdf_solarRadAnnual,

    ]
for gdf in attribute_gdf_list:
  gdf = gdf.drop(
      columns = ['geometry', 'fireClass', 'fireDate',
       'fireSize', 'fireYear']
  )
  gdf_CAFires = gdf_CAFires.merge(how='left',on='FPA_ID')

filename = 'CAFires_ClassifierTrainingAttributes.csv'
gdf_to_csv(gdf_CAFires, filename, timestamp = False)

# Machine Learning Models

In [None]:
# @title Load Main DataFrame (df_CAFires_2001_2015)
import pandas as pd

filepath = 'https://raw.githubusercontent.com/eliwagnercode/SmokeyBear/main/CAFires_ClassifierTrainingAttributes.csv'

df_CAFires_2001_2015 = pd.read_csv(filepath).drop('Unnamed: 0', axis = 1)
df_CAFires_2001_2015

Unnamed: 0,FPA_ID,LST_Annual,LST_Monthly,NDVI,SPI,airTempAnnual,airTempMonthly,aspect,elevation,fireClass,landCover,slope,solarRadAnnual,windDirectionDaily,windDirectionMonthly,windSpeedDaily,windSpeedMonthly,solarRadMonthly
0,CDF_2001_55_2223_000013,18.624321,284.766876,0.464,-0.650000,289.400595,284.725031,16.64,431,A,13,3.87,5.415333e+08,-135,-135,1.098640,1.002380,315651666.0
1,FS-368044,13.827644,280.287709,0.579,-0.330000,286.771961,281.297310,90.00,1592,A,9,4.48,5.471113e+08,-135,-135,0.953081,0.617226,303177032.0
2,CDF_2001_55_2223_000012,17.435575,284.961057,0.489,-0.680000,289.982113,285.619329,218.59,198,B,9,10.56,5.281169e+08,-135,-135,0.977278,0.854568,307401312.0
3,CDF_2001_53_2211_000006,14.941612,280.734426,0.536,0.111667,287.719953,282.055120,308.11,16,A,8,4.50,4.682444e+08,-135,45,0.733952,0.156408,214394044.0
4,CDF_2001_56_2234_000028,16.433761,281.117806,0.394,0.483333,290.005050,282.882373,90.00,110,B,9,1.15,4.831926e+08,-135,-135,0.643664,0.155299,253340086.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113289,SFO-2015CACDFRRU148179,19.923015,281.820139,0.337,0.096667,290.902801,282.393015,74.52,582,A,10,3.47,5.132090e+08,-135,-135,0.655306,0.155226,294979208.0
113290,SFO-2015CACDFFKU020374,21.560037,280.440426,0.352,-0.846667,293.485017,281.266487,90.00,96,A,7,3.46,4.853498e+08,45,45,0.573373,0.234397,224518298.0
113291,SFO-2015CACDFSBC001620,17.283322,280.393929,0.449,-1.396667,289.360524,281.067103,206.04,633,A,9,5.15,5.577070e+08,-135,-135,1.811616,0.739769,282724330.0
113292,SFO-2015CACDFMVU029331,20.913000,284.900256,0.269,0.143333,291.152498,285.271955,270.00,85,A,13,1.10,5.230989e+08,-135,45,1.687550,0.337179,314694438.0


In [None]:
# @title Random Forest Classifier
# https://www.youtube.com/watch?v=5qgk9QJ4rdQ
!pip install tensorflow_decision_forests
import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import tensorflow_decision_forests as tfdf

def RandomForestFromDataFrame(_df_, _label_):
  df = _df_.copy()
  df_train, df_test = train_test_split(
      df, test_size=0.2, random_state=26)
  df_train, df_validation = train_test_split(
      df_train, test_size = 0.2, random_state = 26)

  ds_train = tfdf.keras.pd_dataframe_to_tf_dataset(
      df_train,label=_label_)
  ds_test = tfdf.keras.pd_dataframe_to_tf_dataset(
      df_test,label=_label_)
  ds_validation = tfdf.keras.pd_dataframe_to_tf_dataset(
      df_validation,label=_label_)

  model_rf = tfdf.keras.RandomForestModel()
  model_rf.fit(ds_train,validation_data = ds_validation)
  model_rf.evaluate(ds_test)
  return model_rf

model_rf = RandomForestFromDataFrame(df_CAFires_2001_2015,'fireClass')
model_rf.summary()

Use /tmp/tmp2b46o87t as temporary training directory
Reading training dataset...
Training dataset read in 0:00:06.229765. Found 72508 examples.
Reading validation dataset...
Num validation examples: tf.Tensor(18127, shape=(), dtype=int32)
Validation dataset read in 0:00:01.663671. Found 18127 examples.
Training model...
Model trained in 0:02:37.267376
Compiling model...


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: could not get source code


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: could not get source code
Model compiled.
Model: "random_forest_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
Total params: 1
Trainable params: 0
Non-trainable params: 1
_________________________________________________________________
Type: "RANDOM_FOREST"
Task: CLASSIFICATION
Label: "__LABEL"

Input Features (17):
	FPA_ID
	LST_Annual
	LST_Monthly
	NDVI
	SPI
	airTempAnnual
	airTempMonthly
	aspect
	elevation
	landCover
	slope
	solarRadAnnual
	solarRadMonthly
	windDirectionDaily
	windDirectionMonthly
	windSpeedDaily
	windSpeedMonthly

No weights

Variable Importance: INV_MEAN_MIN_DEPTH:
    1.            "elevation"  0.293469 ################
    2.        "airTempAnnual"  0.140231 #####
    3.            "landCover"  0.137945 ####

In [None]:
# @title Drop fireClass 'A'
df_CAFires_CondensedClasses = df_CAFires_2001_2015.copy()
df_CAFires_CondensedClasses = df_CAFires_CondensedClasses[
    (df_CAFires_CondensedClasses['fireClass'] == 'A')]
model_rf = RandomForestFromDataFrame(df_CAFires_CondensedClasses,'fireClass')
model_rf.summary()

Use /tmp/tmpmz8634i6 as temporary training directory
Reading training dataset...
Training dataset read in 0:00:00.739037. Found 37749 examples.
Reading validation dataset...
Num validation examples: tf.Tensor(9438, shape=(), dtype=int32)
Validation dataset read in 0:00:00.598127. Found 9438 examples.
Training model...
Model trained in 0:00:02.444734
Compiling model...
Model compiled.


In [None]:
model_rf.summary()

NameError: ignored

In [None]:
# @title Drop fireClass 'A' and 'B'
df_CAFires_CondensedClasses = df_CAFires_2001_2015.copy()
df_CAFires_CondensedClasses = df_CAFires_CondensedClasses[
    (df_CAFires_CondensedClasses['fireClass'] == 'A') |
    (df_CAFires_CondensedClasses['fireClass'] == 'B')]
model_rf = RandomForestFromDataFrame(df_CAFires_CondensedClasses,'fireClass')
model_rf.summary()

In [None]:
# @title Condense fire size classes to A, B-D, E-G
df_CAFires_CondensedClasses = df_CAFires_2001_2015.copy()
fireClassList = df_CAFires_CondensedClasses['fireClass'].to_list()
for i, fireClass in enumerate(fireClassList):
  if fireClass in ['B','C','D']:
    fireClassList[i] = 'B/C/D'
  if fireClass in ['E','F','G']:
    fireClassList[i] = 'E/F/G'
df_CAFires_CondensedClasses['fireClass'] = fireClassList
model_rf = RandomForestFromDataFrame(df_CAFires_CondensedClasses,'fireClass')
model_rf.summary()

In [None]:
# @title Logistic Regression

In [None]:
# @title Support Vector Machine (kernel method)

In [None]:
# @title Gradient Boosted Decision Tree
model_gbt = tfdf.keras.GradientBoostedTreesModel()
model_gbt.fit(ds_train,validation_data = ds_validation)
model_gbt.evaluate(ds_test)



In [None]:
# @title Linear Discriminant Analysis

In [None]:
# @title Classification And Regression Trees
model_cart = tfdf.keras.CartModel()
model_cart.fit(ds_train,validation_data = ds_validation)
model_cart.evaluate(ds_test)


In [None]:
# @title