# Reformat Cal Fire data

## Imports and env

In [1]:
import csv
from google.colab import drive
# Mount Google Drive so the project data folder is reachable under /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


## Globals

In [2]:
# Folder on the mounted Google Drive that holds the Cal Fire CSV files.
# NOTE: the double space in "ML6140  - Project" is part of the path as written;
# the listing below succeeds with it, so do not "tidy" it.
BASE_FOLDER_PATH = "/content/drive/My Drive/ML6140  - Project/Raw Data/Cal Fire/"
# List the folder contents as a quick check that the path resolves.
!ls "{BASE_FOLDER_PATH}"

CalFire_IncidentData.csv		     README.gdoc
CalFire_IncidentData_Reformatted.csv	     reformatCalFireData.ipynb
CalFire_IncidentData_Reformatted_sorted.csv


In [17]:
# File names are relative to BASE_FOLDER_PATH.
# Input CSV that the "Main" section reads.
ORIGINAL_FILENAME = "CalFire_IncidentData.csv"
# Output CSV that the final cell writes (opened in "w" mode, so it is overwritten).
REFORMATTED_FILENAME = "CalFire_IncidentData_Reformatted.csv"

In [4]:
# Features to keep when filtering, in the column order used for the output CSV.
# The created/extinguished timestamps each contribute the same five parts.
_DATETIME_PARTS = ("year", "month", "day", "hour", "minute")
FEATURES = (
  ["incident_name"]
  + ["incident_created_" + part for part in _DATETIME_PARTS]
  + ["incident_acres_burned", "incident_longitude", "incident_latitude"]
  + ["incident_extinguished_" + part for part in _DATETIME_PARTS]
)

## Helpers

In [5]:
# The datetime library had trouble parsing these date strings, so the fields
# are cut out by fixed character position instead.
# Seconds are dropped (always rounds down to the minute). No time zone handling.
def parse_iso_datetime(iso_datetime_string):
  """
  Split a timestamp shaped like "2018-01-09T13:46:00Z" into its parts.

  Returns a dict with the keys "year", "month", "day", "hour" and "minute",
  each holding the matching substring of the input (values stay strings).
  Positions past the end of a shorter input yield empty strings.
  """
  field_positions = (
    ("year", slice(0, 4)),
    ("month", slice(5, 7)),
    ("day", slice(8, 10)),
    ("hour", slice(11, 13)),
    ("minute", slice(14, 16)),
  )
  return {field: iso_datetime_string[span] for field, span in field_positions}

In [6]:
def add_reformatted_datetimes(example):
  """
  Expand the two ISO-formatted timestamps of an incident into separate features.

  For incident_date_created and incident_date_extinguished, the year, month,
  day, hour and minute are parsed out and stored on `example` under
  incident_created_<part> and incident_extinguished_<part>.

  The dict is modified in place; nothing is returned.
  """
  for event in ("created", "extinguished"):
    parts = parse_iso_datetime(example["incident_date_" + event])
    for part in ("year", "month", "day", "hour", "minute"):
      example["incident_" + event + "_" + part] = parts[part]

In [7]:
def get_only_filtered_features(example):
  """
  Build a fresh dict holding just the FEATURES entries of `example`.

  Keys appear in FEATURES order and the input dict is left untouched.
  A feature missing from `example` raises KeyError.
  """
  return {name: example[name] for name in FEATURES}

## Main

In [8]:
# Reformatted incident rows (one filtered dict per kept CSV row); appended to by
# the read cell and written out by the final cell.
incidents = []

In [9]:
# Read in data and reformat it.
# Start from an empty list every time: this cell only appends, so without the
# reset a second run of the cell would duplicate every incident in the output.
incidents = []
with open(BASE_FOLDER_PATH + ORIGINAL_FILENAME, newline="") as infile:
  reader = csv.DictReader(infile)

  for row in reader:
    # Plain string comparison: timestamps starting with "2013" or a later year
    # sort after "2013", so this keeps incidents created in 2013 or later.
    # Earlier years (and an empty creation date) fall through to the skip branch.
    if row["incident_date_created"] > "2013":
      add_reformatted_datetimes(row)
      incidents.append(get_only_filtered_features(row))
    else:
      print("skipping: ", row)

skipping:  {'incident_name': 'Pala Fire', 'incident_is_final': 'Y', 'incident_date_last_update': '2020-09-16T14:07:35Z', 'incident_date_created': '2009-05-24T14:56:00Z', 'incident_administrative_unit': 'CAL FIRE San Diego Unit', 'incident_administrative_unit_url': '', 'incident_county': 'San Diego', 'incident_location': 'Hwy 76 and Pala Temecula, northwest of Pala', 'incident_acres_burned': '122', 'incident_containment': '100', 'incident_control': '', 'incident_cooperating_agencies': 'CAL FIRE San Diego Unit', 'incident_longitude': '1', 'incident_latitude': '1', 'incident_type': 'Wildfire', 'incident_id': '8f61f461-552d-4538-b186-35ab030da416', 'incident_url': 'https://osfm.fire.ca.gov/incidents/2009/5/24/pala-fire/', 'incident_date_extinguished': '2009-05-25T00:00:00Z', 'incident_dateonly_extinguished': '2009-05-25', 'incident_dateonly_created': '2009-05-24', 'is_active': 'N', 'calfire_incident': 'True', 'notification_desired': 'False'}
skipping:  {'incident_name': 'Taglio Fire', 'inc

In [16]:
# Write out reformatted data, ordered by creation time.
def _created_sort_key(incident):
  """Return the created (year, month, day, hour, minute) strings as a sort key."""
  return tuple(
    incident["incident_created_" + part]
    for part in ("year", "month", "day", "hour", "minute")
  )

with open(BASE_FOLDER_PATH + REFORMATTED_FILENAME, "w", newline="") as outfile:
  writer = csv.DictWriter(outfile, fieldnames=FEATURES)
  writer.writeheader()
  # Sort a copy so the in-memory `incidents` list keeps its read order.
  incidents_by_created = list(incidents)
  incidents_by_created.sort(key=_created_sort_key)
  writer.writerows(incidents_by_created)