### Imports

In [1]:
import sqlite3 # to deal with the database
import pandas as pd
from shapely.geometry import Point # to convert longitude/latitude pairs into Point geometries

### Helper Functions

In [2]:
"""
Returns a Connection object that represents the input db.
"""
def get_sql_connection(sql_file):
    """Open the SQLite database located at sql_file.

    Args:
        sql_file: path (or SQLite connection string such as ':memory:')
            passed straight to sqlite3.connect.

    Returns:
        The sqlite3.Connection object for that database.
    """
    connection = sqlite3.connect(sql_file)
    return connection

"""
Returns a df for table from Connection object.
"""
def get_table(table_name, conn):
    """Load every row and column of one table into a DataFrame.

    Args:
        table_name: name of the table to read; may be schema-qualified
            with a dot (e.g. 'main.fires').
        conn: open database connection accepted by pandas.read_sql_query.

    Returns:
        A pandas DataFrame holding the result of SELECT * on the table.
    """
    # SQL placeholders cannot stand in for identifiers, so the name has to be
    # spliced into the statement. Double-quote each dotted part (doubling any
    # embedded quote) so names with spaces or reserved words work and the
    # name cannot break out of the identifier position.
    quoted_parts = [
        '"{}"'.format(part.replace('"', '""'))
        for part in str(table_name).split('.')
    ]
    query = "Select * from {}".format('.'.join(quoted_parts))
    return pd.read_sql_query(query, conn)

"""
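Returns the date and time-of-day columns of row merged into one timestamp
(discovery columns when target is 'DISCOVERY_TIME', containment columns otherwise).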
"""
def insert_time(row, target='DISCOVERY_TIME'):
    """Combine a row's calendar date with its time of day.

    Args:
        row: mapping-like record (e.g. a DataFrame row) holding the columns
            DATE_DISCOVERED / DISCOVERY_TIME or DATE_CONTAINED / CONT_TIME.
        target: 'DISCOVERY_TIME' selects the discovery pair of columns; any
            other value selects the containment pair.

    Returns:
        The date plus the hours/minutes/seconds of the time value, or pd.NaT
        when either the date or the time is missing.
    """
    if target == 'DISCOVERY_TIME':
        date_col, time_col = 'DATE_DISCOVERED', 'DISCOVERY_TIME'
    else:
        date_col, time_col = 'DATE_CONTAINED', 'CONT_TIME'

    # pd.isna treats NaT, None and NaN alike; an identity check against
    # pd.NaT would let None/NaN through to the .time() call below.
    date_value = row[date_col]
    if pd.isna(date_value):
        return pd.NaT
    time_value = row[time_col]
    if pd.isna(time_value):
        return pd.NaT

    # Only the clock part of the time value matters; its date part is dropped.
    clock = time_value.time()
    offset = pd.Timedelta(hours=clock.hour, minutes=clock.minute,
                          seconds=clock.second, microseconds=clock.microsecond)
    return date_value + offset

### Variable Setup

In [3]:
# location of the sqlite file that holds the USGS data
USGS_DATA_PATH = './data/FPA_FOD_20170508.sqlite'
# connection to the USGS data; later cells read tables through it
usgs_db = get_sql_connection(sql_file=USGS_DATA_PATH)

### Load initial data from sqlite db

In [4]:
# columns we don't really care about for this analysis
dropped_cols = [
    'OBJECTID', 'Shape', 'FPA_ID',
    'SOURCE_SYSTEM_TYPE', 'SOURCE_SYSTEM',
    'NWCG_REPORTING_AGENCY', 'NWCG_REPORTING_UNIT_ID', 'NWCG_REPORTING_UNIT_NAME',
    'SOURCE_REPORTING_UNIT', 'SOURCE_REPORTING_UNIT_NAME',
    'LOCAL_FIRE_REPORT_ID', 'LOCAL_INCIDENT_ID',
    'FIRE_CODE', 'FIRE_NAME',
    'ICS_209_INCIDENT_NUMBER', 'ICS_209_NAME',
    'MTBS_ID', 'MTBS_FIRE_NAME',
    'COMPLEX_NAME', 'STAT_CAUSE_CODE', 'OWNER_CODE',
]

# load the fires table from the sqlite db
fires_df = get_table('fires', usgs_db)
# drop the unnecessary columns listed above
fires_df = fires_df.drop(columns=dropped_cols)

### Update df for geospatial analysis

In [5]:
# create a geometry columns with Point objects
fires_df['geometry'] = fires_df.apply(lambda x: Point(x.LONGITUDE, x.LATITUDE), axis=1)
# long and lat cols are no longer needed
fires_df.drop(columns=['LONGITUDE', 'LATITUDE'], inplace=True)

### Update df for time series analysis

In [6]:
# make columns that have dtype = datetime[64] for time of day and date
fires_df['DATE_DISCOVERED'] = pd.to_datetime(fires_df['FIRE_YEAR']*1000 + fires_df['DISCOVERY_DOY'], format='%Y%j')
fires_df['DATE_CONTAINED'] = pd.to_datetime(fires_df['FIRE_YEAR']*1000 + fires_df['CONT_DOY'], format='%Y%j')
fires_df['DISCOVERY_TIME'] = pd.to_datetime(fires_df['DISCOVERY_TIME'], errors='coerce', format='%H%M')
fires_df['CONT_TIME'] = pd.to_datetime(fires_df['CONT_TIME'], errors='coerce', format='%H%M')

# organize the time related data into 2 respective columns (discovered and contained)
fires_df['DISCOVERY_TIME'] = fires_df.apply(lambda x: insert_time(x), axis=1)
fires_df['CONT_TIME'] = fires_df.apply(lambda x: insert_time(x, target='CONT_TIME'), axis=1)
fires_df.drop(columns=['FIRE_YEAR', 'DISCOVERY_DATE', 'DISCOVERY_DOY', 'CONT_DATE', 'CONT_DOY',
                       'DATE_DISCOVERED', 'DATE_CONTAINED'], inplace=True)

### Export updated df as csv to decrease data load time

In [7]:
# write the updated frame to csv, leaving out the row index
fires_df.to_csv(path_or_buf='./data/fires_df.csv', index=False)