# Wildfire Risk - Data Pre-Prep - Join Challenge
__Team 3 - Dave Friesen, John Chen, and Kyle Dalope__<br>
__ADS-508-02-SP23__<br><br>
__GitHub link: https://github.com/davefriesen/wildfire-risk__

In [1]:
__authors__ = ['Dave Friesen', 'John Chen', 'Kyle Dalope']
__contact__ = ['dfriesen@sandiego.edu', 'johnchen@sandiego.edu', 'kdalope@sandiego.edu']
__date__ = '2023-03-20'
__license__ = 'MIT'
__version__ = '1.0.2'

# Setup

In [2]:
# Import basic and data access libraries
import pandas as pd
from profiler import profile, profile_cat

# Import utility libraries
import h3

# Geohash and Merge

In [3]:
wfil_df = pd.read_csv('../data/fires.csv', low_memory=False)
wthr_df = pd.read_csv('../data/weather.csv', low_memory=False)
cond_df = pd.read_csv('../data/conditions.csv', low_memory=False)

In [4]:
# Define function to encode latitude and longitude into h3 hexagons
def encode_geohash(row, lat, lng):
    try:
        if pd.notnull(row[lat]) and pd.notnull(row[lng]):
            return h3.geo_to_h3(row[lat], row[lng], resolution=7)
    except ValueError as e:
        print(f"Error: {e}")
    return None

# Add h3 hexagon column to dataframe using apply method
wfil_df['geohash'] = wfil_df.apply(encode_geohash, axis=1, args=('InitialLatitude', 'InitialLongitude'))
geohash_counts = wfil_df.groupby('geohash').size().reset_index(name='Count')
print(geohash_counts.sort_values(by='Count', ascending=False).head(10))

wthr_df['geohash'] = wthr_df.apply(encode_geohash, axis=1, args=('LATITUDE', 'LONGITUDE'))
geohash_counts = wthr_df.groupby('geohash').size().reset_index(name='Count')
print(geohash_counts.sort_values(by='Count', ascending=False).head(10))

cond_df['geohash'] = cond_df.apply(encode_geohash, axis=1, args=('LAT', 'LON'))
geohash_counts = wfil_df.groupby('geohash').size().reset_index(name='Count')
print(geohash_counts.sort_values(by='Count', ascending=False).head(10))

# Save updated dataframe to CSV file
wfil_df.to_csv('../data/fires_geohash.csv', index=False)
wthr_df.to_csv('../data/weather_geohash.csv', index=False)
cond_df.to_csv('../data/conditions_geohash.csv', index=False)

               geohash  Count
26364  8729a56f2ffffff    384
26357  8729a56e9ffffff    325
26356  8729a56e8ffffff    268
26301  8729a5689ffffff    254
25093  8729a1441ffffff    234
25128  8729a146affffff    224
26342  8729a56d6ffffff    218
25237  8729a1559ffffff    215
26315  8729a569dffffff    194
25103  8729a144cffffff    183
             geohash  Count
554  8729ab19effffff    549
23   87268276affffff    449
26   8726835a8ffffff    449
297  872885accffffff    444
321  8728a312affffff    442
335  8728a9620ffffff    441
58   872698809ffffff    438
274  872833804ffffff    437
299  872885c6cffffff    436
292  8728818b6ffffff    436
               geohash  Count
26364  8729a56f2ffffff    384
26357  8729a56e9ffffff    325
26356  8729a56e8ffffff    268
26301  8729a5689ffffff    254
25093  8729a1441ffffff    234
25128  8729a146affffff    224
26342  8729a56d6ffffff    218
25237  8729a1559ffffff    215
26315  8729a569dffffff    194
25103  8729a144cffffff    183


In [5]:
#**********NOTE**********#
# WE MAY ULTIMATELY NEED TO MERGE A SUPERSET OF WEATHER AND CONDITIONS, THEN
#   FIRES, IN ORDER TO CREATE A "FIRE/NOT FIRE" CLASSIFICATION DATASET
merged_df = pd.merge(wfil_df, wthr_df, on='geohash', how='inner')
merged_df = pd.merge(merged_df, cond_df, on='geohash', how='inner')

In [6]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 183406 entries, 0 to 183405
Data columns (total 87 columns):
 #   Column                    Non-Null Count   Dtype  
---  ------                    --------------   -----  
 0   ContainmentDateTime       86543 non-null   object 
 1   ControlDateTime           99641 non-null   object 
 2   DiscoveryAcres            164765 non-null  float64
 3   EstimatedCostToDate       2873 non-null    float64
 4   FinalAcres                8248 non-null    float64
 5   FireBehaviorGeneral       1851 non-null    object 
 6   FireBehaviorGeneral1      1851 non-null    object 
 7   FireBehaviorGeneral2      1235 non-null    object 
 8   FireBehaviorGeneral3      0 non-null       object 
 9   FireCause                 183406 non-null  object 
 10  FireCauseGeneral          30019 non-null   object 
 11  FireCauseSpecific         13150 non-null   object 
 12  FireDiscoveryDateTime     183406 non-null  object 
 13  FireOutDateTime           98463 non-null   o