# Organize wildfire data

In [2]:
import pandas as pd
import os
from datetime import datetime
from shapely.geometry import Polygon, Point

In [3]:
# Load the raw wildfire point records (BC_Fire_Point_2022.csv) into a DataFrame.
raw_csv_path = '../datasets/bc_fire_points/BC_Fire_Point_2022.csv'
fire_df = pd.read_csv(raw_csv_path)
# Preview the first rows to check which columns were read.
fire_df.head()

Unnamed: 0,X,Y,Fire Number,IGNITION DATE,LATITUDE,LONGITUDE
0,-121.019483,52.623006,C31003,7/17/2022,52.623,-121.019467
1,-122.375433,51.014706,K71995,8/24/2022,51.0147,-122.375417
2,-118.872232,49.543989,N61926,8/23/2022,49.543983,-118.872217
3,-120.567882,50.472173,K21429,8/12/2022,50.472167,-120.567867
4,-121.489715,49.221423,V11888,8/23/2022,49.221417,-121.4897


In [4]:
# Remove the columns the rest of the notebook does not use: the fire
# identifier and the X/Y columns.
unused_columns = ["Fire Number", "X", "Y"]
fire_df = fire_df.drop(columns=unused_columns)

In [5]:
# Combine LONGITUDE and LATITUDE into one "longitude, latitude" text column
# (longitude first, separated by a comma and a space).
longitude_text = fire_df['LONGITUDE'].astype(str)
latitude_text = fire_df['LATITUDE'].astype(str)
fire_df['coordinates'] = longitude_text + ', ' + latitude_text

In [6]:
# Rename the "IGNITION DATE" column to the shorter name "date".
column_renames = {"IGNITION DATE": "date"}
fire_df = fire_df.rename(column_renames, axis="columns")

In [7]:
# Preview the first five rows to check the current set of columns.
fire_df.head(n=5)

Unnamed: 0,date,LATITUDE,LONGITUDE,coordinates
0,7/17/2022,52.623,-121.019467,"-121.019467, 52.623"
1,8/24/2022,51.0147,-122.375417,"-122.375417, 51.0147"
2,8/23/2022,49.543983,-118.872217,"-118.872217, 49.543983"
3,8/12/2022,50.472167,-120.567867,"-120.567867, 50.472167"
4,8/23/2022,49.221417,-121.4897,"-121.4897, 49.221417"


In [8]:
# Convert the date column from M/D/YYYY text to compact YYYYMMDD text.
# pd.to_datetime parses the whole column in one vectorized call (instead of a
# Python-level strptime per row) and still raises ValueError for text that does
# not match the format. Missing values stay missing instead of raising.
# This cell expects M/D/YYYY input, so re-running it after the column already
# holds YYYYMMDD strings raises a ValueError.
parsed_dates = pd.to_datetime(fire_df['date'], format='%m/%d/%Y')
fire_df['date'] = parsed_dates.dt.strftime('%Y%m%d')

In [9]:
# The separate LATITUDE/LONGITUDE columns are no longer needed once the
# combined "coordinates" column exists, so remove them and show the result.
redundant_columns = ["LATITUDE", "LONGITUDE"]
fire_df = fire_df.drop(columns=redundant_columns)
fire_df

Unnamed: 0,date,coordinates
0,20220717,"-121.019467, 52.623"
1,20220824,"-122.375417, 51.0147"
2,20220823,"-118.872217, 49.543983"
3,20220812,"-120.567867, 50.472167"
4,20220823,"-121.4897, 49.221417"
...,...,...
1796,20220822,"-126.947533, 54.082667"
1797,20220714,"-121.446867, 49.782967"
1798,20220819,"-126.409033, 54.18675"
1799,20230116,"-118.743783, 52.43955"


In [10]:
# Vertices of the area of interest, listed in the same value order as the
# "coordinates" column (longitude first, then latitude).
coordinates_list = [
    [-128.54948032, 51.37397123],
    [-123.42622011591264, 51.26222303279471],
    [-123.52937300599798, 48.20558993286168],
    [-128.3913125586851, 48.7163322815431],
]
wanted_polygon = Polygon(coordinates_list)


def _point_from_text(coordinate_text):
    """Parse a "first, second" coordinate string into a shapely Point.

    The text is split on commas and the first two pieces are converted with
    float(), which tolerates the space left after the comma. The first value
    becomes the Point's x and the second its y.
    """
    pieces = coordinate_text.split(',')
    return Point(float(pieces[0]), float(pieces[1]))


# Create a column called points holding one Point per row.
fire_df['points'] = fire_df['coordinates'].apply(_point_from_text)

In [11]:
# Display the frame to inspect the "points" column next to "coordinates".
fire_df

Unnamed: 0,date,coordinates,points
0,20220717,"-121.019467, 52.623",POINT (-121.019467 52.623)
1,20220824,"-122.375417, 51.0147",POINT (-122.375417 51.0147)
2,20220823,"-118.872217, 49.543983",POINT (-118.872217 49.543983)
3,20220812,"-120.567867, 50.472167",POINT (-120.567867 50.472167)
4,20220823,"-121.4897, 49.221417",POINT (-121.4897 49.221417)
...,...,...,...
1796,20220822,"-126.947533, 54.082667",POINT (-126.947533 54.082667)
1797,20220714,"-121.446867, 49.782967",POINT (-121.446867 49.782967)
1798,20220819,"-126.409033, 54.18675",POINT (-126.409033 54.18675)
1799,20230116,"-118.743783, 52.43955",POINT (-118.743783 52.43955)


In [12]:
# Keep only the rows whose point lies inside the polygon of interest.
# wanted_polygon.contains is called once per point to build a boolean row mask.
inside_polygon = fire_df['points'].apply(wanted_polygon.contains)
fire_df = fire_df[inside_polygon]

In [13]:
# Order the rows by the date column, ascending (YYYYMMDD strings sort in
# calendar order).
fire_df = fire_df.sort_values(by=['date'], ascending=True)

In [14]:
# Save the cleaned data to a new CSV.
# exist_ok=True creates the output directory only when it is missing, with no
# separate existence check that could race with another process creating it.
output_dir = "../dataset_tables"
os.makedirs(output_dir, exist_ok=True)

# index=False leaves the DataFrame index out of the written file.
fire_df.to_csv('../dataset_tables/cleaned_wildfire_infomation.csv', index=False)