In [2]:
import pandas as pd
import numpy as np

In [3]:
fires = pd.read_csv('fire_archive_J1V-C2_695401.csv')
fires

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,37.27382,-80.00159,314.58,0.40,0.44,2024-01-01,646,N20,VIIRS,n,2,271.91,1.08,N,2
1,35.89682,-78.17806,298.31,0.48,0.40,2024-01-01,646,N20,VIIRS,n,2,271.61,0.35,N,0
2,37.54781,-76.73995,299.90,0.44,0.38,2024-01-01,646,N20,VIIRS,n,2,271.99,1.55,N,0
3,32.44569,-90.74300,302.37,0.73,0.76,2024-01-01,647,N20,VIIRS,n,2,275.72,1.54,N,0
4,32.63197,-89.89957,329.62,0.65,0.73,2024-01-01,647,N20,VIIRS,n,2,277.25,4.93,N,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535131,19.40874,-155.27246,348.81,0.39,0.36,2024-12-31,2336,N20,VIIRS,n,2,322.52,12.83,D,1
535132,19.40605,-155.29100,367.00,0.39,0.36,2024-12-31,2336,N20,VIIRS,h,2,338.29,226.27,D,1
535133,19.40767,-155.27988,367.00,0.39,0.36,2024-12-31,2336,N20,VIIRS,h,2,333.70,143.82,D,1
535134,19.40820,-155.27617,367.00,0.39,0.36,2024-12-31,2336,N20,VIIRS,h,2,333.68,12.83,D,1


In [4]:
### DATA CLEANING

# Convert to datetime
fires['acq_date'] = pd.to_datetime(fires['acq_date'])
fires['datetime'] = pd.to_datetime(
    fires['acq_date'].astype(str) + ' ' +
    fires['acq_time'].astype(str).str.zfill(4),
    format='%Y-%m-%d %H%M'
)

# Remove 'low' confidence
fires = fires[fires['confidence'] != 'l']

In [5]:
import geopandas as gpd
from shapely.geometry import Point

In [6]:
# US counties shapefile from US Census Bureau
url = "https://www2.census.gov/geo/tiger/TIGER2023/COUNTY/tl_2023_us_county.zip"

counties_gdf = gpd.read_file(url)

In [7]:
counties_gdf.head()

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,GEOID,GEOIDFQ,NAME,NAMELSAD,LSAD,CLASSFP,MTFCC,CSAFP,CBSAFP,METDIVFP,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,31,39,835841,31039,0500000US31039,Cuming,Cuming County,6,H1,G4020,,,,A,1477563029,10772508,41.9158651,-96.7885168,"POLYGON ((-96.55516 41.91587, -96.55515 41.914..."
1,53,69,1513275,53069,0500000US53069,Wahkiakum,Wahkiakum County,6,H1,G4020,,,,A,680980771,61564427,46.2946377,-123.4244583,"POLYGON ((-123.72755 46.2645, -123.72756 46.26..."
2,35,11,933054,35011,0500000US35011,De Baca,De Baca County,6,H1,G4020,,,,A,6016818946,29090018,34.3592729,-104.3686961,"POLYGON ((-104.89337 34.08894, -104.89337 34.0..."
3,31,109,835876,31109,0500000US31109,Lancaster,Lancaster County,6,H1,G4020,339.0,30700.0,,A,2169269688,22850324,40.7835474,-96.6886584,"POLYGON ((-96.68493 40.5233, -96.69219 40.5231..."
4,31,129,835886,31129,0500000US31129,Nuckolls,Nuckolls County,6,H1,G4020,,,,A,1489645187,1718484,40.1764918,-98.0468422,"POLYGON ((-98.2737 40.1184, -98.27374 40.1224,..."


In [8]:
counties_gdf = counties_gdf[['STATEFP', 'COUNTYFP', 'NAME', 'geometry']]
counties_gdf.columns = ['state_fips', 'county_fips', 'county_name', 'geometry']

In [9]:
# Create state FIPS to abbreviation mapping
state_fips_to_abbr = {
    '01': 'AL', '02': 'AK', '04': 'AZ', '05': 'AR', '06': 'CA', '08': 'CO', '09': 'CT',
    '10': 'DE', '11': 'DC', '12': 'FL', '13': 'GA', '15': 'HI', '16': 'ID', '17': 'IL',
    '18': 'IN', '19': 'IA', '20': 'KS', '21': 'KY', '22': 'LA', '23': 'ME', '24': 'MD',
    '25': 'MA', '26': 'MI', '27': 'MN', '28': 'MS', '29': 'MO', '30': 'MT', '31': 'NE',
    '32': 'NV', '33': 'NH', '34': 'NJ', '35': 'NM', '36': 'NY', '37': 'NC', '38': 'ND',
    '39': 'OH', '40': 'OK', '41': 'OR', '42': 'PA', '44': 'RI', '45': 'SC', '46': 'SD',
    '47': 'TN', '48': 'TX', '49': 'UT', '50': 'VT', '51': 'VA', '53': 'WA', '54': 'WV',
    '55': 'WI', '56': 'WY', '72': 'PR', '78': 'VI'
}

# Map state FIPS to abbreviations
counties_gdf['state_abbr'] = counties_gdf['state_fips'].map(state_fips_to_abbr)


In [10]:
fires_gdf = gpd.GeoDataFrame(
    fires,
    geometry = gpd.points_from_xy(fires['longitude'], fires['latitude']),
    crs = 'EPSG:4326' # WGS84 coordinate system
)

# Project counties to match fires CRS
counties_gdf = counties_gdf.to_crs('EPSG:4326')

In [11]:
fires_gdf = gpd.sjoin(
    fires_gdf,
    counties_gdf,
    how = 'left',
    predicate = 'within'
)

In [12]:
fires_gdf = fires_gdf.drop(columns = ['satellite', 'instrument', 'geometry', 'index_right'])

In [13]:
fires_gdf

Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,confidence,version,bright_t31,frp,daynight,type,datetime,state_fips,county_fips,county_name,state_abbr
0,37.27382,-80.00159,314.58,0.40,0.44,2024-01-01,646,n,2,271.91,1.08,N,2,2024-01-01 06:46:00,51,770,Roanoke,VA
1,35.89682,-78.17806,298.31,0.48,0.40,2024-01-01,646,n,2,271.61,0.35,N,0,2024-01-01 06:46:00,37,127,Nash,NC
2,37.54781,-76.73995,299.90,0.44,0.38,2024-01-01,646,n,2,271.99,1.55,N,0,2024-01-01 06:46:00,51,097,King and Queen,VA
3,32.44569,-90.74300,302.37,0.73,0.76,2024-01-01,647,n,2,275.72,1.54,N,0,2024-01-01 06:47:00,28,149,Warren,MS
4,32.63197,-89.89957,329.62,0.65,0.73,2024-01-01,647,n,2,277.25,4.93,N,0,2024-01-01 06:47:00,28,089,Madison,MS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535130,19.40550,-155.29474,332.65,0.39,0.36,2024-12-31,2336,n,2,303.51,5.71,D,1,2024-12-31 23:36:00,15,001,Hawaii,HI
535131,19.40874,-155.27246,348.81,0.39,0.36,2024-12-31,2336,n,2,322.52,12.83,D,1,2024-12-31 23:36:00,15,001,Hawaii,HI
535132,19.40605,-155.29100,367.00,0.39,0.36,2024-12-31,2336,h,2,338.29,226.27,D,1,2024-12-31 23:36:00,15,001,Hawaii,HI
535133,19.40767,-155.27988,367.00,0.39,0.36,2024-12-31,2336,h,2,333.70,143.82,D,1,2024-12-31 23:36:00,15,001,Hawaii,HI


In [14]:
fires_gdf['state_abbr'].value_counts().head(10)

state_abbr
OR    63819
ID    50180
TX    48566
CA    45482
LA    19026
FL    18383
AL    18264
GA    17855
WA    16754
AK    15402
Name: count, dtype: int64

In [15]:
fires_gdf[['county_name', 'state_abbr']].value_counts().head()

county_name  state_abbr
Valley       ID            12944
Grant        OR            12896
Boise        ID             9262
Tehama       CA             7895
Harney       OR             7224
Name: count, dtype: int64

In [16]:
fires_gdf.to_csv('data/fires2024.csv', index=False)

In [17]:
fires_gdf.dtypes

latitude              float64
longitude             float64
brightness            float64
scan                  float64
track                 float64
acq_date       datetime64[ns]
acq_time                int64
confidence             object
version                 int64
bright_t31            float64
frp                   float64
daynight               object
type                    int64
datetime       datetime64[ns]
state_fips             object
county_fips            object
county_name            object
state_abbr             object
dtype: object