In [1]:
import pandas as pd
import numpy as np
import json
import requests
import time
import datetime as dt

In [2]:
# Timeout in seconds
timeout_seconds = 30

# Define the JSON URL
url = "https://apps.fs.usda.gov/arcx/rest/services/EDW/EDW_FireOccurrenceAndPerimeter_01/MapServer/9/query?where=1%3D1&outFields=FIREYEAR,SECURITYID,DISCOVERYDATETIME,SIZECLASS,TOTALACRES,STATCAUSE,DATASOURCE,FIREOUTDATETIME,OWNERAGENCY,UNITIDOWNER,PROTECTIONAGENCY,LATDD83,LONGDD83,POINTTYPE,PERIMEXISTS,FIRERPTQC,ACCURACY,FIRENAME,GLOBALID,CN,REVDATE&outSR=4326&f=json"

try:
    response = requests.get(url, timeout=timeout_seconds)
    data = response.json()

    # Check if data was fetched successfully
    if "error" not in data:
        df = pd.json_normalize(data['features'])
        print("Data load successful!")
    else:
        print("Error fetching data:", data["error"]["message"])

except requests.Timeout:
    print("The request timed out. Please try again later.")

Data load successful!


In [3]:
# Show the DataFrame
df.head()

Unnamed: 0,attributes.FIREYEAR,attributes.SECURITYID,attributes.DISCOVERYDATETIME,attributes.SIZECLASS,attributes.TOTALACRES,attributes.STATCAUSE,attributes.DATASOURCE,attributes.FIREOUTDATETIME,attributes.OWNERAGENCY,attributes.UNITIDOWNER,...,attributes.POINTTYPE,attributes.PERIMEXISTS,attributes.FIRERPTQC,attributes.ACCURACY,attributes.FIRENAME,attributes.GLOBALID,attributes.CN,attributes.REVDATE,geometry.x,geometry.y
0,1966.0,210,-105408000000.0,B,1.0,Lightning,24,-105235200000.0,USFS,COARF,...,General,N,,,Gordon Creek,{3565588F-6D0D-4200-9587-871500846964},,1654560000000.0,-105.470106,40.015613
1,1966.0,210,-104803200000.0,B,0.5,Camping,24,-104630400000.0,USFS,COARF,...,General,N,,,Twin Sisters 1,{ACEDAD28-CA56-4D6C-84A6-B277ED2FAB8D},,1654560000000.0,-105.382479,39.969788
2,1966.0,210,-103248000000.0,B,0.5,Lightning,24,-103075200000.0,USFS,COARF,...,General,N,,,Twin Sisters 2,{2F5A81EA-E30C-4D69-A898-5E3E917CEA8E},,1654560000000.0,-105.493964,40.252711
3,1966.0,210,-102643200000.0,A,0.0,9,24,-102556800000.0,USFS,COARF,...,General,N,,,Long Lake,{838BA271-1200-4EA8-88DD-CB05E57372C0},,1654560000000.0,-105.587667,40.151352
4,1965.0,210,-141609600000.0,B,2.0,Camping,24,-141436800000.0,USFS,COARF,...,General,N,,,Cascade,{9173D33D-294F-49EF-B32A-88F124E77DB7},,1654560000000.0,-105.624667,39.718944


In [4]:
# Show the column names
df.columns

Index(['attributes.FIREYEAR', 'attributes.SECURITYID',
       'attributes.DISCOVERYDATETIME', 'attributes.SIZECLASS',
       'attributes.TOTALACRES', 'attributes.STATCAUSE',
       'attributes.DATASOURCE', 'attributes.FIREOUTDATETIME',
       'attributes.OWNERAGENCY', 'attributes.UNITIDOWNER',
       'attributes.PROTECTIONAGENCY', 'attributes.LATDD83',
       'attributes.LONGDD83', 'attributes.POINTTYPE', 'attributes.PERIMEXISTS',
       'attributes.FIRERPTQC', 'attributes.ACCURACY', 'attributes.FIRENAME',
       'attributes.GLOBALID', 'attributes.CN', 'attributes.REVDATE',
       'geometry.x', 'geometry.y'],
      dtype='object')

In [5]:
# Remove 'attributes.' from column names
df.columns = df.columns.str.replace('attributes.', '', regex=False)
df.head()

Unnamed: 0,FIREYEAR,SECURITYID,DISCOVERYDATETIME,SIZECLASS,TOTALACRES,STATCAUSE,DATASOURCE,FIREOUTDATETIME,OWNERAGENCY,UNITIDOWNER,...,POINTTYPE,PERIMEXISTS,FIRERPTQC,ACCURACY,FIRENAME,GLOBALID,CN,REVDATE,geometry.x,geometry.y
0,1966.0,210,-105408000000.0,B,1.0,Lightning,24,-105235200000.0,USFS,COARF,...,General,N,,,Gordon Creek,{3565588F-6D0D-4200-9587-871500846964},,1654560000000.0,-105.470106,40.015613
1,1966.0,210,-104803200000.0,B,0.5,Camping,24,-104630400000.0,USFS,COARF,...,General,N,,,Twin Sisters 1,{ACEDAD28-CA56-4D6C-84A6-B277ED2FAB8D},,1654560000000.0,-105.382479,39.969788
2,1966.0,210,-103248000000.0,B,0.5,Lightning,24,-103075200000.0,USFS,COARF,...,General,N,,,Twin Sisters 2,{2F5A81EA-E30C-4D69-A898-5E3E917CEA8E},,1654560000000.0,-105.493964,40.252711
3,1966.0,210,-102643200000.0,A,0.0,9,24,-102556800000.0,USFS,COARF,...,General,N,,,Long Lake,{838BA271-1200-4EA8-88DD-CB05E57372C0},,1654560000000.0,-105.587667,40.151352
4,1965.0,210,-141609600000.0,B,2.0,Camping,24,-141436800000.0,USFS,COARF,...,General,N,,,Cascade,{9173D33D-294F-49EF-B32A-88F124E77DB7},,1654560000000.0,-105.624667,39.718944


In [7]:
# Rename the DataFrame
fire_df = df.copy()
fire_df.head()

Unnamed: 0,FIREYEAR,SECURITYID,DISCOVERYDATETIME,SIZECLASS,TOTALACRES,STATCAUSE,DATASOURCE,FIREOUTDATETIME,OWNERAGENCY,UNITIDOWNER,...,POINTTYPE,PERIMEXISTS,FIRERPTQC,ACCURACY,FIRENAME,GLOBALID,CN,REVDATE,geometry.x,geometry.y
0,1966.0,210,-105408000000.0,B,1.0,Lightning,24,-105235200000.0,USFS,COARF,...,General,N,,,Gordon Creek,{3565588F-6D0D-4200-9587-871500846964},,1654560000000.0,-105.470106,40.015613
1,1966.0,210,-104803200000.0,B,0.5,Camping,24,-104630400000.0,USFS,COARF,...,General,N,,,Twin Sisters 1,{ACEDAD28-CA56-4D6C-84A6-B277ED2FAB8D},,1654560000000.0,-105.382479,39.969788
2,1966.0,210,-103248000000.0,B,0.5,Lightning,24,-103075200000.0,USFS,COARF,...,General,N,,,Twin Sisters 2,{2F5A81EA-E30C-4D69-A898-5E3E917CEA8E},,1654560000000.0,-105.493964,40.252711
3,1966.0,210,-102643200000.0,A,0.0,9,24,-102556800000.0,USFS,COARF,...,General,N,,,Long Lake,{838BA271-1200-4EA8-88DD-CB05E57372C0},,1654560000000.0,-105.587667,40.151352
4,1965.0,210,-141609600000.0,B,2.0,Camping,24,-141436800000.0,USFS,COARF,...,General,N,,,Cascade,{9173D33D-294F-49EF-B32A-88F124E77DB7},,1654560000000.0,-105.624667,39.718944


In [8]:
# Show the fire_df data types
fire_df.dtypes

FIREYEAR             float64
SECURITYID            object
DISCOVERYDATETIME    float64
SIZECLASS             object
TOTALACRES           float64
STATCAUSE             object
DATASOURCE            object
FIREOUTDATETIME      float64
OWNERAGENCY           object
UNITIDOWNER           object
PROTECTIONAGENCY      object
LATDD83              float64
LONGDD83             float64
POINTTYPE             object
PERIMEXISTS           object
FIRERPTQC             object
ACCURACY              object
FIRENAME              object
GLOBALID              object
CN                    object
REVDATE              float64
geometry.x           float64
geometry.y           float64
dtype: object

In [9]:
# Convert the 3 date-time columns to readable datetime format
for column in ["DISCOVERYDATETIME", "FIREOUTDATETIME", "REVDATE"]:
    fire_df[column] = pd.to_datetime(fire_df[column], unit='ms', errors='coerce')

In [14]:
# Rename geometry.x	and geometry.y	
fire_df = fire_df.rename(columns={"geometry.x": "LONGITUDE", "geometry.y": "LATITUDE"})

In [15]:
# Show the fire_df data types
fire_df.dtypes

FIREYEAR                    float64
SECURITYID                   object
DISCOVERYDATETIME    datetime64[ns]
SIZECLASS                    object
TOTALACRES                  float64
STATCAUSE                    object
DATASOURCE                   object
FIREOUTDATETIME      datetime64[ns]
OWNERAGENCY                  object
UNITIDOWNER                  object
PROTECTIONAGENCY             object
LATDD83                     float64
LONGDD83                    float64
POINTTYPE                    object
PERIMEXISTS                  object
FIRERPTQC                    object
ACCURACY                     object
FIRENAME                     object
GLOBALID                     object
CN                           object
REVDATE              datetime64[ns]
Longitude                   float64
Latitude                    float64
dtype: object

In [16]:
new_order = ["OWNERAGENCY", "FIREYEAR", "LATDD83", "LONGDD83", "POINTTYPE", "FIRENAME", 
             "STATCAUSE", "SIZECLASS", "TOTALACRES", "Latitude", "Longitude", "DISCOVERYDATETIME", 
             "FIREOUTDATETIME", "REVDATE", "PROTECTIONAGENCY", "ACCURACY", "DATASOURCE", 
             "UNITIDOWNER", "SECURITYID", "PERIMEXISTS", "GLOBALID", "FIRERPTQC", "CN"]
fire_df = fire_df[new_order]
fire_df.head()

Unnamed: 0,OWNERAGENCY,FIREYEAR,LATDD83,LONGDD83,POINTTYPE,FIRENAME,STATCAUSE,SIZECLASS,TOTALACRES,Latitude,...,REVDATE,PROTECTIONAGENCY,ACCURACY,DATASOURCE,UNITIDOWNER,SECURITYID,PERIMEXISTS,GLOBALID,FIRERPTQC,CN
0,USFS,1966.0,40.02104,-105.4747,General,Gordon Creek,Lightning,B,1.0,40.015613,...,2022-06-07,,,24,COARF,210,N,{3565588F-6D0D-4200-9587-871500846964},,
1,USFS,1966.0,39.96445,-105.3802,General,Twin Sisters 1,Camping,B,0.5,39.969788,...,2022-06-07,,,24,COARF,210,N,{ACEDAD28-CA56-4D6C-84A6-B277ED2FAB8D},,
2,USFS,1966.0,40.25272,-105.4934,General,Twin Sisters 2,Lightning,B,0.5,40.252711,...,2022-06-07,,,24,COARF,210,N,{2F5A81EA-E30C-4D69-A898-5E3E917CEA8E},,
3,USFS,1966.0,40.15136,-105.5871,General,Long Lake,9,A,0.0,40.151352,...,2022-06-07,,,24,COARF,210,N,{838BA271-1200-4EA8-88DD-CB05E57372C0},,
4,USFS,1965.0,39.71895,-105.6241,General,Cascade,Camping,B,2.0,39.718944,...,2022-06-07,,,24,COARF,210,N,{9173D33D-294F-49EF-B32A-88F124E77DB7},,


In [18]:
# Display the number of rows in the DataFrame
number_of_rows = len(fire_df)
print("Number of rows:", number_of_rows)

Number of rows: 2000


In [21]:
# Replace NaN and inf values due to error (IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer)
fire_df["FIREYEAR"] = fire_df["FIREYEAR"].fillna(-1)

# Convert "FIREYEAR" to an integer
fire_df["FIREYEAR"] = fire_df["FIREYEAR"].astype(int)

In [24]:
# Exclude placeholder values before calculating min and max
valid_years = fire_df["FIREYEAR"][fire_df["FIREYEAR"] != -1]

# Find and display the oldest and most recent FIREYEAR
oldest_year = valid_years.min()
most_recent_year = valid_years.max()

print("Oldest FIRE YEAR:", oldest_year)
print("Most recent FIRE YEAR:", most_recent_year)

Oldest FIRE YEAR: 1949
Most recent FIRE YEAR: 2023


In [26]:
# Determine how many STATCAUSE are listed as a number 0-9
numerical_counts = {str(num): 0 for num in range(10)}

for value in fire_df['STATCAUSE']:
    if value in numerical_counts:
        numerical_counts[value] += 1

# Display the counts
for num, count in numerical_counts.items():
    print(f"{num}: {count}")

0: 1
1: 364
2: 20
3: 33
4: 172
5: 14
6: 1
7: 4
8: 0
9: 56


In [27]:
# Replace the numerical outcomes with the string values listed in the metadata under <attrlabl>STATCAUSE</attrlabl>
# https://data.fs.usda.gov/geodata/edw/edw_resources/meta/S_USA.FireOccurrence.xml
# Dictionary to map numerical values to descriptive labels
statcause_mapping = {
    "7": "Arson",
    "9": "Miscellaneous",
    "8": "Children",
    "2": "Equipment",
    "1": "Lightning",
    "5": "Debris/Open Burning",
    "6": "Railroad",
    "4": "Camping",
    "3": "Smoking"
}

# Replace numerical values with their corresponding descriptive labels
fire_df["STATCAUSE"] = fire_df["STATCAUSE"].map(statcause_mapping)

In [28]:
# Test the statcause_mapping worked
numerical_counts = {str(num): 0 for num in range(10)}

for value in fire_df['STATCAUSE']:
    if value in numerical_counts:
        numerical_counts[value] += 1

# Display the counts
for num, count in numerical_counts.items():
    print(f"{num}: {count}")

0: 0
1: 0
2: 0
3: 0
4: 0
5: 0
6: 0
7: 0
8: 0
9: 0
