This notebook looks into the .kml files from the NOAA’s Hazard Mapping System Fire and Smoke Product for smoke cloud polygons.

    * Datafiles can be found at (https://satepsanone.nesdis.noaa.gov/pub/FIRE/web/HMS/Smoke_Polygons/KML/Annual_Bundles/)
    * parse_hms_smoke_kml function converts .kml to GeoDataFrame for each smoke polygon
    * Contains metadata on location (state and county), start and end dates, and density class (light - heavy)

There are neccesary downloads for locating the state and county for the polygons

    * 2022 TIGER shapefile for state borders
        - !wget https://www2.census.gov/geo/tiger/TIGER2022/STATE/tl_2022_us_state.zip
        - !unzip tl_2022_us_state.zip -d states
    * Census TIGER shapefile for US counties
        - !wget https://www2.census.gov/geo/tiger/TIGER2022/COUNTY/tl_2022_us_county.zip
        - !unzip tl_2022_us_county.zip -d counties



In [2]:
# load state data
import geopandas as gpd

states = gpd.read_file("states/tl_2022_us_state.shp")

states = states[states["STUSPS"].isin([
    "AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA",
    "HI","ID","IL","IN","IA","KS","KY","LA","ME","MD",
    "MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ",
    "NM","NY","NC","ND","OH","OK","OR","PA","RI","SC",
    "SD","TN","TX","UT","VT","VA","WA","WV","WI","WY",
    "DC"
])]


In [3]:
import xml.etree.ElementTree as ET
from shapely.geometry import Polygon
import re

def extract_metadata(description_html):
    """Parse metadata inside the <description><![CDATA[..]]></description> block"""
    text = description_html.replace("<br>", "\n")
    meta = {}

    for line in text.split("\n"):
        if ":" in line:
            key, value = line.split(":", 1)
            meta[key.strip()] = value.strip()

    return meta


def parse_hms_smoke_kml(path, states_gdf):
    """Parse .kml and return GeoDataFrame"""
    ns = {"kml": "http://www.opengis.net/kml/2.2", "gx": "http://www.google.com/kml/ext/2.2"}

    tree = ET.parse(path)
    root = tree.getroot()

    folders = root.findall(".//kml:Folder", ns)

    records = []

    for folder in folders:
        folder_name_tag = folder.find("kml:name", ns)
        if folder_name_tag is None:
            continue

        folder_name = folder_name_tag.text.strip()
        if not folder_name.startswith("Smoke"):
            continue

        # Extract smoke class ("Light", "Medium", "Heavy")
        class_match = re.search(r"\((.*?)\)", folder_name)
        smoke_class = class_match.group(1) if class_match else None

        # Extract all placemarks in the folder
        placemarks = folder.findall(".//kml:Placemark", ns)

        for pm in placemarks:

            # Extract metadata inside <description>
            desc_tag = pm.find("kml:description", ns)
            meta = extract_metadata(desc_tag.text) if desc_tag is not None else {}

            # Extract polygon coordinates
            coords_tag = pm.find(".//kml:Polygon//kml:coordinates", ns)
            if coords_tag is None:
                continue

            coords = []
            for line in coords_tag.text.strip().split():
                lon, lat, *_ = line.split(",")
                coords.append((float(lon), float(lat)))

            poly = Polygon(coords)

            temp_record = {
                "smoke_class": smoke_class,
                "geometry": poly,
                **meta
            }

            records.append(temp_record)

    # Convert to GeoDataFrame
    gdf = gpd.GeoDataFrame(records, crs="EPSG:4326")

    # Intersect with states
    states_gdf = states_gdf.to_crs("EPSG:4326")
    gdf_states = gpd.sjoin(gdf, states_gdf, how="left", predicate="intersects")


    # Clean up columns
    gdf_states = gdf_states.rename(columns={"NAME": "state_name", "STUSPS": "state_code"})
    gdf_states = gdf_states.drop(columns=["index_right"], errors="ignore")

    return gdf_states


In [20]:
gdf = parse_hms_smoke_kml("hms_smoke2020.kml", states) # example for 2020 data only

In [21]:
print(len(gdf))
gdf.head()

59434


Unnamed: 0,smoke_class,geometry,"<div style=""width",End Time,Density,Satellite,REGION,DIVISION,STATEFP,STATENS,GEOID,state_code,state_name,LSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON
0,Light,"POLYGON ((-80.99603 26.80981, -81.02571 26.770...","170px;"">Start Time: 2020001 1546UTC",2020001 1546UTC,Light,GOES-EAST</div>,3.0,5.0,12.0,294478.0,12.0,FL,Florida,0.0,G4000,A,138962800000.0,45971470000.0,28.3989775,-82.5143005
1,Light,"POLYGON ((-81.20696 26.84106, -81.21009 26.809...","170px;"">Start Time: 2020001 1406UTC",2020001 1716UTC,Light,GOES-EAST</div>,3.0,5.0,12.0,294478.0,12.0,FL,Florida,0.0,G4000,A,138962800000.0,45971470000.0,28.3989775,-82.5143005
2,Light,"POLYGON ((-80.62884 27.02856, -80.66009 27.003...","170px;"">Start Time: 2020001 1541UTC",2020001 1541UTC,Light,GOES-EAST</div>,3.0,5.0,12.0,294478.0,12.0,FL,Florida,0.0,G4000,A,138962800000.0,45971470000.0,28.3989775,-82.5143005
3,Light,"POLYGON ((-82.99548 22.68393, -82.99423 22.725...","170px;"">Start Time: 2020001 2036UTC",2020001 2036UTC,Light,GOES-EAST</div>,,,,,,,,,,,,,,
4,Light,"POLYGON ((-82.77513 29.49692, -82.75513 29.483...","170px;"">Start Time: 2020001 2126UTC",2020001 2126UTC,Light,GOES-EAST</div>,3.0,5.0,12.0,294478.0,12.0,FL,Florida,0.0,G4000,A,138962800000.0,45971470000.0,28.3989775,-82.5143005


In [22]:
cali_gdf = gdf[gdf["state_code"] == "CA"] # filter for cali only

In [23]:
print(len(cali_gdf))
cali_gdf.head()

4072


Unnamed: 0,smoke_class,geometry,"<div style=""width",End Time,Density,Satellite,REGION,DIVISION,STATEFP,STATENS,GEOID,state_code,state_name,LSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON
11,Light,"POLYGON ((-115.49127 32.74978, -115.48934 32.6...","170px;"">Start Time: 2020002 2100UTC",2020002 2300UTC,Light,GOES-EAST</div>,4,9,6,1779778,6,CA,California,0,G4000,A,403673600000.0,20291710000.0,37.1551773,-119.5434183
12,Light,"POLYGON ((-115.62036 32.68812, -115.59146 32.6...","170px;"">Start Time: 2020002 2100UTC",2020002 2300UTC,Light,GOES-EAST</div>,4,9,6,1779778,6,CA,California,0,G4000,A,403673600000.0,20291710000.0,37.1551773,-119.5434183
15,Light,"POLYGON ((-115.0327 32.71317, -115.00958 32.66...","170px;"">Start Time: 2020002 2100UTC",2020002 2300UTC,Light,GOES-EAST</div>,4,9,6,1779778,6,CA,California,0,G4000,A,403673600000.0,20291710000.0,37.1551773,-119.5434183
482,Light,"POLYGON ((-119.9924 37.91582, -120.00766 37.83...","170px;"">Start Time: 2020006 2006UTC",2020006 2301UTC,Light,GOES-WEST</div>,4,9,6,1779778,6,CA,California,0,G4000,A,403673600000.0,20291710000.0,37.1551773,-119.5434183
572,Light,"POLYGON ((-120.17129 37.97424, -120.14316 37.9...","170px;"">Start Time: 2020007 1500UTC",2020007 1800UTC,Light,GOES-EAST</div>,4,9,6,1779778,6,CA,California,0,G4000,A,403673600000.0,20291710000.0,37.1551773,-119.5434183


In [24]:
import geopandas as gpd

counties = gpd.read_file("counties/tl_2022_us_county.shp")

# Keep only California
counties_ca = counties[counties["STATEFP"] == "06"]
counties_ca = counties_ca.to_crs("EPSG:4326")

ca_smoke = cali_gdf.copy()
ca_smoke = ca_smoke.reset_index(drop=True)

smoke_with_counties = gpd.sjoin(
    ca_smoke,
    counties_ca,
    how="left",
    predicate="intersects"
)

In [25]:
# Group county names by original index
county_lists = (
    smoke_with_counties.groupby(smoke_with_counties.index)["NAME"]
    .apply(lambda x: list(x.unique()))
)

# Take unique polygons
unique_smoke = smoke_with_counties.drop_duplicates(subset=["geometry"]).copy()

# Add county list column
unique_smoke["counties"] = county_lists.values

In [26]:
print(len(unique_smoke))
unique_smoke.head()

4072


Unnamed: 0,smoke_class,geometry,"<div style=""width",End Time,Density,Satellite,REGION,DIVISION,STATEFP_left,STATENS,...,MTFCC_right,CSAFP,CBSAFP,METDIVFP,FUNCSTAT_right,ALAND_right,AWATER_right,INTPTLAT_right,INTPTLON_right,counties
0,Light,"POLYGON ((-115.49127 32.74978, -115.48934 32.6...","170px;"">Start Time: 2020002 2100UTC",2020002 2300UTC,Light,GOES-EAST</div>,4,9,6,1779778,...,G4020,,,,A,10814595263,790216760,33.0408143,-115.3554001,[Imperial]
1,Light,"POLYGON ((-115.62036 32.68812, -115.59146 32.6...","170px;"">Start Time: 2020002 2100UTC",2020002 2300UTC,Light,GOES-EAST</div>,4,9,6,1779778,...,G4020,,,,A,10814595263,790216760,33.0408143,-115.3554001,[Imperial]
2,Light,"POLYGON ((-115.0327 32.71317, -115.00958 32.66...","170px;"">Start Time: 2020002 2100UTC",2020002 2300UTC,Light,GOES-EAST</div>,4,9,6,1779778,...,G4020,,,,A,10814595263,790216760,33.0408143,-115.3554001,[Imperial]
3,Light,"POLYGON ((-119.9924 37.91582, -120.00766 37.83...","170px;"">Start Time: 2020006 2006UTC",2020006 2301UTC,Light,GOES-WEST</div>,4,9,6,1779778,...,G4020,,,,A,3752476985,36220865,37.5743432,-119.9117215,"[Mariposa, Tuolumne]"
4,Light,"POLYGON ((-120.17129 37.97424, -120.14316 37.9...","170px;"">Start Time: 2020007 1500UTC",2020007 1800UTC,Light,GOES-EAST</div>,4,9,6,1779778,...,G4020,,,,A,5752133238,138668837,38.0214344,-119.9647335,[Tuolumne]


In [2]:
import geopandas as gpd
import pandas as pd
import fiona 

gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
gpd.io.file.fiona.drvsupport.supported_drivers['LIBKML'] = 'rw'
geo_df = gpd.read_file('hms_fire2020.kml', driver='KML')


ModuleNotFoundError: No module named 'fiona'

In [3]:
!pip install fiona

Defaulting to user installation because normal site-packages is not writeable
Collecting fiona
  Downloading fiona-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (56 kB)
Collecting click-plugins>=1.0 (from fiona)
  Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting cligj>=0.5 (from fiona)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Downloading fiona-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m37.0 MB/s[0m  [33m0:00:00[0m6m0:00:01[0m
[?25hDownloading click_plugins-1.1.1.2-py2.py3-none-any.whl (11 kB)
Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Installing collected packages: cligj, click-plugins, fiona
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [fiona]32m2/3[0m [fiona]
[1A[2KSuccessfully installed click-plugins-1.1.1.2 cligj-0.7.2 fiona-1.10.1
