In [None]:
# Setup (run once; you may need to restart the kernel afterwards): %pip install fastkml pandas beautifulsoup4

Collecting fastkml
  Downloading fastkml-1.1.0-py3-none-any.whl.metadata (8.0 kB)
Collecting arrow (from fastkml)
  Downloading arrow-1.3.0-py3-none-any.whl.metadata (7.5 kB)
Collecting pygeoif>=1.5 (from fastkml)
  Downloading pygeoif-1.5.1-py3-none-any.whl.metadata (14 kB)
Collecting types-python-dateutil>=2.8.10 (from arrow->fastkml)
  Downloading types_python_dateutil-2.9.0.20250516-py3-none-any.whl.metadata (2.1 kB)
Downloading fastkml-1.1.0-py3-none-any.whl (107 kB)
Downloading pygeoif-1.5.1-py3-none-any.whl (28 kB)
Downloading arrow-1.3.0-py3-none-any.whl (66 kB)
Downloading types_python_dateutil-2.9.0.20250516-py3-none-any.whl (14 kB)
Installing collected packages: types-python-dateutil, pygeoif, arrow, fastkml
Successfully installed arrow-1.3.0 fastkml-1.1.0 pygeoif-1.5.1 types-python-dateutil-2.9.0.20250516
Note: you may need to restart the kernel to use updated packages.


In [None]:
import zipfile
import xml.etree.ElementTree as ET
import pandas as pd
from bs4 import BeautifulSoup

# Step 1: Unzip the KMZ to get the KML file
kmz_file = 'qfaults.kmz'
with zipfile.ZipFile(kmz_file, 'r') as kmz:
    kmz.extractall('qfaults_kml')

    # A KMZ is a zip archive; locate the KML document(s) it contains.
    kml_members = [member for member in kmz.namelist() if member.lower().endswith('.kml')]
    if not kml_members:
        raise FileNotFoundError(f"No .kml file found inside {kmz_file}")

    # Step 2: Parse the KML (first .kml member of the archive) and get its root.
    # Without this step `root` is undefined and the loop below raises NameError.
    with kmz.open(kml_members[0]) as kml_stream:
        root = ET.parse(kml_stream).getroot()

# Step 3: Namespaces
ns = {'kml': 'http://www.opengis.net/kml/2.2'}

# Step 4: Loop through all Placemarks
# One output record is produced per coordinate point; every point carries a
# copy of the metadata of the Placemark it belongs to.
records = []

for placemark in root.findall(".//kml:Placemark", ns):
    # Initialize metadata dictionary
    metadata = {}

    # Get <name> (e.g., fault name); an absent or empty <name> yields None
    name_elem = placemark.find("kml:name", ns)
    has_name = name_elem is not None and name_elem.text
    metadata["Fault Name"] = name_elem.text.strip() if has_name else None

    # Parse the HTML table in <description>; each two-cell row is a key/value pair
    description_elem = placemark.find("kml:description", ns)
    if description_elem is not None and description_elem.text:
        soup = BeautifulSoup(description_elem.text, "html.parser")
        for row in soup.find_all("tr"):
            cells = row.find_all("td")
            if len(cells) == 2:
                key = cells[0].text.strip()
                value = cells[1].text.strip()
                metadata[key] = value

    # Extract coordinates
    for coords_elem in placemark.findall(".//kml:coordinates", ns):
        # An empty <coordinates/> element has text None; nothing to extract
        if not coords_elem.text:
            continue

        # Tuples are whitespace-separated, each "lon,lat[,alt]"
        for pair in coords_elem.text.split():
            lon, lat, *_ = pair.split(',')  # ignore altitude
            records.append({**metadata, "Longitude": float(lon), "Latitude": float(lat)})

# Step 5: DataFrame
df_faults_full = pd.DataFrame(records)
print(f"Total points extracted: {len(df_faults_full)}")

# Keep the preview as the last expression so the notebook actually renders it
df_faults_full.head()


               Fault Name          Section Name Fault ID Section ID  \
0  San Andreas fault zone  Shelter Cove Section        1          a   
1  San Andreas fault zone  Shelter Cove Section        1          a   
2  San Andreas fault zone  Shelter Cove Section        1          a   
3  San Andreas fault zone  Shelter Cove Section        1          a   
4  San Andreas fault zone  Shelter Cove Section        1          a   

     Location  Linetype       Age Dip Direction      Slip Rate(mm/year)  \
0  California  Inferred  historic      Vertical  Greater than 5.0 mm/yr   
1  California  Inferred  historic      Vertical  Greater than 5.0 mm/yr   
2  California  Inferred  historic      Vertical  Greater than 5.0 mm/yr   
3  California  Inferred  historic      Vertical  Greater than 5.0 mm/yr   
4  California  Inferred  historic      Vertical  Greater than 5.0 mm/yr   

      Slip Sense  ... Total Fault Length (km)                    Cooperator  \
0  Right lateral  ...                    10

In [16]:
# Count missing values per column as a quick completeness check
print(df_faults_full.isnull().sum())

# Preview the first rows. This must be the cell's last expression to be
# rendered; placed before the print() its result was silently discarded.
df_faults_full.head()

Fault Name                 0
Section Name               0
Fault ID                   0
Section ID                 0
Location                   0
Linetype                   0
Age                        0
Dip Direction              0
Slip Rate(mm/year)         0
Slip Sense                 0
Mapped Scale               0
Fault Class                0
Mapping Certainty          0
Average Strike             0
Total Fault Length (km)    0
Cooperator                 0
Associated Earthquake      0
Date of Last Review        0
Fault url                  0
symbology                  0
Citation ID                0
Fault ID Number            0
Longitude                  0
Latitude                   0
dtype: int64


In [20]:
# Keep only the points inside the Pacific Northwest bounding box.
# Series.between is inclusive on both ends by default, matching >= / <=.
lat_in_box = df_faults_full['Latitude'].between(39.5, 49.5)
lon_in_box = df_faults_full['Longitude'].between(-125.0, -116.0)
df_filtered = df_faults_full.loc[lat_in_box & lon_in_box]

# Show the first few rows of the filtered points
df_filtered.head()


Unnamed: 0,Fault Name,Section Name,Fault ID,Section ID,Location,Linetype,Age,Dip Direction,Slip Rate(mm/year),Slip Sense,...,Total Fault Length (km),Cooperator,Associated Earthquake,Date of Last Review,Fault url,symbology,Citation ID,Fault ID Number,Longitude,Latitude
0,San Andreas fault zone,Shelter Cove Section,1,a,California,Inferred,historic,Vertical,Greater than 5.0 mm/yr,Right lateral,...,1082,California Geological Survey,San Francisco earthquake,12/10/2002,https://earthquake.usgs.gov/cfusion/qfault/sho...,historic Inferred,1a,1,-124.090694,40.117564
1,San Andreas fault zone,Shelter Cove Section,1,a,California,Inferred,historic,Vertical,Greater than 5.0 mm/yr,Right lateral,...,1082,California Geological Survey,San Francisco earthquake,12/10/2002,https://earthquake.usgs.gov/cfusion/qfault/sho...,historic Inferred,1a,1,-124.090565,40.115692
2,San Andreas fault zone,Shelter Cove Section,1,a,California,Inferred,historic,Vertical,Greater than 5.0 mm/yr,Right lateral,...,1082,California Geological Survey,San Francisco earthquake,12/10/2002,https://earthquake.usgs.gov/cfusion/qfault/sho...,historic Inferred,1a,1,-124.090456,40.11512
3,San Andreas fault zone,Shelter Cove Section,1,a,California,Inferred,historic,Vertical,Greater than 5.0 mm/yr,Right lateral,...,1082,California Geological Survey,San Francisco earthquake,12/10/2002,https://earthquake.usgs.gov/cfusion/qfault/sho...,historic Inferred,1a,1,-124.090124,40.114665
4,San Andreas fault zone,Shelter Cove Section,1,a,California,Inferred,historic,Vertical,Greater than 5.0 mm/yr,Right lateral,...,1082,California Geological Survey,San Francisco earthquake,12/10/2002,https://earthquake.usgs.gov/cfusion/qfault/sho...,historic Inferred,1a,1,-124.089893,40.113931


In [21]:
# Write the filtered points to CSV, leaving out the DataFrame index column
output_csv = "faults_pacific_northwest.csv"
df_filtered.to_csv(output_csv, index=False)