In [1]:
import folium
import zipfile
import io
from fastkml import kml
import pandas as pd
from io import StringIO
import csv
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import MultiLineString, LineString
    
# import pyvista as pv
# pv.set_jupyter_backend('client')

import py3dep

In [5]:
# read KML content 
kmz = zipfile.ZipFile("qfaults.kmz", 'r')  # extract zip file first, then read kmz file inside the extracted folder
kml_content = kmz.open('doc.kml', 'r').read()  # kml content
k = kml.KML()
k.from_string(kml_content)

In [3]:
# Step 3: Filter the landmarks based on a given name
def filter_kml_by_name(k, name_filters):
    filtered_kml = kml.KML()
    ns = '{http://www.opengis.net/kml/2.2}'

    for document in k.features():
        new_doc = kml.Document(ns, document.id, document.name, document.description)
        for folder in document.features():
            new_folder = kml.Folder(ns, folder.id, folder.name, folder.description)
            for placemark in folder.features():
                for name_filter in name_filters:
                    if name_filter in placemark.to_string():
                        new_folder.append(placemark)
                        break
            if new_folder.features():
                new_doc.append(new_folder)
        if new_doc.features():
            filtered_kml.append(new_doc)
    
    return filtered_kml


kml_content = open('NSHM23_GeologicDeformationModel.kml', 'r').read()  # kml content
k = kml.KML()
k.from_string(bytes(kml_content,encoding='utf8'))
# Filter landmarks by name
name_filter = ["Calaveras","Hayward","Rodgers"]  # Change this to the name you want to filter by
filtered_k = filter_kml_by_name(k, name_filter)

# Step 4: Write the filtered KML content to a new file
with open("filtered_nshm23.kml", 'w', encoding='utf-8') as f:
    f.write(filtered_k.to_string(prettyprint=True))

FileNotFoundError: [Errno 2] No such file or directory: 'NSHM23_GeologicDeformationModel.kml'

In [6]:
# Function to extract Placemark data
kml_content = open('filtered_qfaults.kml', 'r').read()  # kml content
filtered_k = kml.KML()
filtered_k.from_string(bytes(kml_content,encoding='utf8'))
def extract_placemark_data(placemark):
    data = {}
    if placemark.extended_data is not None:
        for ed in placemark.extended_data.elements:
            if ed.name == 'FaultID':
                data['id'] = ed.value
            elif ed.name == 'FaultName':
                data['name'] = ed.value
    else:
        if placemark.id is not None:
            data["id"] = placemark.id
            data["name"] = placemark.name

    coordinates = []
    for geom in placemark.geometry.geoms:
        if isinstance(geom, MultiLineString):
            for linestring in geom.geoms:
                coordinates.extend(linestring.coords[:])
        else:
            coordinates.extend(geom.coords[:])
    formatted = []
    for coord in coordinates:
        formatted.append(f"{coord[0]} {coord[1]} {coord[2]}")
    data['multilinestring'] = f"MULTILINESTRING(({",".join(formatted)}))"
    return data

# Extract data
placemarks_data = []
for document in filtered_k.features():
    for feature in document.features():
        if isinstance(feature, kml.Folder):
            for placemark in feature.features():
                placemarks_data.append(extract_placemark_data(placemark))
        elif isinstance(feature, kml.Placemark):
            placemarks_data.append(extract_placemark_data(feature))

# Write to CSV
with open('filtered_qfaults.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['id', 'name',"to_join", 'multilinestring'])

    for data in placemarks_data:
        writer.writerow([data['id'], data['name'],"", data['multilinestring']])

FileNotFoundError: [Errno 2] No such file or directory: 'filtered_qfaults.kml'

In [7]:
docs = list(k.features())
folders=[]
for d in docs:
    folders.extend(list(d.features()))
records = []
for f in folders:
    records.extend(list(f.features()))
geoms = [element.geometry for element in records]

names = [element.name for element in records]
print(len(set(names)))
print(len(names))
geoms = [element.geometry for element in records]  # extract geometry

2107
112527


In [8]:
def get_header(record):
    tables = pd.read_html(StringIO(str(record.description)))
    data_table = list(tables[1][0])
    return data_table

def read_record(record):
    tables = pd.read_html(StringIO(str(record.description)))
    data_table = list(tables[1][1])
    return data_table

def get_all_data(record):
    data = []
    data.append(record.name)
    data.append(record.id)
    data.extend(read_record(record))
    data.append(record.geometry.wkt)
    return data

def get_all_headers(record):
    data = []
    data.append("Name")
    data.append("ID")
    data.extend(get_header(record))
    data.append("Geom")
    return data

# print(get_all_headers(records[0]))
# print(get_all_data(records[0]))
# headers = [len(get_header(element)) for element in records]

In [9]:
# dir(geoms[0])

In [10]:
#generate filtered files
filter_name = "Calaveras fault zone"
filtered_records = list(filter(lambda record: (record.name == filter_name), records))  
filtered_records = list(map(get_all_data, filtered_records))
filtered_records = list(filter(lambda record: (record[8] == "historic"), filtered_records))  
print(filtered_records)
filename = f'{filter_name.replace(" ", "")}.csv'
with open(filename, 'w', newline="") as file:
    csvwriter = csv.writer(file)
    header = get_all_headers(records[0])
    csvwriter.writerow(header)
    for record in tqdm (filtered_records, desc="Writing File..."):
        csvwriter.writerow(record)
    # csvwriter.writerows(data) # 5. write the rest of the data

[['Calaveras fault zone', 'ID_013351', 'Calaveras fault zone', 'Northern Calaveras section', '54', 'a', 'California', 'Well Constrained', 'historic', 'Vertical', 'Greater than 5.0 mm/yr', 'Right lateral', '1:24,000', 'A', 'Good', 'N26°W', '156', 'California Geological Survey', nan, '3/4/1999', 'https://earthquake.usgs.gov/cfusion/qfault/show_report_AB_archive.cfm?fault_id=54&section_id=a', 'historic Well Constrained', '54a', '54', 'MULTILINESTRING((-121.8706310002251 37.57822699968291 0.0, -121.8745419999692 37.58673200017499 0.0))'], ['Calaveras fault zone', 'ID_013352', 'Calaveras fault zone', 'Northern Calaveras section', '54', 'a', 'California', 'Moderately Constrained', 'historic', 'Vertical', 'Greater than 5.0 mm/yr', 'Right lateral', 'unspecified', 'A', 'Good', 'N26°W', '156', 'California Geological Survey', nan, '3/4/1999', 'https://earthquake.usgs.gov/cfusion/qfault/show_report_AB_archive.cfm?fault_id=54&section_id=a', 'historic Moderately Constrained', '54a', '54', 'MULTILINE

Writing File...: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 56/56 [00:00<00:00, 45902.10it/s]
