In [None]:
pip install pandas geopandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Import the necessary libraries
import pandas as pd
import geopandas as gpd
import os

In [3]:
def load_and_filter_fault_data(
    fault_data_path="data/gem_active_faults_harmonized.geojson",
    catalog_name="EMME",
) -> "gpd.GeoDataFrame":
    """
    Load fault data from GeoJSON and keep only the entries of one catalog.

    Called without arguments this reads
    ``data/gem_active_faults_harmonized.geojson`` and keeps the rows whose
    ``catalog_name`` column equals ``'EMME'``. Progress and a short summary
    are printed along the way.

    Args:
        fault_data_path (str): Path to the GeoJSON file holding the fault data
        catalog_name (str): Value of the ``catalog_name`` column to keep

    Returns:
        gpd.GeoDataFrame: Copy of the rows belonging to ``catalog_name``
        (empty when no row matches)

    Raises:
        FileNotFoundError: If ``fault_data_path`` does not exist
        KeyError: If the loaded data has no ``catalog_name`` column
    """

    # Report only the file name so the message follows whatever path is used
    print(f"Loading fault data from {os.path.basename(fault_data_path)}...")

    if not os.path.exists(fault_data_path):
        raise FileNotFoundError(f"Fault data file not found: {fault_data_path}")

    # Read the GeoJSON file into a GeoDataFrame
    faults_gdf = gpd.read_file(fault_data_path)
    print(f"Total fault records loaded: {len(faults_gdf)}")

    # Fail with an explicit message instead of an opaque KeyError from indexing
    if 'catalog_name' not in faults_gdf.columns:
        raise KeyError(
            f"Column 'catalog_name' not found in {fault_data_path}; "
            f"available columns: {list(faults_gdf.columns)}"
        )

    print(f"Available catalog names: {faults_gdf['catalog_name'].unique()}")

    # Keep only the requested catalog; .copy() detaches the result from faults_gdf
    print(f"\nFiltering for {catalog_name} catalog entries...")
    catalog_faults = faults_gdf[faults_gdf['catalog_name'] == catalog_name].copy()

    print(f"{catalog_name} fault records found: {len(catalog_faults)}")

    if len(catalog_faults) == 0:
        # Nothing to summarise; hand back the empty frame so callers can check it
        print(f"Warning: No {catalog_name} catalog entries found in the data!")
        return catalog_faults

    # Display basic information about the filtered data
    print(f"\n{catalog_name} Faults Summary:")
    print(f"- Number of fault segments: {len(catalog_faults)}")
    print(f"- Columns: {list(catalog_faults.columns)}")

    # Show first few records
    print(f"\nFirst 3 {catalog_name} fault records:")
    print(catalog_faults.head(3))

    return catalog_faults

In [4]:
def save_emme_faults(emme_faults_gdf, output_filename="EMME_faults.geojson"):
    """
    Write the filtered EMME faults to disk as GeoJSON and report the outcome.

    Nothing is written when the input holds no records.

    Args:
        emme_faults_gdf (gpd.GeoDataFrame): EMME fault records to export
        output_filename (str): Destination path of the GeoJSON file
    """
    segment_count = len(emme_faults_gdf)
    if not segment_count:
        print("No EMME fault data to save.")
        return

    print(f"\nSaving EMME fault data to {output_filename}...")
    emme_faults_gdf.to_file(output_filename, driver='GeoJSON')

    # Confirm the write by looking for the file on disk
    if not os.path.exists(output_filename):
        print(f"✗ Error: Failed to create {output_filename}")
        return

    size_in_bytes = os.path.getsize(output_filename)
    print(f"✓ Successfully saved {segment_count} fault segments to {output_filename}")
    print(f"  File size: {size_in_bytes:,} bytes")

In [5]:
def main():
    """Run the fault-data pipeline: load and filter the faults, then save them."""
    banner = "=" * 50

    try:
        # The filtered frame goes straight from the loader to the writer
        save_emme_faults(load_and_filter_fault_data())

        print(f"\n{banner}")
        print("Fault data processing completed successfully!")
        print(banner)
    except Exception as err:
        # Report the failure, then let the original exception propagate
        print(f"Error processing fault data: {err!s}")
        raise

In [6]:
# Run the pipeline defined above: load the fault data, filter it, and save the result.
main()

Loading fault data from gem_active_faults_harmonized.geojson...
Total fault records loaded: 13696
Available catalog names: ['UCERF3' 'AUS_FSD' 'Active Tectonics of the Andes'
 'Macgregor_AfricaFaults' 'EMME' 'GEM Faulted Earth' 'SHARE'
 'HimaTibetMap' 'philippines' 'SARA' 'USGS Hazfaults 2014' 'Shyu Taiwan'
 'Villegas Mexico' 'Bird 2003' 'Litchfield_NZ_2013' 'EOS_SE_Asia'
 'GEM_NE_Asia' 'GEM_N_Africa' 'GEM_Central_Am_Carib' 'SMSSD']

Filtering for EMME catalog entries...
EMME fault records found: 778

EMME Faults Summary:
- Number of fault segments: 778
- Columns: ['average_dip', 'average_rake', 'catalog_id', 'catalog_name', 'dip_dir', 'lower_seis_depth', 'name', 'net_slip_rate', 'slip_type', 'upper_seis_depth', 'downthrown_side_id', 'reference', 'epistemic_quality', 'accuracy', 'activity_confidence', 'fs_name', 'last_movement', 'downthrown_side_dir', 'vert_sep_rate', 'strike_slip_rate', 'exposure_quality', 'shortening_rate', 'notes', 'geometry']

First 3 EMME fault records:
          