In [None]:
pip install pandas geopandas

Note: you may need to restart the kernel to use updated packages.


In [1]:
# Import the necessary libraries
import pandas as pd
import geopandas as gpd
import os

In [10]:
# Default input file, relative to the notebook's working directory.
DEFAULT_FAULT_DATA_PATH = "data/RAW/gem_active_faults_harmonized.geojson"

# `catalog_id` values that are kept when no explicit list is supplied.
DEFAULT_CATALOG_IDS = (
    "ME_GRCS696", "ME_GRCS697", "ME_GRCS698", "ME_GRCS699",
    "ME_JO1", "ME_JO2", "ME_JO3", "ME_JO4", "ME_JO5", "ME_JO7", "ME_JO8",
    "ME_JO99s1", "ME_JO99s2",
    "ME_LB1s2", "ME_LB2", "ME_LB3s1", "ME_LB3s2",
    "ME_SY1s2", "ME_SY1s3s1", "ME_SY1s5", "ME_SY1s6",
    "ME_SY2As3s3", "ME_SY2s1s2", "ME_SY2s1s3", "ME_SY2s2", "ME_SY2s3s3",
    "NAF_90", "NAF_91", "NAF_92", "NAF_93", "NAF_94", "NAF_95", "NAF_96",
    "NAF_97", "NAF_98", "NAF_99", "NAF_100", "NAF_101", "NAF_102", "NAF_103",
    "NAF_104", "NAF_105", "NAF_106", "NAF_107", "NAF_108", "NAF_109",
    "NAF_110", "NAF_111", "NAF_112", "NAF_113",
)


def load_and_filter_fault_data(fault_data_path=DEFAULT_FAULT_DATA_PATH,
                               catalog_ids=DEFAULT_CATALOG_IDS):
    """
    Load fault data from a GeoJSON file and keep only the selected catalog IDs.

    Rows are retained when their ``catalog_id`` value appears in
    ``catalog_ids``. The first three retained rows are printed as a preview.

    Args:
        fault_data_path (str): Path of the GeoJSON file to read. Defaults to
            the harmonized GEM active-faults file under ``data/RAW``.
        catalog_ids (iterable of str): ``catalog_id`` values to keep. Defaults
            to the built-in selection in ``DEFAULT_CATALOG_IDS``.

    Returns:
        gpd.GeoDataFrame: Copy of the rows whose ``catalog_id`` is in
        ``catalog_ids`` (may be empty if nothing matches).

    Raises:
        FileNotFoundError: If ``fault_data_path`` does not exist.
        KeyError: If the loaded data has no ``catalog_id`` column.
    """

    # Fail early with a clear message instead of a driver-specific read error.
    if not os.path.exists(fault_data_path):
        raise FileNotFoundError(f"Fault data file not found: {fault_data_path}")

    # Read the GeoJSON file into a GeoDataFrame
    faults_gdf = gpd.read_file(fault_data_path)

    if 'catalog_id' not in faults_gdf.columns:
        raise KeyError(
            f"Column 'catalog_id' not found in fault data: {fault_data_path}"
        )

    # Materialise the IDs so any iterable (tuple, set, generator) is accepted.
    wanted_ids = list(catalog_ids)

    # .copy() detaches the result from the full frame so later edits are safe.
    emme_faults = faults_gdf[faults_gdf['catalog_id'].isin(wanted_ids)].copy()

    # Show first few records
    print("\nFirst 3 fault records:")
    print(emme_faults.head(3))

    return emme_faults

In [11]:
def save_emme_faults(emme_faults_gdf, output_filename="EMME_faults.geojson"):
    """
    Write the filtered fault data to disk as GeoJSON and report the result.

    Nothing is written when the input has zero rows. After writing, the
    function checks that the file exists and prints either a success summary
    (row count and file size) or an error line.

    Args:
        emme_faults_gdf (gpd.GeoDataFrame): Filtered fault data to save
        output_filename (str): Destination path of the GeoJSON file

    Returns:
        None
    """
    segment_count = len(emme_faults_gdf)

    # Guard: skip the write entirely for an empty selection.
    if not segment_count:
        print("No EMME fault data to save.")
        return

    print(f"\nSaving EMME fault data to {output_filename}...")
    emme_faults_gdf.to_file(output_filename, driver='GeoJSON')

    # Guard: the writer returned but the file is not there.
    if not os.path.exists(output_filename):
        print(f"✗ Error: Failed to create {output_filename}")
        return

    size_in_bytes = os.path.getsize(output_filename)
    print(f"✓ Successfully saved {segment_count} fault segments to {output_filename}")
    print(f"  File size: {size_in_bytes:,} bytes")

In [12]:
def main():
    """
    Run the fault-data pipeline: load and filter, then save.

    Prints a completion banner on success. Any exception is reported on
    stdout and then re-raised, so the failure stays visible to the caller.
    """
    banner = "=" * 50

    try:
        # Step 1: read the source data and keep the selected faults.
        filtered_faults = load_and_filter_fault_data()

        # Step 2: write the selection to disk.
        save_emme_faults(filtered_faults)

        print("\n" + banner)
        print("Fault data processing completed successfully!")
        print(banner)

    except Exception as e:
        # Report, then propagate unchanged so the traceback is preserved.
        print(f"Error processing fault data: {e}")
        raise

In [13]:
# Execute the pipeline: load and filter the fault data, then save the result.
main()


First 3 fault records:
           average_dip      average_rake  catalog_id catalog_name dip_dir  \
1687  (35.0,25.0,45.0)   (97,80.0,115.0)  ME_GRCS696         EMME    None   
1688  (35.0,25.0,45.0)  (115,90.0,140.0)  ME_GRCS697         EMME    None   
1689  (35.0,25.0,45.0)   (97,80.0,115.0)  ME_GRCS698         EMME    None   

     lower_seis_depth  name   net_slip_rate        slip_type upper_seis_depth  \
1687         (17.5,,)  None  (0.8,0.19,1.5)          Reverse          (3.0,,)   
1688         (17.5,,)  None  (0.8,0.14,1.5)  Dextral-Reverse          (3.0,,)   
1689         (17.5,,)  None  (0.8,0.14,1.5)          Reverse          (3.0,,)   

      ... activity_confidence fs_name last_movement downthrown_side_dir  \
1687  ...                None    None          None                None   
1688  ...                None    None          None                None   
1689  ...                None    None          None                None   

     vert_sep_rate strike_slip_rate expos