In [1]:
%matplotlib inline
import os
import numpy as np
import matplotlib.pyplot as plt

# Import the Parsers
from openquake.cat.parsers.isf_catalogue_reader import ISFReader
from openquake.cat.parsers.converters import GenericCataloguetoISFParser
from openquake.cat.isc_homogenisor import (HomogenisorPreprocessor,
                                   DynamicHomogenisor,
                                   MagnitudeConversionRule,
                                   DuplicateFinder)


## Load in Catalogue - Limit to ISC, GCMT/HRVD, EHB, NEIC, BJI

In [2]:
# The ISC bulletin is split into several ISF files because only 50,000 events
# can be downloaded at a time:
#   * 1900.01.01 - 2021.06.30: four files from the ISC Reviewed Catalogue.
#   * 2021.07.01 - 2021.12.31: a fifth file from the ISC Comprehensive/Unreviewed
#     Catalogue.
# The last catalogue read is the ISC-GEM catalogue (CSV).
#
# Note that ISC-EHB and EHB are the same. The IDC agency is associated with the
# ISC catalogue based on an Mw check.

# Agencies retained for both origins and magnitudes in every ISF file.
SELECTED_AGENCIES = ["ISC-GEM", "ISC-EHB", "EHB", "ISC", "IDC", "NEIC", "NEIS",
                     "USCGS", "NIED", "GCMT", "GUTE", "PAS"]


def _read_isf_catalogue(filename, identifier, name):
    """
    Read one ISF file restricted to SELECTED_AGENCIES.

    :param str filename: path of the ISF file
    :param str identifier: catalogue identifier passed to ``read_file``
    :param str name: catalogue name passed to ``read_file``
    :returns: tuple ``(parser, catalogue)`` - the ISFReader and what its
        ``read_file`` call returned
    """
    parser = ISFReader(filename,
                       # Each reader gets its own copy of the agency list.
                       selected_origin_agencies=list(SELECTED_AGENCIES),
                       selected_magnitude_agencies=list(SELECTED_AGENCIES))
    return parser, parser.read_file(identifier, name)


# ISC catalogue from 1900.01.01 to 1999.12.31
isc_parser_1900_1999, isc_catalogue_1900_1999 = _read_isf_catalogue(
    "inputs/1900-1999-PH_ISF_Catalogue.isf", "ISC-RB1", "ISC-1900-1999")

# ISC catalogue from 2000.01.01 to 2009.12.31
isc_parser_2000_2009, isc_catalogue_2000_2009 = _read_isf_catalogue(
    "inputs/2000-2009-PH_ISF_Catalogue.isf", "ISC-RB2", "ISC-2000-2009")

# ISC catalogue from 2010.01.01 to 2014.12.31
isc_parser_2010_2014, isc_catalogue_2010_2014 = _read_isf_catalogue(
    "inputs/2010-2014-PH_ISF_Catalogue.isf", "ISC-RB3", "ISC-2010-2014")

# ISC catalogue from 2015.01.01 to 2021.06.30
isc_parser_2015_2021, isc_catalogue_2015_2021 = _read_isf_catalogue(
    "inputs/2015-2021-PH_ISF_Catalogue.isf", "ISC-RB4", "ISC-2015-2021")

# ISC catalogue from 2021.07.01 to 2021.12.31
# This load was previously commented out, but `isc_catalogue_2021_2023` is
# required by the later reporting, preprocessing and harmonisation cells.
# NOTE(review): confirm inputs/2021-2023-PH_ISF_Catalogue.isf is available.
isc_parser_2021_2023, isc_catalogue_2021_2023 = _read_isf_catalogue(
    "inputs/2021-2023-PH_ISF_Catalogue.isf", "ISC-RB5", "ISC-2021-2023")

# ISC-GEM catalogue (CSV)
iscgem_parser = GenericCataloguetoISFParser("inputs/isc-gem-cat.csv")
iscgem_catalogue = iscgem_parser.parse("ISC-GEM", "ISC-GEM-CAT")

OSError: File inputs/1900-1999-PH_ISF_Catalogue.txt does not exist!

In [None]:
# Display the 2021-2023 catalogue object.
# NOTE(review): this only works when the 2021-2023 load in the catalogue-loading
# cell is active (not commented out).
isc_catalogue_2021_2023

In [None]:
def _print_event_count(message_template, catalogue):
    """Print `message_template` filled in with the catalogue's event count."""
    print(message_template % catalogue.get_number_events())


# One report line per loaded catalogue, in loading order.
_print_event_count("ISC Reviewed Bulleting Catalogue 1900-1999 contains: %d events",
                   isc_catalogue_1900_1999)
_print_event_count("ISC Reviewed Bulleting Catalogue 2000-2009 contains: %d events",
                   isc_catalogue_2000_2009)
_print_event_count("ISC Reviewed Bulleting Catalogue 2010-2014 contains: %d events",
                   isc_catalogue_2010_2014)
_print_event_count("ISC Reviewed Bulleting Catalogue 2015-2021 contains: %d events",
                   isc_catalogue_2015_2021)
_print_event_count("ISC Reviewed Bulleting Catalogue 2021-2023 contains: %d events",
                   isc_catalogue_2021_2023)
_print_event_count("ISC-GEM Catalogue contains: %d events",
                   iscgem_catalogue)

In [None]:
# Merge the 2000-2009 catalogue into the 1900-1999 catalogue via a DuplicateFinder
# built on the 1900-1999 catalogue, with logging enabled.
# NOTE(review): the two 0.0001 positional arguments are presumably the time and
# distance windows used to flag duplicates - confirm against DuplicateFinder's signature.
merger1900_1999 = DuplicateFinder(isc_catalogue_1900_1999, 0.0001, 0.0001, logging=True)
mergedCatalogue1900_2009 = merger1900_1999.merge_catalogue(isc_catalogue_2000_2009)

In [None]:
# Report how many events the merged 1900-2009 catalogue holds.
print("ISC Reviewed Bulleting Catalogue 1900-2009 contains: %d events" % mergedCatalogue1900_2009.get_number_events())

In [None]:
# Display the merged 1900-2009 catalogue object.
mergedCatalogue1900_2009

In [None]:
# Display `isc_catalogue`.
# NOTE(review): no visible cell assigns `isc_catalogue`; confirm which (merged)
# ISC catalogue is intended here, otherwise this raises NameError on a fresh kernel.
isc_catalogue

In [None]:
# Merge `isc_catalogue` into the ISC-GEM catalogue via a DuplicateFinder built on
# the ISC-GEM catalogue, with logging enabled.
# NOTE(review): `isc_catalogue` is not assigned in any visible cell - confirm which
# catalogue should be merged. The two 0.0001 arguments are presumably the time and
# distance windows for duplicate detection - confirm.
merger = DuplicateFinder(iscgem_catalogue, 0.0001, 0.0001, logging=True)
mergedCatalogue = merger.merge_catalogue(isc_catalogue)

In [None]:
## Define Rule Sets

The catalogue covers the years 2005/06. To illustrate how to apply time-variable hierarchies we consider two sets of rules:

For the origin the order of preference is:

(For 2005): EHB, ISC, NEIC, GCMT/HRVD, BJI

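(For 2006): ISC, EHB, NEIC, BJI, GCMT/HRVD

Note: the `origin_rules` defined in the next code cell use a single hierarchy spanning 1900/01/01 - 2021/12/31, not the two yearly hierarchies listed above.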


In [None]:
# Origin agencies for the single rule period, kept in the listed order.
origin_agency_preference = [
    "ISC-GEM", "ISC-EHB", "EHB", "ISC", "IDC", "NEIC",
    "NEIS", "USCGS", "NIED", "GCMT", "GUTE", "PAS",
]

# One (period, agency list) pair covering the whole catalogue time span.
origin_rules = [("1900/01/01 - 2021/12/31", origin_agency_preference)]

### Magnitude Rules 

#### Weatherill (2015) Table 1. Mw conversions

In [None]:
"""
Weatherill (2015) Table 1. Mw conversions
"""
def iscgem_mw(magnitude):
    """
    Identity conversion for Mw reported by ISC-GEM: the value is
    returned unchanged.
    """
    mw = magnitude
    return mw

def iscgem_mw_sigma(magnitude):
    """
    Additional uncertainty for ISC-GEM Mw: always 0.0, whatever the
    magnitude.
    """
    extra_sigma = 0.0
    return extra_sigma

def gcmt_mw(magnitude):
    """
    Identity conversion for Mw reported by GCMT: the value is
    returned unchanged.
    """
    mw = magnitude
    return mw

def gcmt_mw_sigma(magnitude):
    """
    Additional uncertainty for GCMT Mw: always 0.0, whatever the
    magnitude.
    """
    extra_sigma = 0.0
    return extra_sigma

def neic_mw(magnitude):
    """
    Linear conversion applied to Mw reported by NEIC:
    1.021 * magnitude - 0.091
    """
    scaled = 1.021 * magnitude
    return scaled - 0.091

def neic_mw_sigma(magnitude):
    """
    Constant uncertainty of 0.105 units for converted NEIC Mw.

    NOTE(review): the previous docstring quoted 0.101 while the code
    returns 0.105 - confirm the value against the source table.
    """
    sigma = 0.105
    return sigma

def nied_mw(magnitude):
    """
    Linear conversion applied to Mw reported by NIED:
    0.964 * magnitude + 0.248
    """
    scaled = 0.964 * magnitude
    return scaled + 0.248

def nied_mw_sigma(magnitude):
    """
    Constant uncertainty of 0.11 units for converted NIED Mw.
    """
    sigma = 0.11
    return sigma

def isc_ms(magnitude):
    """
    Two-segment conversion of Ms reported by ISC to Mw, after
    Weatherill (2015): one line above Ms 6.0, another at or below it.
    """
    # Choose the segment first, then apply the shared linear form.
    if magnitude > 6.0:
        slope, intercept = 0.994, 0.1
    else:
        slope, intercept = 0.616, 2.369
    return slope * magnitude + intercept

def isc_ms_sigma(magnitude):
    """
    Uncertainty of the ISC Ms conversion: 0.174 above Ms 6.0,
    0.147 otherwise.
    """
    return 0.174 if magnitude > 6.0 else 0.147

def neic_ms(magnitude):
    """
    Two-segment conversion of Ms reported by NEIC to Mw, after
    Weatherill (2015): one line above Ms 6.47, another at or below it.
    """
    if magnitude > 6.47:
        return 1.005 * magnitude - 0.026
    # At or below the 6.47 break point.
    return 0.723 * magnitude + 1.798

def neic_ms_sigma(magnitude):
    """
    Uncertainty of the NEIC Ms conversion: 0.187 above Ms 6.47,
    0.159 otherwise.
    """
    return 0.187 if magnitude > 6.47 else 0.159
    
def neic_msz(magnitude):
    """
    Two-segment conversion of Msz reported by NEIC to Mw, after
    Weatherill (2015): one line above Msz 6.47, another at or below it.
    """
    # Choose the segment first, then apply the shared linear form.
    if magnitude > 6.47:
        slope, intercept = 0.950, 0.359
    else:
        slope, intercept = 0.707, 1.933
    return slope * magnitude + intercept

def neic_msz_sigma(magnitude):
    """
    Uncertainty of the NEIC Msz conversion: 0.204 above Msz 6.47,
    0.179 otherwise.
    """
    return 0.204 if magnitude > 6.47 else 0.179

def neic_mb(magnitude):
    """
    Linear conversion applied to mb reported by NEIC:
    1.159 * magnitude - 0.659
    """
    scaled = 1.159 * magnitude
    return scaled - 0.659

def neic_mb_sigma(magnitude):
    """
    Constant uncertainty of 0.283 units for converted NEIC mb.
    """
    sigma = 0.283
    return sigma

def isc_mb(magnitude):
    """
    Linear conversion applied to mb reported by ISC:
    1.084 * magnitude - 0.142
    """
    scaled = 1.084 * magnitude
    return scaled - 0.142

def isc_mb_sigma(magnitude):
    """
    Constant uncertainty of 0.317 units for converted ISC mb.
    """
    sigma = 0.317
    return sigma

def pas_ms(magnitude):
    """
    Identity conversion for Ms reported by PAS: the value is returned
    unchanged (its uncertainty is given by pas_ms_sigma).
    In their database Pacheco & Sykes (1992) use
    the 20-s period Ms value, which, for our purposes, we treat as
    equivalent to MW in the magnitude range 7.0 ≤ MW ≤ 8.0. (Weatherill, 2015)
    """
    mw = magnitude
    return mw

def pas_ms_sigma(magnitude):
    """
    Constant additional uncertainty of 0.2 units for PAS Ms.
    """
    sigma = 0.2
    return sigma


### Define Magnitude Hierarchy

In [None]:
# Magnitude conversion rules: each pairs an (agency, magnitude type) with its
# conversion function and uncertainty function.
# NOTE(review): the list order presumably defines the magnitude hierarchy
# (first matching rule wins) - confirm against DynamicHomogenisor.
rule_set_1900 = [
    MagnitudeConversionRule("ISC-GEM", "Mw", iscgem_mw, iscgem_mw_sigma),
    MagnitudeConversionRule("GCMT", "Mw", gcmt_mw, gcmt_mw_sigma),
    MagnitudeConversionRule("NEIC", "Mw", neic_mw, neic_mw_sigma),
    MagnitudeConversionRule("NIED", "Mw", nied_mw, nied_mw_sigma),
    MagnitudeConversionRule("ISC", "Ms", isc_ms, isc_ms_sigma),
    # IDC Ms reuses the ISC Ms conversion.
    # NOTE(review): a second, identical ("IDC", "Ms") rule was removed here as a
    # redundant duplicate; if another agency or scale was intended, add it back.
    MagnitudeConversionRule("IDC", "Ms", isc_ms, isc_ms_sigma),
    MagnitudeConversionRule("NEIC", "Ms", neic_ms, neic_ms_sigma),
    # USCGS and NEIS reuse the NEIC conversions.
    MagnitudeConversionRule("USCGS", "Ms", neic_ms, neic_ms_sigma),
    MagnitudeConversionRule("NEIC", "Msz", neic_msz, neic_msz_sigma),
    MagnitudeConversionRule("NEIC", "Mb", neic_mb, neic_mb_sigma),
    MagnitudeConversionRule("NEIS", "Mb", neic_mb, neic_mb_sigma),
    MagnitudeConversionRule("USCGS", "Mb", neic_mb, neic_mb_sigma),
    MagnitudeConversionRule("ISC", "Mb", isc_mb, isc_mb_sigma),
    MagnitudeConversionRule("PAS", "Ms", pas_ms, pas_ms_sigma),
]

# A single rule set applies to the whole time span.
magnitude_rules = [
    ("1900/01/01 - 2021/12/31", rule_set_1900)
]

### Pre-processing

Before executing the homogenisation it is necessary to run a preprocessing step. This searches through the catalogue and identifies which conversion rule to apply:

The preprocessor is instantiated with a string describing the sort of rules to be applied.

"time" - Applies time only

"key" - Applies key rules only

"depth" - Applies depth rules only

"time|key" - Applies joint time and key rules

"time|depth" - Applies joint time and depth rules

"depth|key" - Applies joint depth and key rules

In [None]:
# Build a preprocessor that applies time-based rules only, then run it on each
# per-period ISC catalogue and on the ISC-GEM catalogue with the same origin and
# magnitude rules.
# NOTE(review): the `pp_catalogue_2021_2023` line needs `isc_catalogue_2021_2023`,
# so it only works when the 2021-2023 load in the catalogue-loading cell is active.
# NOTE(review): `execute` may modify the catalogue it is given in place - confirm.
preprocessor = HomogenisorPreprocessor("time")
pp_catalogue_1900_1999 = preprocessor.execute(isc_catalogue_1900_1999, origin_rules, magnitude_rules)
pp_catalogue_2000_2009 = preprocessor.execute(isc_catalogue_2000_2009, origin_rules, magnitude_rules)
pp_catalogue_2010_2014 = preprocessor.execute(isc_catalogue_2010_2014, origin_rules, magnitude_rules)
pp_catalogue_2015_2021 = preprocessor.execute(isc_catalogue_2015_2021, origin_rules, magnitude_rules)
pp_catalogue_2021_2023 = preprocessor.execute(isc_catalogue_2021_2023, origin_rules, magnitude_rules)
pp_catalogue_iscgem = preprocessor.execute(iscgem_catalogue, origin_rules, magnitude_rules)
# catalogue = preprocessor.execute(isc_catalogue, origin_rules, magnitude_rules)

### Harmonise the Catalogue

In [None]:
# One DynamicHomogenisor per preprocessed catalogue, each with logging enabled so
# its log can be dumped afterwards.
harmonisor_1900_1999 = DynamicHomogenisor(pp_catalogue_1900_1999, logging=True)
harmonisor_2000_2009 = DynamicHomogenisor(pp_catalogue_2000_2009, logging=True)
harmonisor_2010_2014 = DynamicHomogenisor(pp_catalogue_2010_2014, logging=True)
harmonisor_2015_2021 = DynamicHomogenisor(pp_catalogue_2015_2021, logging=True)
harmonisor_2021_2023 = DynamicHomogenisor(pp_catalogue_2021_2023, logging=True)
harmonisor_iscgem = DynamicHomogenisor(pp_catalogue_iscgem, logging=True)

# Apply the rules. Note the argument order: magnitude rules first, then origin
# rules (the reverse of `preprocessor.execute`).
homogenised_catalogue_1900_1999 = harmonisor_1900_1999.homogenise(magnitude_rules, origin_rules)
homogenised_catalogue_2000_2009 = harmonisor_2000_2009.homogenise(magnitude_rules, origin_rules)
homogenised_catalogue_2010_2014 = harmonisor_2010_2014.homogenise(magnitude_rules, origin_rules)
homogenised_catalogue_2015_2021 = harmonisor_2015_2021.homogenise(magnitude_rules, origin_rules)
homogenised_catalogue_2021_2023 = harmonisor_2021_2023.homogenise(magnitude_rules, origin_rules)
homogenised_catalogue_iscgem = harmonisor_iscgem.homogenise(magnitude_rules, origin_rules)

In [None]:
# Report the event count of each homogenised per-period catalogue.
for count_message, homogenised in (
    ("ISC Reviewed Bulleting Catalogue 1900-1999 contains: %d events", homogenised_catalogue_1900_1999),
    ("ISC Reviewed Bulleting Catalogue 2000-2009 contains: %d events", homogenised_catalogue_2000_2009),
    ("ISC Reviewed Bulleting Catalogue 2010-2014 contains: %d events", homogenised_catalogue_2010_2014),
    ("ISC Reviewed Bulleting Catalogue 2015-2021 contains: %d events", homogenised_catalogue_2015_2021),
    ("ISC Reviewed Bulleting Catalogue 2021-2023 contains: %d events", homogenised_catalogue_2021_2023),
):
    print(count_message % homogenised.get_number_events())

As logging was enabled, we can dump the log to a CSV file and explore which rules and which hierarchy were applied to each event.

In [None]:
from datetime import datetime

# Date stamp (YYYY.MM.DD); only the commented-out ISC-GEM dump below uses it.
current_datetime = datetime.now().strftime('%Y.%m.%d') #_{current_datetime}

# Dump each harmonisor's log to its own CSV file.
# NOTE(review): the first file is named 1900-2021 although it holds the
# 1900-1999 harmonisor's log; the name is kept for compatibility - confirm.
for harmonisor, log_file in [
    (harmonisor_1900_1999, "outputs/1900-2021-isc-rb-homogenisor_log.csv"),
    (harmonisor_2000_2009, "outputs/2000-2009-isc-rb-homogenisor_log.csv"),
    (harmonisor_2010_2014, "outputs/2010-2014-isc-rb-homogenisor_log.csv"),
    (harmonisor_2015_2021, "outputs/2015-2021-isc-rb-homogenisor_log.csv"),
    (harmonisor_2021_2023, "outputs/2021-2023-isc-rb-homogenisor_log.csv"),
]:
    # Delete any log left by an earlier run before dumping the new one.
    if os.path.exists(log_file):
        os.remove(log_file)
    harmonisor.dump_log(log_file)

# log_file = f"outputs/iscgem-homogenisor_log_{current_datetime}.csv"
# if os.path.exists(log_file):
#     os.remove(log_file)

# harmonisor_iscgem.dump_log(log_file)

### Export the Homogenised Catalogue to CSV

In [None]:
# Export each period's homogenised catalogue to its own CSV file.
for harmonisor, output_catalogue_file in [
    (harmonisor_1900_1999, "outputs/1900-1999-isc-rb-homogeneous_catalogue.csv"),
    (harmonisor_2000_2009, "outputs/2000-2009-isc-rb-homogeneous_catalogue.csv"),
    (harmonisor_2010_2014, "outputs/2010-2014-isc-rb-homogeneous_catalogue.csv"),
    (harmonisor_2015_2021, "outputs/2015-2021-isc-rb-homogeneous_catalogue.csv"),
    (harmonisor_2021_2023, "outputs/2021-2023-isc-rb-homogeneous_catalogue.csv"),
]:
    # Delete any export left by an earlier run before writing the new one.
    if os.path.exists(output_catalogue_file):
        os.remove(output_catalogue_file)
    harmonisor.export_homogenised_to_csv(output_catalogue_file)

# output_catalogue_file = f"outputs/1900-2021-isc-gem-homogeneous_catalogue_{current_datetime}.csv"
# if os.path.exists(output_catalogue_file):
#     os.remove(output_catalogue_file)
# harmonisor_iscgem.export_homogenised_to_csv(output_catalogue_file)

In [None]:
'''
# Remove stuff after the 19th comma/column so the dataframes can be uniform.
'''

# Source CSV and the path of its truncated copy. The commented-out pairs below are
# the same two paths for the other catalogue periods.

# input_csv_file = r'outputs/1900-1999-isc-rb-homogeneous_catalogue.csv'
# output_csv_file = r'outputs/test/1900-1999-isc-rb-homogeneous_catalogue.csv'

# input_csv_file = r'outputs/2000-2009-isc-rb-homogeneous_catalogue.csv'
# output_csv_file = r'outputs/test/2000-2009-isc-rb-homogeneous_catalogue.csv'

# input_csv_file = r'outputs/2010-2014-isc-rb-homogeneous_catalogue.csv'
# output_csv_file = r'outputs/test/2010-2014-isc-rb-homogeneous_catalogue.csv'

# input_csv_file = r'outputs/2015-2021-isc-rb-homogeneous_catalogue.csv'
# output_csv_file = r'outputs/test/2015-2021-isc-rb-homogeneous_catalogue.csv'

input_csv_file = r'outputs/2021-2023-isc-rb-homogeneous_catalogue.csv'
output_csv_file = r'outputs/test/2021-2023-isc-rb-homogeneous_catalogue.csv'

def remove_text_after_19th_comma(line, max_columns=19):
    """
    Keep only the first `max_columns` comma-separated fields of `line`.

    With the default of 19, everything from the 19th comma onwards is dropped
    (the 19th comma itself is not kept). Lines with fewer fields are returned
    unchanged. The split is a plain ``str.split(',')``, so commas inside quoted
    CSV fields also count as separators.

    :param str line: one line of comma-separated text, without its newline
    :param int max_columns: number of leading fields to keep (default 19)
    :returns: the truncated line as a string
    """
    # Slicing past the end is a no-op, so short lines need no length check.
    return ','.join(line.split(',')[:max_columns])

# Truncate every per-period homogenised catalogue, not only the single pair
# selected above: the later concatenation cell reads all five truncated files
# from outputs/test.
os.makedirs('outputs/test', exist_ok=True)  # the target directory may not exist yet
for catalogue_period in ('1900-1999', '2000-2009', '2010-2014', '2015-2021', '2021-2023'):
    input_csv_file = f'outputs/{catalogue_period}-isc-rb-homogeneous_catalogue.csv'
    output_csv_file = f'outputs/test/{catalogue_period}-isc-rb-homogeneous_catalogue.csv'
    with open(input_csv_file, 'r') as infile, open(output_csv_file, 'w') as outfile:
        for line in infile:
            # Drop the text after the 19th comma of each line.
            new_line = remove_text_after_19th_comma(line.strip())

            # Write the truncated line to the new output CSV file.
            outfile.write(new_line + '\n')


In [None]:
import pandas as pd

In [None]:
import pandas as pd

# Load the truncated per-period catalogues, one DataFrame each.
df_1900_1999 = pd.read_csv(r'outputs/test/1900-1999-isc-rb-homogeneous_catalogue.csv')
df_2000_2009 = pd.read_csv(r'outputs/test/2000-2009-isc-rb-homogeneous_catalogue.csv')
df_2010_2014 = pd.read_csv(r'outputs/test/2010-2014-isc-rb-homogeneous_catalogue.csv')
df_2015_2021 = pd.read_csv(r'outputs/test/2015-2021-isc-rb-homogeneous_catalogue.csv')
df_2021_2023 = pd.read_csv(r'outputs/test/2021-2023-isc-rb-homogeneous_catalogue.csv')

# Stack the periods in chronological order under a fresh 0..n-1 index.
period_frames = [
    df_1900_1999,
    df_2000_2009,
    df_2010_2014,
    df_2015_2021,
    df_2021_2023,
]
df_1900_2023 = pd.concat(period_frames, ignore_index=True)

# Write the combined catalogue without the index column.
df_1900_2023.to_csv(
    r'outputs/test/1900-2023-isc-rb-homogeneous_catalogue.csv',
    index=False,
)

In [None]:
# Preview the first rows of the combined 1900-2023 catalogue.
df_1900_2023.head()

In [None]:
# Load the ISC-GEM catalogue CSV (the file parsed earlier by
# GenericCataloguetoISFParser) as a DataFrame and preview it.
df_iscgem = pd.read_csv(r'inputs/isc-gem-cat.csv')
df_iscgem.head()

In [None]:
# Get the names of the last 6 columns
# NOTE(review): this cell is not idempotent - `df_iscgem` is overwritten, so
# re-running it without re-reading the CSV drops six more columns each time.
columns_to_drop = df_iscgem.columns[-6:]

# Drop the last 6 columns by column names
df_iscgem = df_iscgem.drop(columns=columns_to_drop)

df_iscgem.head()

In [None]:
# Event IDs already present in the ISC-GEM catalogue.
iscgem_events_df = df_iscgem["eventID"]

df_1900_2023_copy = df_1900_2023.copy()
print("1900-2023", len(df_1900_2023_copy))

# Keep only the events that ISC-GEM does not already contain.
df_1900_2023_copy = df_1900_2023_copy[~df_1900_2023_copy["eventID"].isin(iscgem_events_df)]

# Extend ISC-GEM with the remaining events.
df_iscgem_extended = pd.concat([df_1900_2023_copy, df_iscgem], ignore_index=True)

# Sort chronologically with one multi-key sort. The previous chain of single-key
# sorts used the default, non-stable algorithm, so each sort could scramble the
# order set by the one before and only the final `year` order was guaranteed.
df_iscgem_extended = df_iscgem_extended.sort_values(
    by=['year', 'month', 'day', 'hour', 'minute', 'second'])

print("1900-2023 extended", len(df_iscgem_extended))

In [None]:
# Save the extended catalogue to CSV without the index column, then display it.
df_iscgem_extended .to_csv(r'outputs/test/1900-2023-isc-gem-extended-catalogue.csv', index=False)
df_iscgem_extended 

In [None]:
# Print the row count of each per-period DataFrame, in chronological order.
for period_frame in (df_1900_1999, df_2000_2009, df_2010_2014, df_2015_2021, df_2021_2023):
    print(len(period_frame))

In [None]:
import os
import pandas as pd

In [None]:
# Build the path with os.path.join so it is valid on every platform; the earlier
# hard-coded backslashes only formed a real path on Windows.
csv_file_path = os.path.join(os.getcwd(), 'inputs', 'datetime-test.csv')
csv_file_path

In [None]:
# Path of the date-time table, built with os.path.join so it is valid on every
# platform (the earlier hard-coded backslashes only worked on Windows).
csv_file_path = os.path.join(os.getcwd(), 'inputs', 'datetime-weatherill.csv')

# Read the CSV file into a DataFrame.
df = pd.read_csv(csv_file_path)

# Combine the individual date/time columns into one pandas datetime column.
df['datetime'] = pd.to_datetime(df[['year', 'month', 'day', 'hour', 'minute', 'second']])

# Store each row's Timestamp.timestamp() value in 'unix_weatherill-timestamp'.
df['unix_weatherill-timestamp'] = df['datetime'].apply(lambda x: x.timestamp())

# Output directory and file name of the result.
csv_file_outpath = os.path.join(os.getcwd(), 'outputs')
csv_filename = "weatherill-unix.csv"

# Join the directory and the file name to get the full output path.
output_path = os.path.join(csv_file_outpath, csv_filename)

# Save the DataFrame as a CSV file, without the index column.
df.to_csv(output_path, index=False)