In [5]:
import os
import unittest
import tempfile
import numpy as np
import pandas as pd
import toml

from openquake.cat.hmg import merge

# Working directory at the time this cell runs; the 'inputs' data folder
# is resolved relative to it further down.
BASE_PATH = os.getcwd()

In [8]:
# TOML template with the settings for the catalogue merge. It is parsed with
# toml.loads() and the path entries are then overwritten key by key; the
# "{:s}" placeholders are never filled through str.format().
# NOTE(review): "region_shp" is not overwritten by the visible code, so it
# keeps the literal "{:s}" value — confirm it is unused by the merge.
SETTINGS = """

[general]
region_buffer = 5.0
output_path = "{:s}"
output_prefix = "test_"
region_shp = "{:s}"
log_file = "{:s}"

# Catalogues

[[catalogues]]
code = "ISC"
name = "ISC Bulletin"
filename = "{:s}"
type = "isf"
select_region = false

[[catalogues]]
code = "ISC-GEM"
name = "Original ISC-GEM"
filename = "{:s}"
type = "csv"
delta_ll = 0.50
delta_t =  40.0
timezone = 0
buff_ll = 0.0
buff_t = 5.0
use_ids = false
"""

In [13]:
# Folder holding the input catalogues
data_path = os.path.join(BASE_PATH, 'inputs')

# Create the temporary folder receiving the settings file, the log file and
# the merge outputs. NOTE: mkdtemp() does not remove the folder afterwards;
# delete it manually (e.g. shutil.rmtree(tmpd)) once it is no longer needed.
tmpd = tempfile.mkdtemp()

# Update settings
# Use toml.loads and toml.dump to ensure that Windows paths
# are escaped correctly and the resulting TOML file is valid
td = toml.loads(SETTINGS)
td["general"]["output_path"] = tmpd
td["general"]["log_file"] = os.path.join(tmpd, "log.txt")
# NOTE(review): "region_shp" keeps the "{:s}" placeholder of the template
# because the assignment below is disabled — confirm the merge ignores it.
# td["general"]["region_shp"] = \
#     os.path.join(data_path, "shp", "test_area.shp")
td["catalogues"][0]["filename"] = \
    os.path.join(data_path, "1900-1999-PH_ISF_Catalogue.txt")
td["catalogues"][1]["filename"] = \
    os.path.join(data_path, "isc-gem-cat-3.csv")

# Create settings file. A TOML document must be UTF-8 encoded, so the
# encoding is set explicitly instead of relying on the platform default
# (which is a legacy code page on Windows and would corrupt non-ASCII paths).
settings = os.path.join(tmpd, "settings.toml")
with open(settings, "w", encoding="utf-8") as fou:
    toml.dump(td, fou)

In [14]:
"""Merging GCMT catalogue"""

# Merge the catalogues listed in the settings file (ISC Bulletin and ISC-GEM)
# and check the number of prime origins in the table that is produced.

# Show which settings file is used
print(settings)

# Merge
merge.process_catalogues(settings)

# Reading catalogue: the table is looked up in the temporary output folder;
# the "test_" prefix of the file name matches output_prefix in the settings.
# `tmpd` is the notebook-level variable (there is no `self` outside a
# unittest.TestCase method).
fname = os.path.join(tmpd, "test_otab.h5")
odf = pd.read_hdf(fname)

# Plain assert replaces unittest's assertEqual, which needs a TestCase instance
n_prime = len(odf[odf["prime"] == 1])
assert n_prime == 635, \
    "Expected 635 prime origins, found {:d}".format(n_prime)

C:\Users\ENRICO~1.ABC\AppData\Local\Temp\tmpegqkqe1k\settings.toml

Catalogue: ISC Bulletin
    The original catalogue contains 30322 events
   Catalogue contains: 30322 events
      Building index
   Log file: /tmp/tmp_merge_00.tmp
   Whole catalogue contains: 30322 events

Catalogue: Original GCMT
    The original catalogue contains 3993 events
   Catalogue contains: 3993 events
   Log file: /tmp/tmp_merge_01.tmp


ValueError: Event ID: 914060 already there. Length ids 30347

In [17]:
%matplotlib inline
import os
import numpy as np
import matplotlib.pyplot as plt

# Import the Parsers
from openquake.cat.parsers.isf_catalogue_reader import ISFReader
from openquake.cat.isc_homogenisor import (HomogenisorPreprocessor,
                                   DynamicHomogenisor,
                                   MagnitudeConversionRule)

In [18]:
# Agencies passed both as origin agencies and as magnitude agencies
selected_agencies = ["EHB", "ISC", "NEIC", "NEID", "GCMT", "GUTE", "PAS"]

# Read the ISF bulletin; each keyword argument receives its own list object
parser = ISFReader(
    "inputs/1900-2021-PH_ISF_Catalogue.txt",
    selected_origin_agencies=list(selected_agencies),
    selected_magnitude_agencies=list(selected_agencies),
)
catalogue = parser.read_file("ISC_DB1", "ISC Global M >= 5")

# Report the size of the catalogue that was read
n_isf_events = catalogue.get_number_events()
print("Catalogue contains: %d events" % n_isf_events)

Catalogue contains: 51516 events


In [19]:
from openquake.cat.parsers.converters import GenericCataloguetoISFParser

In [20]:
# Parse the ISC-GEM CSV file with the generic-catalogue-to-ISF converter
gem_parser = GenericCataloguetoISFParser("inputs/isc-gem-cat-3.csv")
catalogue3 = gem_parser.parse("ISC-GEM", "ISC-GEM-CAT")

# Report the size of the parsed catalogue
n_gem_events = catalogue3.get_number_events()
print("Catalogue contains: %d events" % n_gem_events)

Catalogue contains: 3993 events


In [16]:
from openquake.cat.isc_homogenisor import DuplicateFinder

In [22]:
# Set up the duplicate finder with `catalogue` as the reference catalogue.
# NOTE(review): 10 and 20 are presumably the time and distance windows used
# to match events — confirm meaning and units against DuplicateFinder.
merger = DuplicateFinder(catalogue, 10, 20, logging=True)

In [23]:
# Merge the ISC-GEM catalogue into the reference one; the recorded output
# reports the number of events after duplicate finding.
iscgem_isf = merger.merge_catalogue(catalogue3)

After duplicate finding: 51933 events (51933)


In [26]:
# Inspect the merged object (an ISFCatalogue according to the recorded output)
iscgem_isf

<openquake.cat.isf_catalogue.ISFCatalogue at 0x2098ed67d00>

In [24]:
# Convert the merged catalogue into two tables: origins and magnitudes
origin_df4, magnitude_df4 = iscgem_isf.build_dataframe()

In [25]:
# Name of the CSV file, defined here so that this cell also runs on a fresh
# kernel (it was otherwise only defined in a cell further down).
csv_filename = 'merged-iscgem-isf.csv'

# Save the origin table in the current working directory, without the index
origin_df4.to_csv(csv_filename, index=False)

# Display the origin table (pandas truncates the rendering of long frames)
origin_df4

Unnamed: 0,eventID,originID,Agency,year,month,day,hour,minute,second,time_error,...,semiminor90,error_strike,depth_error,prime,dip1,rake1,str1,dip2,rake2,str2
0,610548604,610548604,ISC-GEM,1905,5,31,18,23,32.750000,0.00,...,24.900000,,5.0,1,,,,,,
1,610548608,610548608,ISC-GEM,1905,7,16,18,46,55.437500,0.00,...,83.800003,,32.0,1,,,,,,
2,610548621,610548621,ISC-GEM,1905,12,10,18,9,58.875000,0.00,...,18.400000,,6.3,1,,,,,,
3,16957889,16957889,ISC-GEM,1906,3,16,22,42,25.109375,0.00,...,0.000000,0.0,6.8,1,,,,,,
4,16957892,16957892,ISC-GEM,1906,3,26,3,28,39.062500,0.00,...,11.300000,,12.3,1,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104065,620802596,19892457,ISC,2021,6,30,15,51,49.468750,1.45,...,6.250000,78.0,14.6,1,0.0,0.0,0.0,0.0,0.0,0.0
104066,620802599,16580981,NEIC,2021,6,30,18,53,43.031250,0.85,...,5.700000,208.0,8.2,0,0.0,0.0,0.0,0.0,0.0,0.0
104067,620802599,19892464,ISC,2021,6,30,18,53,41.000000,0.58,...,6.362000,62.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0
104068,620802602,16580994,NEIC,2021,6,30,20,28,45.468750,1.38,...,12.220000,143.0,5.2,0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
# Display the magnitude table returned by build_dataframe()
magnitude_df4

Unnamed: 0,eventID,originID,magnitudeID,value,sigma,magType,magAgency
0,610548604,610548604,610548604|ISC-GEM|6.80|Mw,6.80,0.46,Mw,ISC-GEM
1,610548608,610548608,610548608|ISC-GEM|6.06|Mw,6.06,0.51,Mw,ISC-GEM
2,610548621,610548621,610548621|ISC-GEM|6.80|Mw,6.80,0.20,Mw,ISC-GEM
3,16957889,16957889,16957889|ISC-GEM|6.89|Mw,6.89,0.39,Mw,ISC-GEM
4,16957892,16957892,16957892|ISC-GEM|6.13|Mw,6.13,0.20,Mw,ISC-GEM
...,...,...,...,...,...,...,...
101195,620802599,16580981,16580981|NEIC|4.00|mb,4.00,0.10,mb,NEIC
101196,620802599,19892464,19892464|ISC|4.10|mb,4.10,0.10,mb,ISC
101197,620802602,16580994,16580994|NEIC|4.50|mb,4.50,0.10,mb,NEIC
101198,620802602,19892467,19892467|ISC|4.30|mb,4.30,0.20,mb,ISC


In [28]:
# Destination of the merged origin table: <current working dir>/outputs/<file>
csv_filename = 'merged-iscgem-isf.csv'
outputs_folder = os.path.join(os.getcwd(), 'outputs')
csv_file_path = os.path.join(outputs_folder, csv_filename)

# Make sure the "outputs" folder is there (no error when it already exists)
os.makedirs(outputs_folder, exist_ok=True)

# Write the origins to CSV; index=False leaves the DataFrame index out
origin_df4.to_csv(csv_file_path, index=False)