In [21]:
import os
import sys
import warnings

# Working directory of the notebook process
notebook_dir = os.getcwd()

# Absolute path of the directory one level above the working directory
parent_dir = os.path.abspath(os.path.join(notebook_dir, os.pardir))

# Register the parent directory on the import path exactly once, so that
# re-running this cell does not add duplicate entries.
if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)

In [22]:
from libs.config.config_variables import (
    MAX_WORKERS,
    TIMEOUT_API_REQUEST,
    CATALOG_CSV_DIR,
)

from modules.catalog_scraper.seismic_scraper import SeismicScraper
from modules.downloader.records_downloader import SeismicDownloader

# 0. Configuración básica de catálogos

In [23]:
# Geographic bounds (Peru and surrounding area): latitude limits
MIN_LAT = -20.0
MAX_LAT = 0.0
# Geographic bounds (Peru and surrounding area): longitude limits
MIN_LON = -85.0
MAX_LON = -68.0

# First year covered by the scraping
START_YEAR = 1400

# 1. Ejecución de scraping de catálogos


## 1.1 Carga de datos históricos

In [24]:
# Load the CSV-based catalogs through a scraper configured with the CSV
# base directory.
historical_scraper = SeismicScraper(
    max_workers=MAX_WORKERS,
    timeout=TIMEOUT_API_REQUEST,
    csv_base_path=CATALOG_CSV_DIR,
)

historical_catalogs = ["gcmt", "isc-gem", "sara"]
# Year and bounding-box filters are currently disabled for this load:
#   start_year=START_YEAR,
#   min_lat=MIN_LAT, max_lat=MAX_LAT,
#   min_lon=MIN_LON, max_lon=MAX_LON,
results = historical_scraper.scrape_events_from_csv(catalogs=historical_catalogs)


2025-12-07 10:54:12,716 [INFO] modules.catalog_scraper.seismic_scraper: Starting execution of SeismicScraper.scrape_events_from_csv
2025-12-07 10:54:12,717 [INFO] modules.catalog_scraper.seismic_scraper: Encontrados 1 archivos CSV para GCMT
2025-12-07 10:54:12,718 [INFO] modules.catalog_scraper.seismic_scraper: Encontrados 2 archivos CSV para ISC-GEM
2025-12-07 10:54:12,719 [INFO] modules.catalog_scraper.seismic_scraper: Encontrados 2 archivos CSV para SARA
2025-12-07 10:54:12,720 [INFO] modules.catalog_scraper.seismic_scraper: Iniciando carga desde CSV: 5 archivos con 40 workers
2025-12-07 10:54:12,722 [INFO] modules.catalog_scraper.seismic_scraper: Iniciando carga desde CSV: GCMT - gcmt_data.csv
2025-12-07 10:54:12,744 [INFO] modules.catalog_scraper.seismic_scraper: Iniciando carga desde CSV: ISC-GEM - isc-gem-cat.csv
2025-12-07 10:54:12,746 [INFO] modules.catalog_scraper.seismic_scraper: Iniciando carga desde CSV: ISC-GEM - isc-gem-suppl.csv
2025-12-07 10:54:12,762 [INFO] modules.ca

In [25]:
# Display the scraper's `metadata` for the CSV load; the saved output shows
# it as a table of processed / new / updated / error counts.
historical_scraper.metadata

+-----------------+--------+--------------+---------+
| Total procesado | Nuevos | Actualizados | Errores |
+-----------------+--------+--------------+---------+
|     178748      | 178748 |      0       |    1    |
+-----------------+--------+--------------+---------+


## 1.2 Actualización de datos recientes

In [26]:
# Separate scraper instance for the recent-data update; no CSV base path is
# passed to this one.
recent_data_scraper = SeismicScraper(
    max_workers=MAX_WORKERS,
    timeout=TIMEOUT_API_REQUEST,
)

# Scrape the listed catalogs from the year 2000 onward, restricted to the
# latitude/longitude bounds defined in the configuration cell.
recent_catalogs = ["usgs", "isc", "igp"]
results = recent_data_scraper.scrape_events(
    start_year=2000,
    min_lat=MIN_LAT, max_lat=MAX_LAT,
    min_lon=MIN_LON, max_lon=MAX_LON,
    catalogs=recent_catalogs,
)

2025-12-07 10:54:25,730 [INFO] modules.catalog_scraper.seismic_scraper: Starting execution of SeismicScraper.scrape_events
2025-12-07 10:54:25,730 [INFO] modules.catalog_scraper.seismic_scraper: Iniciando scraping: 78 tareas con 40 workers
2025-12-07 10:54:25,730 [INFO] modules.catalog_scraper.seismic_scraper: Iniciando scraping: USGS - 2000
2025-12-07 10:54:25,730 [INFO] modules.catalog_scraper.seismic_scraper: Iniciando scraping: USGS - 2001
2025-12-07 10:54:25,761 [INFO] modules.catalog_scraper.seismic_scraper: Iniciando scraping: USGS - 2002
2025-12-07 10:54:25,763 [INFO] modules.catalog_scraper.seismic_scraper: Iniciando scraping: USGS - 2003
2025-12-07 10:54:25,763 [INFO] modules.catalog_scraper.seismic_scraper: Iniciando scraping: USGS - 2004
2025-12-07 10:54:25,779 [INFO] modules.catalog_scraper.seismic_scraper: Iniciando scraping: USGS - 2005
2025-12-07 10:54:25,779 [INFO] modules.catalog_scraper.seismic_scraper: Iniciando scraping: USGS - 2006
2025-12-07 10:54:25,795 [INFO] m

In [27]:
# Display the scraper's `metadata` for the recent-data update; the saved
# output shows it as a table of processed / new / updated / error counts.
recent_data_scraper.metadata

+-----------------+--------+--------------+---------+
| Total procesado | Nuevos | Actualizados | Errores |
+-----------------+--------+--------------+---------+
|      15961      | 15868  |      93      |   15    |
+-----------------+--------+--------------+---------+


## 1.3 Descarga de registros acelerométricos

In [28]:
# Settings for the accelerometric record download
NUM_EVENTS = 100
PARALLEL_STATIONS = 10

# Catalog name -> number of events to process for that catalog
# (the downloader log reports it as "IGP: 6000 eventos").
CATALOG_PARSER_CONFIG = {"igp": 6000}
# Entries currently disabled:
#   "usgs": 5,
#   "emsc": 15,

In [29]:
# Download records for every catalog listed in CATALOG_PARSER_CONFIG.
downloader = SeismicDownloader()

# Capture the report instead of only echoing it, so it can be inspected.
download_report = downloader.process_multiple_catalogs(
    catalogs=CATALOG_PARSER_CONFIG,
    max_workers=MAX_WORKERS,
    parallel_stations=PARALLEL_STATIONS,
)

# The report can carry 'success': True even when no event was processed
# successfully (as in the saved run), so surface that case explicitly
# rather than letting it pass unnoticed. A warning is used, not an
# exception, so the rest of the notebook still runs.
report_summary = (
    download_report.get("summary") or {} if isinstance(download_report, dict) else {}
)
total_events = report_summary.get("total_events") or 0
successful_events = report_summary.get("successful_events") or 0
if total_events and not successful_events:
    warnings.warn(
        f"No records were downloaded: all {total_events} events failed. "
        "Check 'error_summary' in the report before relying on this run.",
        RuntimeWarning,
    )

# Keep the report as the last expression so the cell still displays it.
download_report

2025-12-07 10:54:46,472 [INFO] modules.downloader.records_downloader: Starting execution of SeismicDownloader.process_multiple_catalogs
2025-12-07 10:54:46,472 [INFO] modules.downloader.records_downloader: Procesando 1 catálogos
2025-12-07 10:54:46,473 [INFO] modules.downloader.records_downloader:   - IGP: 6000 eventos
2025-12-07 10:54:46,474 [INFO] modules.downloader.records_downloader: Starting execution of SeismicDownloader.process_catalog
2025-12-07 10:54:46,475 [INFO] modules.downloader.records_downloader: Procesando catálogo: IGP (6000 eventos)
2025-12-07 10:54:46,500 [INFO] libs.database.seismic_records: Inicio de sesión de sincronización (record_id=1, catalog_id=148)
2025-12-07 10:55:11,247 [INFO] modules.downloader.records_downloader: Catálogo IGP: 0/5334 eventos, 0 estaciones, 0 muestras
2025-12-07 10:55:11,247 [INFO] modules.downloader.records_downloader: Finished execution of SeismicDownloader.process_catalog in 24.77 seconds
2025-12-07 10:55:11,257 [INFO] modules.downloade

{'success': True,
 'catalogs_processed': 1,
 'results': {'igp': {'success': True,
   'catalog': 'igp',
   'sync_id': 1,
   'status': 'completed_with_errors',
   'total_events': 5334,
   'successful_events': 0,
   'failed_events': 5334,
   'total_stations': 0,
   'total_samples': 0,
   'total_processing_time': 471.3,
   'avg_processing_time': 0.09,
   'error_summary': '2020-0095: Procesamiento fallido; 2021-0143: Procesamiento fallido; IGP_20121120051114: Procesamiento fallido; 2021-0224: Procesamiento fallido; 2020-0432: Procesamiento fallido...',
   'timestamp': '2025-12-07T10:55:11.247441'}},
 'summary': {'total_events': 5334,
  'successful_events': 0,
  'failed_events': 5334,
  'total_stations': 0,
  'total_samples': 0},
 'timestamp': '2025-12-07T10:55:11.257503'}

In [30]:
# Display the downloader's `metadata`; the saved output shows it as a
# per-catalog table (status, event counts, success rate, timings, errors).
downloader.metadata

+----------+-----------------------+---------+---------+----------+----------+----------------+------------+----------+------------------+------------------------+---------------------------------------------------------------------------------------------------------+
| Catálogo |        Estado         | Sync ID | Eventos | Exitosos | Fallidos | Tasa Éxito (%) | Estaciones | Muestras | Tiempo Total (s) | Tiempo Prom/Evento (s) |                                            Errores (resumen)                                            |
+----------+-----------------------+---------+---------+----------+----------+----------------+------------+----------+------------------+------------------------+---------------------------------------------------------------------------------------------------------+
|   IGP    | completed_with_errors |    1    |  5334   |    0     |   5334   |      0.0       |     0      |    0     |      471.3       |          0.09          | 2020-0095: Procesamiento f