nonlinloc data conversion script - sources \\
Hanna-Riia Allas \\
Oct 11 2023 \\

This script converts NonLinLoc event data files into files that can be easily read by GMT.

In [1]:
# import packages

import numpy as np
import pandas as pd
#pd.set_option('display.max_rows', None)
import os
import glob

In [5]:
# define filepaths

# root directory with all event data; the other directories live inside it
path_to_events = "/home/hra35/Documents/events"

path_to_nnl_outputs = f"{path_to_events}/all_picks"  # pick files from nonlinloc
path_to_gmt_files = f"{path_to_events}/gmt_data"  # files for GMT plots
path_to_catalogues = f"{path_to_events}/catalogues"  # complete event catalogues


In [6]:
# CREATE NONLINLOC EVENT DATAFRAME
#
# Builds `event_df` with one row per nonlinloc event (event ID, latitude,
# longitude, depth, RMS and the number of P and S picks) and saves it as
# "picks_all.csv" in the GMT data directory.

# make lists of all the nonlinloc output files and pick files in the nonlinloc directory
nll_files = glob.glob(os.path.join(path_to_nnl_outputs, "*.hyp"))
obs_files = glob.glob(os.path.join(path_to_nnl_outputs, "*.obs"))

# event id-s are the .hyp file names without directory and extension
event_id_list = [os.path.splitext(os.path.basename(file))[0] for file in nll_files]

# initialise a dataframe indexed by event id so rows can be filled in by file name
event_df = pd.DataFrame(data=None, index=event_id_list, columns=["event_ID",
                                                                 "lat",
                                                                 "lon",
                                                                 "depth",
                                                                 "RMS",
                                                                 "no_P_picks",
                                                                 "no_S_picks"])
event_df["event_ID"] = event_id_list

# read the .hyp files and fill in the dataframe with coordinates and rms values
for file in nll_files:
    # start every file from "missing" so that a file without a GEOGRAPHIC or
    # QUALITY line cannot silently inherit the values of the previous file
    latitude = longitude = depth = rms = float("nan")

    with open(file, 'r') as file_to_read:
        for line in file_to_read:
            if line.startswith("GEOGRAPHIC"):
                # whitespace-separated fields 9, 11 and 13 hold the latitude,
                # longitude and depth values
                entries = line.split()
                latitude = float(entries[9])
                longitude = float(entries[11])
                depth = float(entries[13])
            elif line.startswith("QUALITY"):
                # whitespace-separated field 8 holds the rms value
                rms = float(line.split()[8])

    file_name = os.path.splitext(os.path.basename(file))[0]
    event_df.loc[file_name, ["lat", "lon", "depth", "RMS"]] = [latitude, longitude, depth, rms]

# read the .obs files to get the number of P and S picks per event
for file in obs_files:
    # sep=r'\s+' replaces the deprecated delim_whitespace=True
    df = pd.read_csv(file, sep=r'\s+', header=None, skiprows=1)

    # the fifth column is compared against the phase labels "P" and "S"
    phases = df.iloc[:, 4]
    no_P = int((phases == "P").sum())
    no_S = int((phases == "S").sum())

    file_name = os.path.splitext(os.path.basename(file))[0]
    event_df.loc[file_name, ["no_P_picks", "no_S_picks"]] = [no_P, no_S]

# the values were inserted into object columns; convert them to numeric dtypes
# directly instead of round-tripping the table through a csv file
for column in ["lat", "lon", "depth", "RMS", "no_P_picks", "no_S_picks"]:
    event_df[column] = pd.to_numeric(event_df[column])

# flip the sign of the depth values
# NOTE(review): presumably so that depth plots as negative (downward) values - confirm
event_df["depth"] = event_df["depth"] * -1

# the event id is stored in its own column, so the label index is no longer needed
event_df = event_df.reset_index(drop=True)

# save the complete structured data into a csv file
event_df.to_csv(os.path.join(path_to_gmt_files, "picks_all.csv"), sep=',', index=False)

In [None]:
#print(event_df)

In [None]:
# CREATE QM CATALOGUE DATAFRAMES

# paths of the QM catalogues
cat_20_21 = os.path.join(path_to_catalogues, "events_sorted_2020-21.csv")
cat_21_22 = os.path.join(path_to_catalogues, "events_sorted_2021-22.csv")
cat_22_23 = os.path.join(path_to_catalogues, "events_sorted_2022-23.csv")

catalogues = [cat_20_21, cat_21_22, cat_22_23]

# catalogue columns to load, and the names they get in the dataframes
columns_to_read = ["EventID", "DT", "X", "Y", "Z"]
renamed_columns = {'EventID': 'event_ID', 'DT': 'Date', 'X': 'lon', 'Y': 'lat', 'Z': 'depth'}

# one dataframe per catalogue, stored under the key "df_<years>"
cat_dataframes = {}

for catalogue in catalogues:
    # the years are the third underscore-separated part of the file name,
    # e.g. "events_sorted_2020-21.csv" -> "2020-21"
    stem, _extension = os.path.splitext(os.path.basename(catalogue))
    period = stem.split('_')[2]

    df = pd.read_csv(catalogue, sep=',', usecols=columns_to_read)
    df = df.rename(columns=renamed_columns)

    # flip the sign of the depth values
    df['depth'] = df['depth'].mul(-1)

    cat_dataframes[f'df_{period}'] = df
    
#print(cat_dataframes['df_2020-21'])

In [8]:
# CHOOSE GMT MAP AXES

# names of the dataframe columns exported as the two columns of the .xy files
gmt_xaxis, gmt_yaxis = 'lon', 'depth'
#gmt_zaxis = 'depth'


In [9]:
# CREATE FILES TO PLOT PICKED DATA WITH GMT

# write the two chosen axis columns of the picked events into a tab-separated
# .xy file without header or index, which is the layout GMT reads
picks_xy_path = os.path.join(path_to_gmt_files, f"all_picks_{gmt_xaxis}_{gmt_yaxis}.xy")
picks_xy = event_df.loc[:, [gmt_xaxis, gmt_yaxis]]
picks_xy.to_csv(picks_xy_path, sep='\t', header=False, index=False)


In [None]:
# CREATE FILES TO PLOT CATALOGUE DATA WITH GMT

# export .xy files in GMT-compatible format for plotting full catalogue data:
# one tab-separated file without header or index per catalogue, holding the
# two chosen axis columns
for season in ("2020-21", "2021-22", "2022-23"):
    # output names drop the dash from the season, e.g. "2020-21" -> "202021_catalogue_..."
    output_name = f"{season.replace('-', '')}_catalogue_{gmt_xaxis}_{gmt_yaxis}.xy"

    # path_to_gmt_files is the GMT directory defined with the other file paths;
    # the previously used name path_to_gmt_data is not defined anywhere
    cat_dataframes[f"df_{season}"][[gmt_xaxis, gmt_yaxis]].to_csv(
        os.path.join(path_to_gmt_files, output_name), sep='\t', header=False, index=False)


In [None]:
# CREATE FILES TO PLOT EVENTS FROM A SPECIFIC REGION

# latitude and longitude limits of the data region
lat_min, lat_max = 63.75, 64.0
lon_min, lon_max = -22.8, -21.8


In [10]:
# CREATE FILE TO PLOT STATIONS

path_to_stations = "/home/hra35/Documents/events/station_coordinates.txt"

# read the whitespace-separated station file, naming its six columns
station_columns = ['stat_ID', 'rm1', 'lat', 'lon', 'elev_km', 'rm2']
station_df = pd.read_csv(path_to_stations, sep=r'\s+', names=station_columns)

# keep only longitude, latitude and station ID, in that order
new_column_order = ['lon', 'lat', 'stat_ID']
station_df = station_df[new_column_order]

# where a station ID occurs more than once, keep only its last entry
station_df = station_df.drop_duplicates(subset='stat_ID', keep='last')

# export for GMT (no header, no index)
station_df.to_csv("/home/hra35/Documents/gmt_scripts/stations.xy", index=False, header=False)
