In [12]:
from obspy.core.util.attribdict import AttribDict
from obspy.core.inventory import Inventory, read_inventory
from infrapy.detection import beamforming_new
from pathlib import Path
import pandas as pd
from obspy import read
import re
import glob
import os

def sac_file_parser(filepath):
    """Split a SAC file name into its dot-separated naming fields.

    Only the base name of ``filepath`` is inspected. It is expected to look
    like ``<network>.<station>.<channel>.<times...>.<extension>``.

    Parameters
    ----------
    filepath : str or os.PathLike
        Path (or bare file name) of the SAC file.

    Returns
    -------
    tuple of str
        ``(network, station, channel, times)``. ``times`` is every field
        between the channel and the extension, re-joined with ``'.'`` so it
        matches the text that appears in the file name; it is ``''`` when
        there is no such field.

    Raises
    ------
    IndexError
        If the base name has fewer than three dot-separated fields.
    """
    filename = os.path.basename(filepath)
    parts = filename.split('.')
    network, station, channel = parts[0], parts[1], parts[2]
    # Join with '.' (not ''): a multi-part time field such as "2018.297"
    # must keep its separators, otherwise a glob pattern rebuilt from
    # `times` can never match the real file name.
    times = '.'.join(parts[3:-1])  # Exclude the extension part
    return network, station, channel, times

def sac_path_grouping(folder_path, station_pattern='I06H*'):
    """Build one glob pattern per group of SAC files under ``folder_path``.

    ``folder_path`` is searched recursively for ``*.sac`` files. Files that
    live in the same directory and share network, channel and time fields
    (as returned by ``sac_file_parser``) are collapsed into a single pattern
    in which the station field is replaced by ``station_pattern``.

    Parameters
    ----------
    folder_path : str or os.PathLike
        Root directory to search.
    station_pattern : str, optional
        Glob fragment substituted for the station field of each pattern.
        Defaults to ``'I06H*'``, the value that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        A single column, ``GroupedFilePath``, with one pattern string per
        group, in the order the groups were first encountered.
    """
    # Dictionary to hold the grouped paths (dicts keep insertion order)
    grouped_paths = {}

    # Parse and group the file paths
    for sac_file in Path(folder_path).rglob('*.sac'):
        # The station field is deliberately ignored: it is what gets wildcarded.
        network, _station, channel, times = sac_file_parser(sac_file.name)
        # The parent directory is part of the key because the search is
        # recursive; without it, identically named groups in different
        # subdirectories would overwrite one another.
        key = (sac_file.parent, network, channel, times)
        grouped_paths[key] = (
            f"{sac_file.parent}/{network}.{station_pattern}.{channel}.{times}.sac"
        )

    # Create a DataFrame from the grouped paths
    return pd.DataFrame(list(grouped_paths.values()), columns=['GroupedFilePath'])

In [16]:
# Group the raw I06AU SAC files into per-group glob patterns and show the table.
# print() gives the plain-text repr, so the long paths are truncated in the output.
folder_path = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I06AU/I06AU_SAC'
df_grouped_file_paths = sac_path_grouping(folder_path)
print(df_grouped_file_paths)

                                       GroupedFilePath
0    /run/media/viblab/Markov2/Haykal/AnakKrakatauE...
1    /run/media/viblab/Markov2/Haykal/AnakKrakatauE...
2    /run/media/viblab/Markov2/Haykal/AnakKrakatauE...
3    /run/media/viblab/Markov2/Haykal/AnakKrakatauE...
4    /run/media/viblab/Markov2/Haykal/AnakKrakatauE...
..                                                 ...
431  /run/media/viblab/Markov2/Haykal/AnakKrakatauE...
432  /run/media/viblab/Markov2/Haykal/AnakKrakatauE...
433  /run/media/viblab/Markov2/Haykal/AnakKrakatauE...
434  /run/media/viblab/Markov2/Haykal/AnakKrakatauE...
435  /run/media/viblab/Markov2/Haykal/AnakKrakatauE...

[436 rows x 1 columns]


In [21]:
import sys
# Put the project root on sys.path so the `src` package can be imported.
sys.path.append('/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/')
# NOTE(review): the star import can rebind names already defined in this
# notebook (e.g. sac_path_grouping) — confirm which definition is in effect.
from src.utils import *

# Same SAC folder as in the earlier cell; the returned DataFrame is the cell output.
sac_path_grouping('/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I06AU/I06AU_SAC')

Unnamed: 0,GroupedFilePath
0,/run/media/viblab/Markov2/Haykal/AnakKrakatauE...
1,/run/media/viblab/Markov2/Haykal/AnakKrakatauE...
2,/run/media/viblab/Markov2/Haykal/AnakKrakatauE...
3,/run/media/viblab/Markov2/Haykal/AnakKrakatauE...
4,/run/media/viblab/Markov2/Haykal/AnakKrakatauE...
...,...
431,/run/media/viblab/Markov2/Haykal/AnakKrakatauE...
432,/run/media/viblab/Markov2/Haykal/AnakKrakatauE...
433,/run/media/viblab/Markov2/Haykal/AnakKrakatauE...
434,/run/media/viblab/Markov2/Haykal/AnakKrakatauE...


In [10]:
import os
import pandas as pd
from infrapy.utils.data_io import json_to_detection_list

def extract_det_from_csv(csv_folder, detection_file, output_folder):
    """Cut each CSV time series down to the window of every detection.

    For every ``*.csv`` file directly inside ``csv_folder`` and every
    detection loaded from ``detection_file``, the rows whose timestamp lies
    in ``[peakF_UTCtime + start, peakF_UTCtime + end]`` (both ends
    inclusive) are written to
    ``<output_folder>/<csv name without extension>_det_<i>_trimmed.csv``.
    Windows that contain no rows are reported and skipped.

    Parameters
    ----------
    csv_folder : str
        Folder containing the input CSV files; the first column of each
        file is parsed as the timestamp index.
    detection_file : str
        Path passed to ``json_to_detection_list``; each detection must
        expose ``peakF_UTCtime``, ``start`` and ``end`` (the latter two are
        interpreted as second offsets).
    output_folder : str
        Destination folder; created if it does not exist.

    Returns
    -------
    None
        Results are written to disk and progress is printed.
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Load the detection file
    detections = json_to_detection_list(detection_file)

    # The time windows do not depend on the CSV file, so compute them once.
    windows = [
        (
            pd.to_datetime(det.peakF_UTCtime) + pd.to_timedelta(det.start, 's'),
            pd.to_datetime(det.peakF_UTCtime) + pd.to_timedelta(det.end, 's'),
        )
        for det in detections
    ]

    # Get a list of all CSV files in the folder. Directory entries that merely
    # end in ".csv" but are not regular files are skipped, since they cannot
    # be parsed.
    csv_files = [
        f for f in os.listdir(csv_folder)
        if f.endswith('.csv') and os.path.isfile(os.path.join(csv_folder, f))
    ]

    # Process each CSV file
    for file in csv_files:
        file_path = os.path.join(csv_folder, file)

        # Read the CSV file, assuming the first column contains the timestamp
        df = pd.read_csv(file_path, index_col=0)
        df.index = pd.to_datetime(df.index)  # Convert the index to datetime

        # Strip only the extension. Splitting on the first '.' would reduce a
        # name like "IM.I06H_..BDF__...csv" to "IM", making the outputs of
        # different input files overwrite each other.
        stem = os.path.splitext(file)[0]

        # Iterate over each detection window
        for i, (t_start, t_end) in enumerate(windows):
            # Filter the DataFrame for the given detection time range
            trimmed_df = df[(df.index >= t_start) & (df.index <= t_end)]

            # Check if the trimmed DataFrame is empty
            if trimmed_df.empty:
                print(f"No data in range for {file} detection {i}, skipping...")
                continue

            # Save the trimmed DataFrame to a new CSV file in the output folder
            trimmed_filename = os.path.join(output_folder, f"{stem}_det_{i}_trimmed.csv")
            trimmed_df.to_csv(trimmed_filename, index=True)
            print(f"Saved trimmed data to {trimmed_filename}")

# Example usage of the function: trim the raw I06AU CSV files to the windows of
# the detections listed in the JSON file and write the results to the processed folder.
csv_folder = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I06AU/I06AU_CSV'
detection_file = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/Trimmed_Confirmed_Events.json'
output_folder = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/processed/I06AU_DETS'
extract_det_from_csv(csv_folder, detection_file, output_folder)


No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 0, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 1, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 2, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 3, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 4, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 5, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 6, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 7, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 8, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 9, skipping...


ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

In [13]:
import os
import pandas as pd
from infrapy.utils.data_io import json_to_detection_list

def extract_det_from_csv(csv_folder, detection_file, output_folder):
    """Cut each CSV time series down to the window of every detection.

    For every ``*.csv`` file directly inside ``csv_folder`` and every
    detection loaded from ``detection_file``, the rows whose timestamp lies
    in ``[peakF_UTCtime + start, peakF_UTCtime + end]`` (both ends
    inclusive) are written to
    ``<output_folder>/<csv name without extension>_det_<i>_<start>_to_<end>.csv``,
    where ``<start>`` and ``<end>`` are the window bounds formatted as
    ``%Y%m%d_%H%M%S``. Windows that contain no rows are reported and skipped.

    Parameters
    ----------
    csv_folder : str
        Folder containing the input CSV files; the first column of each
        file is parsed as the timestamp index.
    detection_file : str
        Path passed to ``json_to_detection_list``; each detection must
        expose ``peakF_UTCtime``, ``start`` and ``end`` (the latter two are
        interpreted as second offsets).
    output_folder : str
        Destination folder; created if it does not exist.

    Returns
    -------
    None
        Results are written to disk and progress is printed.
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Load the detection file
    detections = json_to_detection_list(detection_file)

    # The time windows do not depend on the CSV file, so compute them once.
    windows = [
        (
            pd.to_datetime(det.peakF_UTCtime) + pd.to_timedelta(det.start, 's'),
            pd.to_datetime(det.peakF_UTCtime) + pd.to_timedelta(det.end, 's'),
        )
        for det in detections
    ]

    # Get a list of all CSV files in the folder. Directory entries that merely
    # end in ".csv" but are not regular files are skipped, since they cannot
    # be parsed.
    csv_files = [
        f for f in os.listdir(csv_folder)
        if f.endswith('.csv') and os.path.isfile(os.path.join(csv_folder, f))
    ]

    # Process each CSV file
    for file in csv_files:
        file_path = os.path.join(csv_folder, file)

        # Read the CSV file, assuming the first column contains the timestamp
        df = pd.read_csv(file_path, index_col=0)
        df.index = pd.to_datetime(df.index)  # Convert the index to datetime

        # Strip only the extension. Splitting on the first '.' would reduce a
        # name like "IM.I06H_..BDF__...csv" to "IM", making the outputs of
        # different input files overwrite each other.
        stem = os.path.splitext(file)[0]

        # Iterate over each detection window
        for i, (t_start, t_end) in enumerate(windows):
            # Filter the DataFrame for the given detection time range
            trimmed_df = df[(df.index >= t_start) & (df.index <= t_end)]

            # Check if the trimmed DataFrame is empty and skip if true
            if trimmed_df.empty:
                print(f"No data in range for {file} detection {i}, skipping...")
                continue

            # Format times for the output filename
            t_start_str = t_start.strftime('%Y%m%d_%H%M%S')
            t_end_str = t_end.strftime('%Y%m%d_%H%M%S')

            # Save the trimmed DataFrame to a new CSV file in the output folder
            trimmed_filename = os.path.join(output_folder, f"{stem}_det_{i}_{t_start_str}_to_{t_end_str}.csv")
            trimmed_df.to_csv(trimmed_filename, index=True)
            print(f"Saved trimmed data to {trimmed_filename}")

# Example usage of the function: trim the raw I06AU CSV files to the windows of
# the detections listed in the JSON file and write the results to the processed folder.
csv_folder = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/raw/I06AU/I06AU_CSV'
detection_file = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/Trimmed_Confirmed_Events.json'
output_folder = '/run/media/viblab/Markov2/Haykal/AnakKrakatauEWS/data/processed/I06AU_DETS'
extract_det_from_csv(csv_folder, detection_file, output_folder)


No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 0, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 1, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 2, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 3, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 4, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 5, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 6, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 7, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 8, skipping...
No data in range for IM.I06H_..BDF__20181024T000000Z__20181025T000000Z.csv detection 9, skipping...


In [3]:
from src.utils.converter import *

In [4]:
# Show the function's repr to check which module the star import took it from.
sac_file_parser

<function src.utils.converter.sac_file_parser(filepath)>