In [18]:
import yaml
import h5py
import os
import numpy as np

In [21]:
# Load the YAML configuration file. safe_load only constructs plain Python
# objects (dicts, lists, scalars) from the document.
with open("example.yaml", "r") as yaml_file:
    config = yaml.safe_load(yaml_file)

# Where the event files live, which events to use, and where to save the result
data_repository = config['data']['load']['repository']
events = config['data']['load']['events']
save_filename = config['data']['save']['filename']

# Filter parameters: time window bounds (in hours) and the number of
# stations kept per event
start_hour = config['filter']['start']
end_hour = config['filter']['end']
n_stations = config['filter']['n_stations']

# Build the list of event file paths
if events == "ALL":
    # os.listdir returns entries in arbitrary order; sort them so that the
    # mapping "event index -> file" is the same on every run and machine.
    event_files = sorted(
        os.path.join(data_repository, name)
        for name in os.listdir(data_repository)
        if name.endswith(".h5")
    )
elif isinstance(events, str):
    # A single path given as a bare string would otherwise be iterated
    # character by character further down.
    event_files = [events]
else:
    # NOTE(review): entries are used verbatim as paths and are NOT joined
    # with data_repository — confirm that the config lists full paths.
    event_files = list(events)



In [22]:
# Constants behind the index arithmetic and the array layout.
# NOTE(review): 100 samples per second is what the original conversion
# factor implies — confirm against the actual sampling rate of the data.
SAMPLING_RATE_HZ = 100
N_COMPONENTS = 3                  # rows 0..2 of each station dataset are copied
DISTANCE_CHANNEL = N_COMPONENTS   # extra channel whose sample 0 holds the distance

# Convert hours to sample indices
start_index = int(start_hour * 3600 * SAMPLING_RATE_HZ)
end_index = int(end_hour * 3600 * SAMPLING_RATE_HZ)
n_samples = end_index - start_index

# Output layout: (event, station, channel, sample).
# Zero-initialised on purpose: only sample 0 of the distance channel is ever
# written, and station slots stay untouched when a file holds fewer than
# n_stations stations. With np.empty those cells would contain arbitrary
# memory and be written to disk as if they were data.
data = np.zeros((len(event_files), n_stations, N_COMPONENTS + 1, n_samples))

# Iterate over each event file
for event_index, event_file in enumerate(event_files):
    with h5py.File(event_file, 'r') as f:
        # Top-level keys of the file, one per station
        stations_list = list(f.keys())

        # Keep the first n_stations keys in the file's key order.
        # NOTE(review): this is NOT a distance-based selection; if the closest
        # stations are wanted, sort stations_list by the 'dist_m' attribute.
        selected_stations = stations_list[:n_stations]

        for station_index, station in enumerate(selected_stations):
            station_dataset = f[station]

            # Copy the first N_COMPONENTS rows over the configured window
            data[event_index, station_index, :N_COMPONENTS, :] = (
                station_dataset[0:N_COMPONENTS, start_index:end_index]
            )

            # Store the 'dist_m' attribute in sample 0 of the extra channel;
            # the rest of that channel stays at zero.
            distance = station_dataset.attrs['dist_m']
            data[event_index, station_index, DISTANCE_CHANNEL, 0] = distance

# Save the extracted data with distances
np.save(save_filename, data)

In [23]:
# Load the array written by the extraction step. np.save appends ".npy" when
# the given name lacks that suffix, so apply the same rule to the configured
# name instead of hard-coding a path that can drift from the config.
saved_path = str(save_filename)
if not saved_path.endswith('.npy'):
    saved_path += '.npy'
loaded_data = np.load(saved_path)

# Explore the data
print("Data shape:", loaded_data.shape)
print("Number of events:", loaded_data.shape[0])
print("Number of stations:", loaded_data.shape[1])
print("Number of channels:", loaded_data.shape[2])
print("Number of samples:", loaded_data.shape[3])

# Access a specific event, station, channel, and sample
event_idx = 0
station_idx = 0
channel_idx = 0
sample_idx = 0
value = loaded_data[event_idx, station_idx, channel_idx, sample_idx]
print("Value at event {}, station {}, channel {}, sample {}: {}".format(event_idx, station_idx, channel_idx, sample_idx, value))

# Get the number of events and stations from the array that was read back
# from disk (not from the in-memory array of the extraction step), so this
# section really checks the saved file and does not rely on leftover kernel
# state.
num_events, num_stations = loaded_data.shape[0], loaded_data.shape[1]

# The distance is stored in channel 3, sample 0 of every (event, station) pair
for event_idx in range(num_events):
    for station_idx in range(num_stations):
        distance = loaded_data[event_idx, station_idx, 3, 0]
        print(f"Distance between Event {event_idx} and Station {station_idx}: {distance} meters")



Data shape: (7, 4, 4, 360000)
Number of events: 7
Number of stations: 4
Number of channels: 4
Number of samples: 360000
Value at event 0, station 0, channel 0, sample 0: 182.05828857421875
Distance between Event 0 and Station 0: 120189.0 meters
Distance between Event 0 and Station 1: 149570.0 meters
Distance between Event 0 and Station 2: 149240.0 meters
Distance between Event 0 and Station 3: 128023.0 meters
Distance between Event 1 and Station 0: 86461.0 meters
Distance between Event 1 and Station 1: 22603.0 meters
Distance between Event 1 and Station 2: 111294.0 meters
Distance between Event 1 and Station 3: 129179.0 meters
Distance between Event 2 and Station 0: 132104.0 meters
Distance between Event 2 and Station 1: 139004.0 meters
Distance between Event 2 and Station 2: 137723.0 meters
Distance between Event 2 and Station 3: 128040.0 meters
Distance between Event 3 and Station 0: 115413.0 meters
Distance between Event 3 and Station 1: 115267.0 meters
Distance between Event 3 and 