## Import Dependencies

In [1]:
import os
import sys
import time
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from torch import cuda, no_grad, tensor


### Add `utils` and `models` to `sys.path` to import our custom scripts
**Note:** This notebook should be run from the `examples` folder to ensure the correct imports and file paths are used.

In [2]:
# Make the project's helper modules importable: go up one level from the
# current working directory (expected to be 'examples') and point to the
# sibling 'utils' and 'models' folders.
utils_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'utils'))
models_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'models'))
for extra_path in (utils_path, models_path):
    # Only append when missing so re-running this cell does not keep
    # growing sys.path with duplicate entries.
    if extra_path not in sys.path:
        sys.path.append(extra_path)
import data_utils
import model_utils



### Load the model

The model can be loaded through the `model_selector()` function. Available architectures are: "UNet", "UNetPlusPlus", "DeepLabV3", "SwinUNet", and "PhaseNet". If you wish to load the weights generated for the article, set the `pretrained` argument to `True`; the weights are then downloaded automatically from [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15098817.svg)](https://doi.org/10.5281/zenodo.15098817) (weights are only available for input size N=256). The number of parameters for each model at each input size evaluated in the article is shown below:

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if cuda.is_available() else "cpu"

### Load continuous trace

Data can be downloaded from [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.17163020.svg)](https://doi.org/10.5281/zenodo.17163020):

In [8]:
# Fetch the Zenodo record metadata and keep its list of downloadable files.
doi = "10.5281/zenodo.17163020"
# The record id is the last dot-separated component of the DOI string.
record_id = doi.split(".")[-1]
metadata_url = f"https://zenodo.org/api/records/{record_id}"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(metadata_url, timeout=60)
# Fail with a clear HTTP error instead of a KeyError on "files" below.
response.raise_for_status()
metadata = response.json()
files = metadata["files"]


In [9]:
# Download the continuous-trace archive listed in the record metadata and
# unpack it into the current working directory.
# The entry is looked up by file name rather than by list position, so the
# cell keeps working if the order of the record's file listing changes.
target_key = "NVCh_10h_continuous_trace.zip"
file_to_download = next((entry for entry in files if entry["key"] == target_key), None)
if file_to_download is None:
    raise FileNotFoundError(f"{target_key} not found in the Zenodo record file list")
print(file_to_download["key"])
file_url = file_to_download["links"]["self"]
filename = file_to_download["key"]

# Stream the response to disk in chunks; the context manager releases the
# connection once the download finishes or fails.
with requests.get(file_url, stream=True, timeout=60) as response:
    # Abort on an HTTP error rather than saving an error page as the archive.
    response.raise_for_status()
    with open(filename, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
print(f"Downloaded {filename}")

print("Extracting files...")

with zipfile.ZipFile(filename, 'r') as zip_ref:
    zip_ref.extractall(os.getcwd())  # or use a specific path
print("Files extracted")

NVCh_10h_continuous_trace.zip
Downloaded NVCh_10h_continuous_trace.zip
Extracting files...
Files extracted


After downloading we load the full seismic trace and the reference dataframe

In [None]:
# Read the continuous trace array from disk (presumably the .npy file that
# was unpacked from the downloaded archive -- confirm the extracted name).
filename = "NVCh_10h_continuous_trace.npy"
continuous_trace = np.load(file=filename)

Timing 

In [None]:
# Benchmark inference speed: for every architecture and every stride, slide a
# fixed-size window over the continuous trace, run the model on each batch,
# sum the outputs into a buffer, and record the elapsed wall-clock time.
batch_size = 1
window_size = 8192
total_time_list = []
for arch in ["UNet", "UNetPlusPlus", "SwinUNet", "DeepLabV3"]:
    model = model_utils.model_selector(arch=arch, N=256, pretrained=True).to(device)
    # Switch to evaluation mode once per model rather than once per stride.
    model.eval()
    for stride in [7000, 6000, 5000, 4000, 3000, 2000, 1000, 500, 250]:
        # Accumulator for the window outputs: 6 rows, one column per entry
        # along axis 1 of the trace.
        activation_buffer = np.zeros([6, continuous_trace.shape[1]])
        window_index = 0

        # perf_counter is monotonic and high-resolution, which makes it the
        # appropriate clock for measuring an interval.
        start_time = time.perf_counter()

        # No gradients are needed for inference; disabling autograd avoids
        # building the computation graph for every forward pass.
        with no_grad():
            for batch in data_utils.generate_overlapping_batches(
                continuous_trace, window_size=window_size, stride=stride, batch_size=batch_size
            ):
                X = data_utils.patch_stacking_X(tensor(batch[:, 1:9, :])).to(device)
                output = model(X)
                output = data_utils.activation_unstacking(output, window_size, 256, 6)
                for idx in range(len(output)):
                    # Each window starts `stride` entries after the previous one.
                    offset = window_index * stride
                    probabilities = output[idx].detach().cpu().numpy()
                    activation_buffer[:, offset:offset+window_size] += probabilities
                    window_index += 1

        end_time = time.perf_counter()

        total_time = end_time - start_time
        # Guard against a trace that yields no windows at all.
        avg_time_per_window = total_time / window_index if window_index else float("nan")

        total_time_list.append({"stride":stride,
                                "arch":arch,
                                "total_time":total_time,
                                "avg_time_per_window":avg_time_per_window})
        print(f"{arch} - Total time: {total_time:.2f} seconds")
        print(f"{arch} - Average time per window: {avg_time_per_window:.4f} seconds")


In [12]:
# One row per (architecture, stride) timing record collected by the benchmark.
total_time_df = pd.DataFrame(data=total_time_list)

Time [ms] to process a single window (averaged across strides from 2.5 to 70 seconds)

In [29]:
# Per-architecture max / mean / min of the average time per window,
# converted from seconds to milliseconds and rounded to two decimals.
per_window_stats = {"avg_time_per_window": ["max", "mean", "min"]}
agg_df = (
    total_time_df
    .groupby("arch")
    .agg(per_window_stats)
    .mul(1000)
    .round(2)
)

print(agg_df)

             avg_time_per_window             
                             max   mean   min
arch                                         
DeepLabV3                   20.0  14.33  13.0
SwinUNet                    29.0  23.33  22.0
UNet                        10.0   7.22   6.0
UNetPlusPlus                15.0  11.89  11.0


Total time [s] to process the entire 10-hour continuous trace (averaged across strides from 2.5 to 70 seconds)

In [26]:

# Per-architecture max / mean / min of the total processing time in seconds,
# rounded to two decimals.
total_time_stats = {"total_time": ["max", "mean", "min"]}
agg_df = (
    total_time_df
    .groupby("arch")
    .agg(total_time_stats)
    .round(2)
)

print(agg_df)

             total_time              
                    max   mean    min
arch                                 
DeepLabV3        212.73  49.49   9.40
SwinUNet         339.02  80.56  15.02
UNet             101.87  24.29   4.06
UNetPlusPlus     164.10  39.96   7.01
