In this notebook, we compute per-channel statistics (mean, standard deviation, maximum, and minimum) of the training waveforms. The maximum and minimum values will be used to normalize the envelope.

In [3]:
import numpy as np

from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib as mpl
# Notebook-wide plotting defaults: figure size (as passed to matplotlib) and DPI.
mpl.rcParams.update({
    'figure.figsize': [10, 5],
    'figure.dpi': 300,
})

from tqdne.conf import Config
# Instantiate the project configuration and derive the location of the
# training split from its `datasetdir` and `data_train` attributes.
config = Config()
dataset_path = config.datasetdir / config.data_train
# Bare expression so the resolved path is shown as the cell output.
dataset_path

PosixPath('/store/sdsc/sd28/data/GM0-dataset-split/data_train.h5')

In [4]:
import h5py

def load_waveforms_from_h5_dataset(file_path, n_samples=100000, seed=None):
    """Load a random subset of the ``'waveform'`` dataset stored in an HDF5 file.

    Parameters
    ----------
    file_path : str or path-like
        HDF5 file to open read-only; it must contain a dataset named
        ``'waveform'``.
    n_samples : int or float, optional
        Number of rows (first axis) to draw without replacement. Values larger
        than the dataset are clamped to its size, in which case every row is
        returned. Float counts such as ``1e9`` are accepted and truncated to
        an integer.
    seed : int, optional
        Seed for the sub-sampling. With ``None`` (the default) NumPy's global
        random state is used, exactly as before.

    Returns
    -------
    array
        The selected rows, kept in their on-disk order along the first axis.

    Raises
    ------
    ValueError
        If ``n_samples`` is negative (raised by the NumPy sampling routine).
    """
    with h5py.File(file_path, 'r') as f:
        dataset = f['waveform']
        n_tot_samples = dataset.shape[0]

        # Clamp first so large float sentinels (1e9, inf) collapse to the
        # dataset size, then coerce to int: the sampling routine rejects
        # non-integer sizes.
        n_samples = int(min(n_samples, n_tot_samples))

        if n_samples == n_tot_samples:
            # A sorted full permutation is just every index in order, so read
            # the dataset with a plain slice instead of a long index list.
            return dataset[:]

        rng = np.random if seed is None else np.random.default_rng(seed)
        samples_indexes = rng.choice(n_tot_samples, n_samples, replace=False)
        # Indices are sorted before the read; h5py index lists are expected
        # to be in increasing order.
        return dataset[np.sort(samples_indexes)]

# Ask for more rows than the file holds so the loader clamps the request to
# the full dataset. The count is written as an integer literal because it is
# a number of samples, not a float.
waveforms = load_waveforms_from_h5_dataset(dataset_path, n_samples=1_000_000_000)
waveforms.shape

(196608, 3, 5501)

In [5]:
# Move the leading (sample) axis to the end: axis order (0, 1, 2) -> (1, 2, 0).
# NOTE: this rebinds `waveforms`, so running the cell a second time rotates
# the axes again; re-run the loading cell first if it must be repeated.
waveforms = waveforms.transpose(1, 2, 0)
waveforms.shape

(3, 5501, 196608)

In [7]:
# Replace NaNs with zeros so they do not propagate into the reductions below.
waveforms = np.nan_to_num(waveforms, nan=0)
n = waveforms.shape[-1]

# Statistics reduced over the last axis only: one value per (axis-0, axis-1) entry.
mean_signal = waveforms.mean(axis=2)
std_dev_signal = waveforms.std(axis=2)
max_signal = waveforms.max(axis=2)
min_signal = waveforms.min(axis=2)

# Statistics reduced over the last two axes: one scalar per axis-0 entry.
# NOTE(review): `max` and `min` shadow the builtins for the rest of the
# session; the names are kept because a later cell reads them.
mean = waveforms.mean(axis=(1, 2))
std = waveforms.std(axis=(1, 2))
max = waveforms.max(axis=(1, 2))
min = waveforms.min(axis=(1, 2))

# Show the reduced-axis length followed by the shape of every statistic.
(n,) + tuple(
    stat.shape
    for stat in (mean_signal, std_dev_signal, max_signal, min_signal, mean, std, max, min)
)

(196608, (3, 5501), (3, 5501), (3, 5501), (3, 5501), (3,), (3,), (3,), (3,))

In [9]:
num_channels = waveforms.shape[0]

# Every statistic is indexed by channel along its first axis; the key order
# here is the key order of each per-channel dictionary.
stats_by_name = {
    'mean_signal': mean_signal,
    'std_dev_signal': std_dev_signal,
    'max_signal': max_signal,
    'min_signal': min_signal,
    'mean': mean,
    'std_dev': std,
    'max': max,
    'min': min,
}

# Channels are keyed 'ch1', 'ch2', ... (1-based).
signal_statistics = {}
for i in range(num_channels):
    signal_statistics[f'ch{i+1}'] = {name: values[i] for name, values in stats_by_name.items()}

# Sanity check on the first channel: shapes of the per-point statistics,
# then the scalar statistics themselves.
ch1_stats = signal_statistics['ch1']
(
    ch1_stats['mean_signal'].shape,
    ch1_stats['std_dev_signal'].shape,
    ch1_stats['max_signal'].shape,
    ch1_stats['min_signal'].shape,
    ch1_stats['mean'],
    ch1_stats['std_dev'],
    ch1_stats['max'],
    ch1_stats['min'],
)

((5501,),
 (5501,),
 (5501,),
 (5501,),
 3.1589977e-08,
 0.0417644,
 18.22163,
 -15.233168)

In [None]:
import pickle

# Persist the per-channel statistics dictionary next to the notebook
# (path is relative to the current working directory).
with open('signal_statistics.pkl', 'wb') as stats_file:
    pickle.dump(signal_statistics, stats_file)