In [2]:
import numpy as np
from signal_model import NeuroNet, load_yaml
import re
from pathlib import Path
from dataset.signal_dataset import SignalDataset
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report

In [3]:


sample_rate = 1562500
channel_1 = 'ch1'
channel_2 = 'ch2'
channel_3 = 'ch3'
signal_data_dir = "/mnt/home2/Motor_project/AE_PETR_loziska/"
train_config = ([{"label": (int(i.stem) - 1) // 4,
                  "channels": len(list(i.glob('*' + channel_1 + '.bin'))),
                  "interval": [0, int(4.5 * sample_rate)],
                  "bin_path": list(i.glob('*' + channel_1 + '.bin'))[0]}
                 for i in Path(signal_data_dir).glob('*') if re.search(r'\d$', i.stem)]
                +
                [{"label": (int(i.stem) - 1) // 4,
                  "channels": len(list(i.glob('*' + channel_2 + '.bin'))),
                  "interval": [0, int(4.5 * sample_rate)],
                  "bin_path": list(i.glob('*' + channel_2 + '.bin'))[0]}
                 for i in Path(signal_data_dir).glob('*') if re.search(r'\d$', i.stem)]
                +
                [{"label": (int(i.stem) - 1) // 4,
                  "channels": len(list(i.glob('*' + channel_3 + '.bin'))),
                  "interval": [0, int(4.5 * sample_rate)],
                  "bin_path": list(i.glob('*' + channel_3 + '.bin'))[0]}
                 for i in Path(signal_data_dir).glob('*') if re.search(r'\d$', i.stem)])

test_config = ([{"label": (int(i.stem) - 1) // 4,
                 "channels": len(list(i.glob('*' + channel_1 + '.bin'))),
                 "interval": [int(4.5 * sample_rate), int(5 * sample_rate)],
                 "bin_path": list(i.glob('*' + channel_1 + '.bin'))[0]}
                for i in Path(signal_data_dir).glob('*') if re.search(r'\d$', i.stem)]
               +
               [{"label": (int(i.stem) - 1) // 4,
                 "channels": len(list(i.glob('*' + channel_2 + '.bin'))),
                 "interval": [int(4.5 * sample_rate), int(5 * sample_rate)],
                 "bin_path": list(i.glob('*' + channel_2 + '.bin'))[0]}
                for i in Path(signal_data_dir).glob('*') if re.search(r'\d$', i.stem)]
               +
               [{"label": (int(i.stem) - 1) // 4,
                 "channels": len(list(i.glob('*' + channel_3 + '.bin'))),
                 "interval": [int(4.5 * sample_rate), int(5 * sample_rate)],
                 "bin_path": list(i.glob('*' + channel_3 + '.bin'))[0]}
                for i in Path(signal_data_dir).glob('*') if re.search(r'\d$', i.stem)])

In [10]:
noise_config = {"intensity": 1,
                "bin_path": Path("/mnt/home2/Motor_project/AE_PETR_loziska/prevodovka/AE-DATA-conti-7394086655261458-ch1.bin")}

train_set = SignalDataset(step=10000, window_size=10000, bin_setup=train_config, noise = noise_config, source_dtype="float32")
test_set = SignalDataset(step=10000, window_size=10000, bin_setup=test_config, source_dtype="float32")


In [9]:
len(train_set)

442386

In [13]:
def get_mean_and_std(dataset):
    loader = DataLoader(dataset, batch_size=64, shuffle=False)
    
    mean = 0.0
    std = 0.0
    nb_samples = 0
    
    for data in loader:
        batch_samples = data[0].shape[0] # batch size (the last batch can have smaller size)
        # data = data.view(batch_samples, data[0].shape(1), -1)
        mean += data[0]._mean(2).sum(0)
        std += data[0]._std(2).sum(0)
        nb_samples += batch_samples
    print(nb_samples)
    mean /= nb_samples
    std /= nb_samples
    
    return mean, std

# Assuming your dataset is created and named custom_dataset
mean, std = get_mean_and_std(train_set)

print(f'Mean: {mean}')
print(f'Standard Deviation: {std}')

32802
Mean: tensor([0.0011])
Standard Deviation: tensor([0.0229])


In [12]:
test_mean, test_std = get_mean_and_std(test_set)
print(f'Mean: {test_mean}')
print(f'Standard Deviation: {test_std}')

16401
Mean: tensor([0.0002])
Standard Deviation: tensor([0.0055])


In [39]:
torch.from_numpy(train_set_set[0][0])- mean

tensor([[-0.0001, -0.0011, -0.0033,  ..., -0.0021, -0.0004,  0.0006]])

In [40]:
train_set[0][0]

array([[-0.00395331, -0.00288655, -0.00213353, ...,  0.01763303,
         0.01750753,  0.01913906]], dtype=float32)

In [None]:

nn_config = load_yaml(Path("nn_configs/CNN_spec.yaml"))
neuro_net = NeuroNet(config=nn_config, tensorboard=True)

neuro_net.train(train_set)



In [None]:
test_dataloader = DataLoader(test_set, batch_size=512, shuffle=False)
outputs = np.empty((0,), dtype=np.float32)
targets = np.empty((0,), dtype=np.longdouble).flatten()
for i, (input, target) in enumerate(test_dataloader):
    input, target = input.numpy(), target.numpy()
    output = neuro_net.predict(input)
    outputs = np.concatenate((outputs, output), axis=0)
    targets = np.concatenate((targets, target), axis=0)
neuro_net.plot_confusion_matrix(outputs, targets)

In [None]:
cr = classification_report(targets, outputs, zero_division=0)