In [None]:
################################ Purpose ################################
# Show how to use the instance dataset to train a CNN binary classifier #
#########################################################################

In [1]:
import os
import shutil

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Alternative proposal for the CNN
from nn.cnn import CNN
from nn.conv_block import ConvBlock
from utils.correct_counter import correct
from utils.dataloaders.EventDetectionInstanceDataset import EventDetectionInstanceDataset
from utils.dataloaders.InstanceDataset import InstanceDataset
from utils.generic_trainer import train_detection_only
from utils.plot import plot_error_and_accuracy


In [2]:
# Input files: HDF5 waveform stores plus their CSV metadata, for events and for noise.
event_hdf5_file = "data/instance_samples/Instance_events_counts_10k.hdf5"
event_metadata_file = "data/instance_samples/metadata_Instance_events_10k.csv"
noise_hdf5_file = "data/instance_samples/Instance_noise_1k.hdf5"
noise_metadata_file = "data/instance_samples/metadata_Instance_noise_1k.csv"

# Fractions passed to every split; split_index 0/1/2 selects train/val/test.
split_percentage = [0.85, 0.05, 0.1]

# The three datasets share every argument except split_index, so build them in
# one pass (constructed in the same order as before: 0, then 1, then 2).
train_dataset, val_dataset, test_dataset = (
    EventDetectionInstanceDataset(
        event_hdf5_file,
        event_metadata_file,
        noise_hdf5_file,
        noise_metadata_file,
        "binary",
        split_index=which_split,
        split_percentage=split_percentage,
        padding_type="sample",
        padding_value=100,
    )
    for which_split in (0, 1, 2)
)

print(f"Dataset size: Train={len(train_dataset)} - Val={len(val_dataset)} - Test={len(test_dataset)}")

# First item of the train split, reported as an earthquake sample.
# NOTE(review): assumes events are indexed before noise — confirm in the dataset class.
data, target = train_dataset[0]
print(f"Earthquake Data shape: {data.shape}")
print(f"Earthquake Target: {target}")


# Last item of the train split, reported as a noise sample.
data, target = train_dataset[len(train_dataset) - 1]
print(f"Noise Data shape: {data.shape}")
print(f"Noise Target: {target}")


Dataset size: Train=9350 - Val=550 - Test=1100
Earthquake Data shape: torch.Size([3, 12000])
Earthquake Target: 1
Noise Data shape: torch.Size([3, 12000])
Noise Target: 0


In [6]:
# Testing model shape and layers compatibility: push a tiny batch through the
# network one layer at a time and print the tensor shape after each stage.

model = CNN(
    input_channels=3,
    conv_channels=[8, 16, 32, 64, 128],
    kernel_sizes=[11, 9, 7, 5, 3],
    mlp_layers=[128, 64, 32, 2],
    dropout=0.4,
)

print(model)
print("\nShape of intermediate features through the network:")

# Stack the waveforms of the first five training samples into one batch.
dummy = torch.stack([train_dataset[i][0] for i in range(5)])

print(f"Input: {tuple(dummy.shape)}")

# Only shapes are inspected, so there is no need to record an autograd graph.
with torch.no_grad():
    # Convolutional stage: report the shape after each ConvBlock.
    for layer in model.conv_layers:
        dummy = layer(dummy)
        if isinstance(layer, ConvBlock):
            print(f"  ->  {layer} -> {tuple(dummy.shape)}")

    # Aggregation stage, then flatten everything but the batch dimension.
    dummy = model.aggregation(dummy)
    print(f"  ->  {model.aggregation} -> {tuple(dummy.shape)}")
    dummy = dummy.view(dummy.size(0), -1)
    print(f"  ->  Reshape -> {tuple(dummy.shape)}")

    # MLP head: report the shape after each Linear layer only.
    for layer in model.mlp:
        dummy = layer(dummy)
        if isinstance(layer, nn.Linear):
            print(f"  ->  {layer} -> {tuple(dummy.shape)}")

CNN(
  (conv_layers): Sequential(
    (0): ConvBlock(
      (block): Sequential(
        (0): Conv1d(3, 8, kernel_size=(11,), stride=(1,))
        (1): LeakyReLU(negative_slope=0.01)
        (2): Identity()
        (3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (4): Dropout(p=0.4, inplace=False)
      )
    )
    (1): ConvBlock(
      (block): Sequential(
        (0): Conv1d(8, 16, kernel_size=(9,), stride=(1,))
        (1): LeakyReLU(negative_slope=0.01)
        (2): Identity()
        (3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (4): Dropout(p=0.4, inplace=False)
      )
    )
    (2): ConvBlock(
      (block): Sequential(
        (0): Conv1d(16, 32, kernel_size=(7,), stride=(1,))
        (1): LeakyReLU(negative_slope=0.01)
        (2): Identity()
        (3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (4): Dropout(p=0.4, inplace=False)
      )
    )
    (3): Co

In [7]:
# Testing dataset, dataloader, model and loss compatibility: fetch one batch,
# run it through a fresh model, and evaluate the loss and the correct-counter.

model = CNN(
    input_channels=3,
    conv_channels=[8, 16, 32, 64, 128],
    kernel_sizes=[11, 9, 7, 5, 3],
    mlp_layers=[128, 64, 32, 2],
    dropout=0.4,
)

batch_size = 5

# initialize loaders
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=2
)

# A single batch is enough for this smoke test; each batch is (waveforms, labels).
data, actual_event = next(iter(train_loader))

print(f"Input: {data.shape}")
print("######### Actual class ###############")
print(actual_event)

# NOTE(review): the model is left in its default (training) mode here. The
# recorded logits contain exact zeros and end in a Mul op, which looks like
# dropout being applied to the output layer — confirm in nn/cnn.
pred = model(data)

print("######### Predictions logit ###############")
print(pred)

loss = nn.CrossEntropyLoss()

print(f"Loss: {loss(pred, actual_event)}")
print(f"Total correct: {correct(pred, actual_event)}")



Input: torch.Size([5, 3, 12000])
######### Actual class ###############
tensor([1, 1, 1, 1, 1])
######### Predictions logit ###############
tensor([[-0.0000e+00,  0.0000e+00],
        [-4.2490e-02,  0.0000e+00],
        [-1.2359e-03,  4.9498e-01],
        [-0.0000e+00, -2.1641e-03],
        [-0.0000e+00,  1.1173e+01]], grad_fn=<MulBackward0>)
Loss: 0.507004976272583
Total correct: 3.0


In [8]:
# Train the CNN detector on the train split, validating on the val split.

# Seed the RNGs before the model is built so that a Restart & Run All starts
# from the same initial weights.
# NOTE(review): whether this also fixes batch shuffling depends on how
# train_detection_only builds its DataLoader — confirm.
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)

model = CNN(
    input_channels=3,
    conv_channels=[8, 16, 32, 64, 128],
    kernel_sizes=[11, 9, 7, 5, 3],
    mlp_layers=[128, 64, 32, 2],
    dropout=0.4,
)

# Directory handed to the trainer for its temporary files.
temp_dir = "temp"

loss = nn.CrossEntropyLoss()

# Returns per-epoch errors (e), per-epoch accuracies (a), the path of the saved
# model, and a monitor object whose plots are written out later.
# NOTE(review): in the recorded run the train error climbs from ~0.59 to ~590
# by epoch 5 while val accuracy stays at 90.9% for the logged epochs. That
# looks like a diverging optimizer and/or a model predicting a single class on
# an imbalanced split — check the learning rate and class balance.
e, a, model_path, monitor = train_detection_only(
    train_dataset,
    val_dataset,
    model,
    loss,
    correct,
    batch_size=64,
    epochs=20,
    temp_dir=temp_dir,
)


CNN(
  (conv_layers): Sequential(
    (0): ConvBlock(
      (block): Sequential(
        (0): Conv1d(3, 8, kernel_size=(11,), stride=(1,))
        (1): LeakyReLU(negative_slope=0.01)
        (2): Identity()
        (3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (4): Dropout(p=0.4, inplace=False)
      )
    )
    (1): ConvBlock(
      (block): Sequential(
        (0): Conv1d(8, 16, kernel_size=(9,), stride=(1,))
        (1): LeakyReLU(negative_slope=0.01)
        (2): Identity()
        (3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (4): Dropout(p=0.4, inplace=False)
      )
    )
    (2): ConvBlock(
      (block): Sequential(
        (0): Conv1d(16, 32, kernel_size=(7,), stride=(1,))
        (1): LeakyReLU(negative_slope=0.01)
        (2): Identity()
        (3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (4): Dropout(p=0.4, inplace=False)
      )
    )
    (3): Co

Exception in thread Thread-4 (_monitor):
Traceback (most recent call last):
  File "/Users/ekabore/anaconda3/lib/python3.11/threading.py", line 1038, in _bootstrap_inner
    self.run()
  File "/Users/ekabore/Documents/personal/school/H2024/IFT6759/earthquake/env/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 761, in run_closure
    _threading_Thread_run(self)
  File "/Users/ekabore/anaconda3/lib/python3.11/threading.py", line 975, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/ekabore/Documents/personal/school/H2024/IFT6759/earthquake/utils/gpu_monitoring.py", line 59, in _monitor
    utilization = gpu_util()
                  ^^^^^^^^^^
  File "/Users/ekabore/Documents/personal/school/H2024/IFT6759/earthquake/utils/gpu_monitoring.py", line 14, in gpu_util
    result = subprocess.run(
             ^^^^^^^^^^^^^^^
  File "/Users/ekabore/anaconda3/lib/python3.11/subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^

Epoch 1/20 - Train error: 0.5856 Train acc: 74.9% - Val error: 0.3760 Val acc: 90.9%                                                
Epoch 2/20 - Train error: 0.5376 Train acc: 75.8% - Val error: 0.3991 Val acc: 90.9%                                                                                                                    
Epoch 3/20 - Train error: 1.2645 Train acc: 71.4% - Val error: 3.3267 Val acc: 90.9%                                                                                                                    
Epoch 4/20 - Train error: 321.0224 Train acc: 59.4% - Val error: 5.3836 Val acc: 90.9%                                                                                                                  
Epoch 5/20 - Train error: 589.9654 Train acc: 59.1% - Val error: 0.7327 Val acc: 90.9%                                                                                                                  
Epoch 6/20 - Train error: 101.6658 Train acc: 57.5% - Val error

In [10]:
# Show the raw per-epoch histories from training: errors first, then
# accuracies, one dict per line.
print(e, a, sep="\n")

{'train': [0.5856248837749974, 0.5376471167518979, 1.2644783732842426, 321.022350099622, 589.9654267531674, 101.66575787343136, 33.43796490406503, 70.62776178243209, 15.770300658178979, 14.25751043136428, 13.386756981311201, 5.099807813054039, 5.119657426058841, 2.907810773573765, 3.4878672402934963, 5.582325924821451, 6.083967663601142, 4.304614238998517, 68.17021813816359, 81.34136093676496], 'val': [0.37602370646264816, 0.39905362990167403, 3.326728890132573, 5.383646172781785, 0.732723188896974, 1.5895029654105504, 1.3876266380151112, 2.039011574453778, 1.567725055747562, 1.30262531257338, 0.8758188999361463, 0.8028628337714407, 0.6734844330284331, 0.6494057319230504, 0.8715837953819169, 0.5944798299007945, 0.5268197076188194, 1.025642178952694, 2.786103513505724, 1.4850917466812663]}
{'train': [74.90909090909092, 75.80748663101605, 71.42245989304813, 59.401069518716575, 59.11229946524064, 57.50802139037433, 59.44385026737968, 56.855614973262036, 59.647058823529406, 59.775401069518

In [12]:
# Persist the training artifacts: learning curves, monitor plots and the model file.
figures_output = "output/figures"
final_output_dir = "output"

# Create the target directories up front. Without an existing directory,
# shutil.copy would treat "output" as a file name and write the model to a
# file called "output" instead of placing it inside the directory.
os.makedirs(final_output_dir, exist_ok=True)
os.makedirs(figures_output, exist_ok=True)

plot_error_and_accuracy(e, a, figures_output)
monitor.save_plots(figures_output)
shutil.copy(model_path, final_output_dir)