In [1]:
import os
import torch
import shutil
import numpy as np
from torch import nn
from models import VGG_Style_5Block
from tqdm.auto import tqdm
from itertools import product
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
from datasets.scalogramDataset import CustomBrainMNISTSCalogram
# Best (lowest) validation loss seen by any training run in this kernel session.
# model_trainer() updates it through `global`, so it is shared across hyper-parameter runs.
min_loss_so_far = float('inf')

# Output locations. The trailing slash matters: later cells build file names by
# plain string concatenation (e.g. modelPath + 'bestModelSoFar.pt').
modelPath = 'dlPipelineScalo/models/'
tbPath = 'dlPipelineScalo/runs/'

# Start every session from empty output directories.
# WARNING: this deletes checkpoints and TensorBoard logs from previous sessions.
for _outDir in (modelPath, tbPath):
    shutil.rmtree(_outDir, ignore_errors=True)
    # exist_ok=True: rmtree may have failed silently (ignore_errors=True); in that
    # case the directory still exists and a plain makedirs() would raise.
    os.makedirs(_outDir, exist_ok=True)

In [2]:
# Load the pre-computed scalograms and their labels, then shuffle both with the
# same permutation so that sample i of npData still belongs to label i of npTargets.
tensorDir = 'processedData/channelSlectedBandPassed/npy/eventScalogram.npy'
targetsDir = 'processedData/channelSlectedBandPassed/npy/targets.npy'
npData = np.load(tensorDir)
npTargets = np.load(targetsDir)
assert npData.shape[0] == npTargets.shape[0], "data and targets must contain the same number of samples"

# A fixed seed keeps the train/validation/test membership identical across kernel
# restarts; without it every run evaluates on a different test set.
SPLIT_SEED = 42
randomIndex = np.random.default_rng(SPLIT_SEED).permutation(npTargets.shape[0])
npData = npData[randomIndex]
npTargets = npTargets[randomIndex]
npTargets.shape, npData.shape

((64470,), (64470, 4, 30, 256))

In [3]:
# Index ranges (start, end) into the shuffled arrays for each split.
# The three ranges are contiguous: train | validation | test.
trainEnd, validationEnd, testEnd = 44470, 54470, 64470
trainExtend = (0, trainEnd)
validationExtend = (trainEnd, validationEnd)
testExtend = (validationEnd, testEnd)


def _build_split(extend):
    """Wrap the shared arrays in a dataset restricted to the given index range."""
    return CustomBrainMNISTSCalogram(eventArrays=npData, targetsArrays=npTargets, extends=extend)


training_data = _build_split(trainExtend)
validation_data = _build_split(validationExtend)
testing_data = _build_split(testExtend)
tuple(len(split) for split in (training_data, validation_data, testing_data))

(44470, 10000, 10000)

In [4]:
def model_trainer(training_args: dict):
    """Train one VGG_Style_5Block configuration and checkpoint it if it beats the best so far.

    Reads the module-level ``training_data`` / ``validation_data`` datasets and the
    module-level ``min_loss_so_far``; the latter is updated whenever an epoch reaches a
    lower validation loss, so the saved checkpoint is the best one across *all* calls
    made in the current kernel session.

    Args:
        training_args: configuration dictionary with the keys
            ``batch_size`` (optional, default 8),
            ``dev_id`` (optional, device passed to ``.to()``),
            ``lr`` (optional, default 0.0001),
            ``num_epochs`` (optional, default 10),
            ``tbPath`` (required, prefix of the TensorBoard log directory),
            ``modelPath`` (required, prefix of the checkpoint file).

    Side effects:
        * Writes TensorBoard scalars under ``tbPath + model_name``.
        * Overwrites ``modelPath + 'bestModelSoFar.pt'`` on every improvement. The
          checkpoint holds ``sd`` (state dict), ``h_params`` (run name),
          ``saved_epoch`` and ``val_loss``.

    Returns:
        None.

    Raises:
        KeyError: if ``tbPath`` or ``modelPath`` is missing from ``training_args``.
    """
    global min_loss_so_far
    batch_size = int(training_args.get('batch_size', 8))
    device = training_args.get('dev_id')
    lr = float(training_args.get('lr', 0.0001))
    epochs = int(training_args.get('num_epochs', 10))
    tbPath = training_args['tbPath']
    modelPath = training_args['modelPath']
    model_name = '_epochs=' + str(epochs) + '_lr=' + str(lr) + '_batchSize=' + str(batch_size)
    tb = SummaryWriter(log_dir=tbPath + model_name)

    # try/finally guarantees the writer is flushed and closed even when training is
    # interrupted (e.g. KeyboardInterrupt in the notebook or a CUDA out-of-memory error).
    try:
        training_dataloader = DataLoader(training_data, shuffle=True, batch_size=batch_size)
        validation_dataloader = DataLoader(validation_data, batch_size=batch_size)

        # Move the model to its device *before* building the optimizer, as recommended
        # by the torch.optim documentation.
        model = VGG_Style_5Block(in_channels=4).to(device)
        print(model)
        loss_criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        train_step = 0
        val_step = 0

        for epoch in tqdm(range(epochs), desc='Epochs'):
            ## training
            model.train()
            train_loss = 0
            num_train_batches = len(training_dataloader)
            for X, y in training_dataloader:
                train_step += 1
                X = X.to(device)
                y = y.to(device)
                pred = model(X)
                loss = loss_criterion(pred, y)
                # Backpropagation
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                batch_loss = loss.item()
                train_loss += batch_loss
                tb.add_scalar('training loss vs step', batch_loss, train_step)
            train_loss /= num_train_batches

            ## validating
            val_size = len(validation_dataloader.dataset)
            val_loss, correct = 0, 0
            model.eval()
            with torch.no_grad():
                for X, y in validation_dataloader:
                    val_step += 1
                    X = X.to(device)
                    y = y.to(device)
                    pred = model(X)
                    batch_loss = loss_criterion(pred, y).item()
                    # CrossEntropyLoss returns the batch mean; weight it by the batch
                    # size so a smaller final batch does not skew the epoch average
                    # that is used for checkpoint selection.
                    val_loss += batch_loss * y.size(0)
                    tb.add_scalar('validation loss vs step', batch_loss, val_step)
                    correct += (pred.argmax(1) == y).sum().item()

            val_loss /= val_size
            accuracy = 100 * correct / val_size
            if val_loss < min_loss_so_far:
                min_loss_so_far = val_loss
                checkpoint = {
                    'sd': model.state_dict(),
                    'h_params': model_name,
                    'saved_epoch': epoch,
                    # Stored with the weights so evaluation does not depend on the
                    # kernel still holding the global min_loss_so_far.
                    'val_loss': val_loss,
                }
                torch.save(checkpoint, modelPath + 'bestModelSoFar.pt')
            tb.add_scalar('average training loss vs epoch', train_loss, epoch)
            tb.add_scalar('average validation loss vs epoch', val_loss, epoch)
            tb.add_scalar('validation accuracy vs epoch', accuracy, epoch)
    finally:
        tb.close()

In [5]:
# tempModel = CNN001().to(torch.device("cuda:1"))
# summary(tempModel, (1,4,256))
# del tempModel

In [6]:
# model = CNN002(*(
#     (4,8,(30,1),1,0),
#     (8,16,(1,64),1,0),
#     (4,30,256)
# ))
# print(model)

In [7]:
# Hyper-parameter grid: every combination of the listed values is trained once.
parameters = dict(
    lr = [0.001],
    batch_size = [100],
    epochs = [200]
)

# modelParams = (
#     (4,8,(30,1),1,0),
#     (8,16,(1,64),1,0),
#     (4,30,256)
# )

# Select the grid axes by name so the unpacking below does not silently depend on
# the order in which the keys of `parameters` happen to be written.
param_names = ('lr', 'batch_size', 'epochs')
param_values = [parameters[name] for name in param_names]

## performing hyper parameter tuning
if torch.cuda.is_available():
    # Prefer the second GPU as before, but fall back to the first one on
    # single-GPU machines where "cuda:1" is an invalid device ordinal.
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")
print(device)

for run_id, (lr, batch_size, epochs) in enumerate(product(*param_values)):
    print("Run ID: {} | H Params==> lr:{}, batch_size:{}, epochs:{}".format(run_id, lr, batch_size, epochs))
    training_args = {
        'num_epochs': epochs,
        'lr': lr,
        'batch_size': batch_size,
        'dev_id': device,
        'tbPath': tbPath,
        'modelPath': modelPath,
        # 'modelParams': modelParams,
    }
    model_trainer(training_args)

cuda:1
Run ID: 0 | H Params==> lr:0.001, batch_size:100, epochs:200
VGG_Style_5Block(
  (net): Sequential(
    (0): Conv2d(4, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (13): ReLU()
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (15): Flatten(s



Epochs:   0%|          | 0/200 [00:00<?, ?it/s]

In [9]:
# Evaluate the best checkpoint on the held-out test split.
# NOTE(review): the output recorded with this cell at review time (avg loss ~2.3026,
# accuracy ~9.9%) matches ln(10) / 10%, i.e. what uniform guessing over 10 classes
# would give - presumably the network did not learn; confirm labels, input scaling
# and learning rate before trusting any result from this pipeline.

# Batched evaluation: the default DataLoader batch size of 1 needs one forward pass
# per sample. The loss is re-weighted by batch size below, so the reported average
# is still the per-sample mean.
test_batch_size = 100
test_loader = DataLoader(testing_data, batch_size=test_batch_size)

## performing test
# map_location lets the checkpoint load even if it was saved from a different
# device than the one used for evaluation.
checkpoint = torch.load(modelPath + 'bestModelSoFar.pt', map_location=device)
model = VGG_Style_5Block(in_channels=4)
model.load_state_dict(checkpoint['sd'])
print("Model hyper params:{}".format(checkpoint['h_params']))
# Prefer the loss stored with the checkpoint; fall back to the kernel global for
# checkpoints that do not carry a 'val_loss' entry.
print("Validation loss when the model was saved:{}".format(checkpoint.get('val_loss', min_loss_so_far)))
print("Epoch when the model was saved:{}".format(checkpoint['saved_epoch']))
loss_criteria = nn.CrossEntropyLoss()
test_size = len(test_loader.dataset)
test_loss, correct = 0, 0
model.to(device)
model.eval()
with torch.no_grad():
    for X, y in test_loader:
        X = X.to(device)
        y = y.to(device)
        pred = model(X)
        # CrossEntropyLoss yields the batch mean; multiply by the batch size to
        # accumulate the summed per-sample loss.
        test_loss += loss_criteria(pred, y).item() * y.size(0)
        correct += (pred.argmax(1) == y).sum().item()
test_loss /= test_size
correct /= test_size
accuracy = 100*correct
print(f"Test Error: \n Accuracy: {(accuracy):>0.1f}%, Avg loss: {test_loss:>8f} \n")
del model

Model hyper params:_epochs=200_lr=0.001_batchSize=100
Validation loss when the model was saved:2.3023527383804323
Epoch when the model was saved:126
Test Error: 
 Accuracy: 9.9%, Avg loss: 2.302656 

