<h3>Testing Notebook</h3>

In [5]:
import sys
sys.path.append('../..')

In [6]:
import torch
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import random
import torch.nn as nn
import torch.nn.functional as F

In [7]:
# Ligand codes handed to GetData / DatasetPolar; reused later as class names
# for the classification report.
ligands = [
    "TNF", "R84", "PIC",
    "P3K", "FLA", "CpG",
    "FSL", "LPS", "UST",
]
# Polarization-state codes; "" is presumably the unpolarized condition - TODO confirm.
polarization = ["", "ib", "ig", "i0", "i3", "i4"]
# Replica setting forwarded to the loaders (exact meaning lives in core.getdata).
replicas = 2
# Rows per (ligand, polarization) condition, cf. GetData(...).X.shape == (1288, 98).
size = 1288

In [8]:
from core.getdata import *
from core.dataset import *

<h3>Example of GetData</h3>

In [9]:
TNFib1 = GetData(ligands[0], polarization[1], replicas, size)
TNFib1.X.shape

(1288, 98)

<h3>Example of Dataset</h3>

In [16]:
data = DatasetPolar(ligands, polarization, replicas, size)

In [17]:
%%time
for _ in data:
    pass

Wall time: 38 ms


In [18]:
data.data.shape

(69552, 98, 1)

In [19]:
print(type(data.data), type(data.labels))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [20]:
print(data.labels, data.labels.shape) # labels are a 1-D array of integer class indices (0..8 in the recorded output), one per sample - not multi-hot

[0 0 0 ... 8 8 8] (69552,)


<h3>Initializing Dataloaders</h3>

In [21]:
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor

In [22]:
# 90/10 split sizes, then a further 90/10 split of the training part.
# int() truncates, so X_len + test_len can fall one short of the total
# (the second print makes that visible).
n_samples = len(data.data)
X_len = int(n_samples * 0.9)
test_len = int(n_samples * 0.1)
train_len = int(X_len * 0.9)
val_len = int(X_len * 0.1)
print(X_len, test_len, train_len, val_len) #lengths
print(X_len + test_len, n_samples)

62596 6955 56336 6259
69551 69552


In [23]:
# NOTE(review): data.data was already reported as (69552, 98, 1) above, so this
# reshape to (N, 98, 1) leaves the shape unchanged rather than adding a dimension.
# training_data is not referenced by any of the cells shown below.
training_data = data.data.reshape(data.data.shape[0], data.data.shape[1], 1) #adds extra dimension

In [24]:
# Only the last expression of a cell is displayed, so show both values as a
# tuple instead of silently discarding the sample shape.
data.data[1000].shape, len(data)

69552

In [25]:
# Split into a training subset and a held-out subset. The held-out length is
# derived from len(data) so the two parts always cover every sample, instead of
# patching the truncated 10% count with "+1". The seeded generator makes the
# split reproducible across kernel restarts.
dataset_X, dataset_val = torch.utils.data.random_split(
    data,
    [X_len, len(data) - X_len],
    generator=torch.Generator().manual_seed(0),
)  # still np arrays

In [26]:
# Training batches are reshuffled every epoch; the validation loader keeps a
# fixed order because shuffling adds nothing for evaluation and a fixed order
# makes validation runs deterministic.
dataloader_train = torch.utils.data.DataLoader(dataset_X, batch_size=64, shuffle=True)
dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size=64, shuffle=False)

In [27]:
print(len(dataloader_train), len(dataloader_val), len(dataloader_train) + len(dataloader_val))
###
#data[0:10]

979 109 1088


<h3>Model Training</h3>

In [28]:
from core.network import *
from core.trainer import *

In [29]:
torch.cuda.is_available()

True

In [30]:
#model parameters
# NOTE(review): the recorded model repr shows LSTM(98, 98) and training fails
# with "Expected 98, got 1", while samples have shape (98, 1). The feature size
# the network is built with does not match the data - confirm how
# core.network.LSTM uses input_size / hidden_sizes.
input_size = 1
hidden_sizes = 98
output_size = 9  # one output per entry in `ligands`

#training parameters
n_epochs = 50
batch_size = 64  # same value is hard-coded in the DataLoader cell
# NOTE(review): learning_rate is not passed to the model or trainer in the cells shown.
learning_rate = 1e-3

In [31]:
model = LSTM(input_size, hidden_sizes, output_size, num_layers=1, device="cuda:0")
model.train()

LSTM(
  (lstm): LSTM(98, 98, batch_first=True)
  (fc1): Linear(in_features=98, out_features=9, bias=True)
)

In [32]:
trainer = LSTMTrainer(model=model, device="cuda:0")

In [33]:
trainer.train(dataloader_train, dataloader_val, batch_size=batch_size, n_epochs=n_epochs)

  0%|                                                                                           | 0/50 [00:00<?, ?it/s]


RuntimeError: input.size(-1) must be equal to input_size. Expected 98, got 1

<h3>Evaluation</h3>

In [None]:
# Evaluate the network on the whole held-out subset in one batch and collect
# predicted vs. true class indices in a DataFrame.
torch.cuda.empty_cache()
# Slice by the subset's actual length rather than a hard-coded sample count.
x_batch, y_batch = dataset_val[0:len(dataset_val)]
x_batch = torch.tensor(x_batch, device=torch.device("cuda:0"))
# Inference only: no_grad avoids building an autograd graph for the full split.
with torch.no_grad():
    y_pred = trainer.network(x_batch)
    y_pred = F.softmax(y_pred, dim=1)
# Move to host memory as NumPy, then take the most probable class per sample.
y_pred = y_pred.cpu().numpy()
y_pred = np.argmax(y_pred, axis=1)
dic = {"y_pred": y_pred, "y_true": y_batch}

df = pd.DataFrame(dic)

In [None]:
df

In [None]:
# Fraction of matching predictions; the mean of the boolean match avoids a
# hard-coded sample count that would go stale if the split size changes.
print(f' Accuracy: {(df["y_pred"] == df["y_true"]).mean()}')

In [34]:
import pickle
import os
# torch.save(model.state_dict(), '../models/lstm3.pth')

<h3>Plots</h3>

In [35]:
import matplotlib.pyplot as plt

In [36]:
# Validation and training loss curves on shared axes.
# NOTE(review): the recorded run raised AttributeError on trainer.val_losses -
# presumably the attribute is only set once training succeeds, or it has a
# different name in core.trainer; confirm before relying on this cell.
fig, ax = plt.subplots()
ax.plot(trainer.val_losses, label="validation loss")
ax.plot(trainer.train_losses, label="training loss")
ax.set_ylabel("loss")
ax.legend();

AttributeError: 'LSTMTrainer' object has no attribute 'val_losses'

A 3-layer LSTM seems to achieve even better accuracy; the hyperparameters could be optimized further.

<h3>Classification Report</h3>

In [37]:
# not a holistic look at the data

In [None]:
import sklearn.metrics

In [None]:
# Per-class precision / recall / F1 as a nested dict, then transposed so each
# class (and each aggregate row) becomes one DataFrame row.
report = sklearn.metrics.classification_report(
    y_true=dic["y_true"],
    y_pred=dic["y_pred"],
    target_names=ligands,
    output_dict=True,
)
rep = pd.DataFrame(report).T

In [None]:
rep

In [None]:
# Pin the label set so the matrix is always len(ligands) x len(ligands), even if
# a class is missing from the evaluated samples, and label the axes with the
# ligand names, consistent with target_names=ligands in the classification report.
class_ids = list(range(len(ligands)))
confusion = sklearn.metrics.confusion_matrix(dic["y_true"], dic["y_pred"], labels=class_ids)
plot = sklearn.metrics.ConfusionMatrixDisplay(confusion, display_labels=ligands)

In [None]:
plot.plot()