In [1]:
import torch
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import random
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Experimental conditions: the nine ligand codes and the six polarization codes.
ligands = [
    "TNF", "R84", "PIC",
    "P3K", "FLA", "CpG",
    "FSL", "LPS", "UST",
]
polarization = [
    "",
    "ib", "ig", "i0", "i3", "i4",
]

# Forwarded unchanged to GetData / Dataset in the cells below.
replicas = 2
size = 1288

In [3]:
from core.getdata import *
from core.dataset import *

<h3>Example of GetData</h3>

In [4]:
# Build the data for a single condition: ligand "TNF" with polarization "ib".
TNFib1 = GetData(ligands[0], polarization[1], replicas, size)
# Displayed value of the cell: the shape of the object's `X` attribute.
TNFib1.X.shape

(1288, 98)

<h3>Example of Dataset</h3>

In [5]:
# Construct the project Dataset from the full ligand and polarization lists.
data = Dataset(ligands, polarization, replicas, size)

In [6]:
%%time
# Walk the dataset once and discard every item; the cell exists only so that
# %%time reports how long a full pass takes.
for _ in data:
    pass

Wall time: 38 ms


In [7]:
# Shape of the sample array stored on the dataset object.
data.data.shape

(69552, 98, 1)

In [8]:
# Check which container types hold the samples and the labels.
print(type(data.data), type(data.labels))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [9]:
# A single label is a 0-d integer class id (shape `()`), not a multi-hot vector.
# Presumably the id indexes into `ligands` — TODO confirm against core.dataset.
print(data.labels[65000], data.labels[65000].shape)

8 ()


<h3>Initializing Dataloaders</h3>

In [10]:
# NOTE(review): this import rebinds the name `Dataset`. The `Dataset(...)` call
# that built `data` earlier resolved to a class from the project star-imports,
# so re-running that cell after this one (without re-running the star-import
# cell first) would pick up the torch base class instead.
from torch.utils.data import Dataset
# NOTE(review): neither of the two torchvision names below is referenced in the
# cells visible here.
from torchvision import datasets
from torchvision.transforms import ToTensor

In [11]:
# Split sizes: 75% / 25% of all samples, then 75% / 25% of the first part.
# The second length of each pair is taken as the remainder rather than a second
# int() truncation, so each pair always sums exactly to its parent length
# (random_split rejects integer lengths that do not add up to len(dataset)).
n_samples = len(data.data)
X_len = int(n_samples * 0.75)
test_len = n_samples - X_len
train_len = int(X_len * 0.75)
val_len = X_len - train_len
assert X_len + test_len == n_samples and train_len + val_len == X_len
print(X_len, test_len, train_len, val_len) #lengths
print(X_len + test_len, len(data.data))

52164 17388 39123 13041
69552 69552


In [12]:
# Reshape to (first dim, second dim, 1): the first two dimensions of data.data
# are kept and a trailing axis of length 1 is requested.
training_data = data.data.reshape(*data.data.shape[:2], 1)

In [13]:
# Only the last expression of a cell is echoed, so this per-sample shape is
# evaluated but never displayed.
data.data[1000].shape
# Displayed value: number of items in the dataset.
len(data)

69552

In [14]:
# Randomly partition the dataset into an X_len part and a test_len part.
# A seeded generator makes the partition identical on every Restart & Run All;
# without it the held-out samples change from run to run.
# NOTE: despite its name, `dataset_val` receives the `test_len` samples.
SPLIT_SEED = 42
dataset_X, dataset_val = torch.utils.data.random_split(
    data,
    [X_len, test_len],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
) # need separate data and labels for LSTM?

In [15]:
# Training batches are reshuffled every epoch; the held-out loader keeps a fixed
# order because shuffling adds nothing to evaluation and only makes the batch
# composition differ between runs.
dataloader_train = torch.utils.data.DataLoader(dataset_X, batch_size=64, shuffle=True, num_workers=4)
dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size=64, shuffle=False, num_workers=4)

In [16]:
# Number of batches in each loader, and their total.
print(len(dataloader_train), len(dataloader_val), len(dataloader_train) + len(dataloader_val))
###
#data[0:10]

816 272 1088


<h3>Model Training and Evaluation</h3>

In [17]:
from core.network import *
from core.trainer import *

In [18]:
# The model, trainer and evaluation cells hard-code "cuda:0", so this needs to
# be True for the rest of the notebook to run.
torch.cuda.is_available()

True

In [19]:
#model parameters
# --- Network architecture ---
input_size, output_size = 1, 9
hidden_sizes = [16, 98]

# --- Optimisation settings ---
# NOTE(review): learning_rate is defined here but is not passed to the trainer
# in the cells visible in this notebook.
n_epochs = 15
batch_size = 64
learning_rate = 0.001

In [20]:
# Instantiate the project's LSTM classifier on the first GPU.
# NOTE(review): num_layers=2 is requested, but the repr displayed for this cell
# shows the inner layer as `LSTM(1, 16, batch_first=True)` with no
# `num_layers=2` — check whether the class actually forwards num_layers.
model = LSTM(input_size, hidden_sizes, output_size, num_layers=2, device="cuda:0")
# Switch to training mode; the call's return value is what the cell displays.
model.train()

LSTM(
  (lstm): LSTM(1, 16, batch_first=True)
  (fc1): Linear(in_features=16, out_features=98, bias=True)
  (fc2): Linear(in_features=98, out_features=9, bias=True)
)

In [21]:
# Wrap the model in the project's trainer, targeting the same GPU as the model.
trainer = LSTMTrainer(model=model, device="cuda:0")

In [22]:
# Train for n_epochs; the logged output reports a training and a validation
# loss per epoch.
# NOTE(review): `learning_rate` is not passed here or to the LSTMTrainer
# constructor, so whatever default the trainer uses is in effect — confirm.
# NOTE(review): the logged losses stay close to ln(9) ≈ 2.197, which would be
# the loss of a uniform guess over 9 classes if the trainer uses cross-entropy
# — confirm the loss function and check that the model is actually learning.
trainer.train(dataloader_train, dataloader_val, batch_size=batch_size, n_epochs=n_epochs)

  7%|█████▌                                                                             | 1/15 [00:34<08:09, 34.99s/it]

Epoch 001: | Training Loss: 2.170864293972651 | Validation Loss: 2.1908295811975704


 13%|███████████                                                                        | 2/15 [01:07<07:18, 33.77s/it]

Epoch 002: | Training Loss: 2.176334483950746 | Validation Loss: 2.1062171581913445


 20%|████████████████▌                                                                  | 3/15 [01:40<06:39, 33.26s/it]

Epoch 003: | Training Loss: 2.168659151623062 | Validation Loss: 2.17518111975754


 27%|██████████████████████▏                                                            | 4/15 [02:13<06:04, 33.13s/it]

Epoch 004: | Training Loss: 2.1298987132077123 | Validation Loss: 2.068130885853487


 33%|███████████████████████████▋                                                       | 5/15 [02:46<05:29, 32.99s/it]

Epoch 005: | Training Loss: 2.0597063462231673 | Validation Loss: 2.108114802662064


 40%|█████████████████████████████████▏                                                 | 6/15 [03:18<04:55, 32.86s/it]

Epoch 006: | Training Loss: 2.10099058554453 | Validation Loss: 2.108763295061448


 47%|██████████████████████████████████████▋                                            | 7/15 [03:51<04:22, 32.82s/it]

Epoch 007: | Training Loss: 2.075746408572384 | Validation Loss: 2.073082237997476


 53%|████████████████████████████████████████████▎                                      | 8/15 [04:24<03:49, 32.75s/it]

Epoch 008: | Training Loss: 2.054059944900812 | Validation Loss: 2.105327714891995


 60%|█████████████████████████████████████████████████▊                                 | 9/15 [04:58<03:19, 33.17s/it]

Epoch 009: | Training Loss: 2.0619801645185434 | Validation Loss: 2.051523646011072


 67%|██████████████████████████████████████████████████████▋                           | 10/15 [05:34<02:49, 33.98s/it]

Epoch 010: | Training Loss: 2.0546531326630535 | Validation Loss: 2.073734548600281


 73%|████████████████████████████████████████████████████████████▏                     | 11/15 [06:08<02:16, 34.04s/it]

Epoch 011: | Training Loss: 2.075492855818833 | Validation Loss: 2.067181561361341


 80%|█████████████████████████████████████████████████████████████████▌                | 12/15 [06:43<01:42, 34.30s/it]

Epoch 012: | Training Loss: 2.0899062086554134 | Validation Loss: 2.114955766236081


 87%|███████████████████████████████████████████████████████████████████████           | 13/15 [07:16<01:08, 34.16s/it]

Epoch 013: | Training Loss: 2.0578133665463505 | Validation Loss: 2.032967596369631


 93%|████████████████████████████████████████████████████████████████████████████▌     | 14/15 [07:51<00:34, 34.29s/it]

Epoch 014: | Training Loss: 2.023668055879135 | Validation Loss: 2.007647136993268


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [08:26<00:00, 33.76s/it]

Epoch 015: | Training Loss: 2.0117892010831366 | Validation Loss: 1.988891225965584





In [23]:
# DISABLED DRAFT: batched evaluation over the held-out loader. Kept for
# reference only; it would not run as written:
#   - `self.device` is used, but there is no `self` at notebook scope;
#   - `dic[y_true]` / `dic[y_pred]` index with variables instead of the string
#     keys "y_true" / "y_pred";
#   - NumPy arrays have no `.vstack` method (np.vstack is a module function).
# dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size=32, shuffle=True, num_workers=4)
# dic = {"y_pred": np.array([]), "y_true": np.array([])}
# for x_batch, y_batch in dataloader_val:
#     x_batch, y_batch = x_batch.to(self.device), y_batch.to(self.device)
#     # to do : convert y_batch to np array and vstack it y_true
#     y_batch = y_batch.detach().cpu().numpy()
#     dic[y_true].vstack(y_true, y_batch)
#     y_pred = trainer.model(x_batch)
#     y_pred = F.softmax(y_pred, dim=1)
#     # to do : convert to np array and vstack it to y_pred
#     y_pred = y_pred.detach().cpu().numpy()
#     dic[y_pred].vstack(y_pred)
    
# df = pd.DataFrame(dic)

In [24]:
torch.cuda.empty_cache()
# Take the first 5000 held-out items and move the inputs to the GPU.
x_batch, y_batch = dataset_val[0:5000]
x_batch = torch.tensor(x_batch, device=torch.device("cuda:0"))

# Inference only: run in eval mode and without autograd so no graph is built
# for the 5000-sample forward pass. The previous train/eval mode is restored
# afterwards so this cell does not change the model's state for later cells.
was_training = trainer.model.training
trainer.model.eval()
try:
    with torch.no_grad():
        y_pred = trainer.model(x_batch)
        y_pred = F.softmax(y_pred, dim=1)
finally:
    trainer.model.train(was_training)

# Predicted class = index of the largest probability in each row.
y_pred = y_pred.cpu().numpy()
y_pred = np.argmax(y_pred, axis=1)
dic = {"y_pred": y_pred, "y_true": y_batch}

df = pd.DataFrame(dic)

In [25]:
# Predicted vs. true class id for each evaluated sample.
df

Unnamed: 0,y_pred,y_true
0,3,5
1,8,2
2,3,4
3,7,7
4,3,7
...,...,...
4995,0,4
4996,3,4
4997,2,6
4998,1,1


In [26]:
# Accuracy = fraction of rows where the prediction matches the label.
# Using .mean() ties the denominator to the actual number of rows instead of a
# hard-coded 5000, so the value stays correct if the evaluated slice changes.
(df["y_pred"] == df["y_true"]).mean()

0.2552

In [27]:
# Reference point: accuracy of a uniform random guess over 9 classes.
1/9

0.1111111111111111

<h3>Classification Report</h3>

In [28]:
import sklearn.metrics