In [1]:
import torch
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import random
import torch.nn as nn
import torch.nn.functional as F

In [2]:
ligands = ["TNF", "R84", "PIC", "P3K", "FLA", "CpG", "FSL", "LPS", "UST"]
polarization = ["", "ib", "ig", "i0", "i3", "i4"]
replicas, size = 2, 1288

In [3]:
from core.getdata import *
from core.dataset import *

<h3>Example of GetData</h3>

In [4]:
TNFib1 = GetData(ligands[0], polarization[1], replicas, size)
TNFib1.X.shape

(1288, 98)

<h3>Example of Dataset</h3>

In [5]:
data = Dataset(ligands, polarization, replicas, size)

In [6]:
%%time
for _ in data:
    pass

Wall time: 38 ms


In [7]:
data.data.shape

(69552, 98, 1)

In [8]:
print(type(data.data), type(data.labels))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [9]:
print(data.labels[65000], data.labels[65000].shape) #labels are multi-hot encoded

[0 0 0 0 0 0 0 0 1] (9,)


<h3>Initializing Dataloaders</h3>

In [10]:
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor

In [11]:
X_len, test_len = int(len(data.data) * 0.75), int(len(data.data) * 0.25)
train_len, val_len = int(X_len * 0.75), int(X_len * 0.25)
print(X_len, test_len, train_len, val_len) #lengths

52164 17388 39123 13041


In [12]:
training_data = data.data.reshape(data.data.shape[0], data.data.shape[1], 1) #adds extra dimension

In [13]:
data.data[1000].shape

(98, 1)

In [14]:
dataset_X = torch.utils.data.Subset(data, list(range(0, X_len))) # need separate data and labels for LSTM?
dataset_test = torch.utils.data.Subset(data, list(range(X_len, len(data.data))))

In [15]:
#dataset_train = torch.utils.data.Subset(dataset_X, list(range(0, train_len)))
#dataset_val = torch.utils.data.Subset(dataset_X, list(range(train_len, X_len)))

In [16]:
dataloader_train = torch.utils.data.DataLoader(dataset_X, batch_size=64, shuffle=True, num_workers=4)
#dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size=64, shuffle=True, num_workers=2)
dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size=64, shuffle=True, num_workers=4)

In [17]:
###
print(len(dataloader_train), len(dataloader_test), len(dataloader_train) + len(dataloader_test))

816 272 1088


<h3>Model Training and Evaluation</h3>

In [18]:
from core.network import *
from core.trainer import *

In [19]:
torch.cuda.is_available()

True

In [20]:
#model parameters
input_size = 1
hidden_size = 9
batch_size = 32

#training parameters
n_epochs = 10
learning_rate = 1e-3
weight_decay = 1e-6

In [21]:
model = LSTM(input_size, hidden_size, device="cuda:0")
model.train()

LSTM(
  (lstm): LSTM(1, 9, batch_first=True)
  (fc): Linear(in_features=9, out_features=9, bias=True)
)

In [22]:
trainer = LSTMTrainer(model=model, device="cuda:0")

In [23]:
trainer.train(dataloader_train, batch_size=batch_size, n_epochs=n_epochs)

 10%|████████▎                                                                          | 1/10 [00:15<02:22, 15.82s/it]

Epoch 001: | Loss: 0.002414432377366493


 20%|████████████████▌                                                                  | 2/10 [00:30<02:00, 15.02s/it]

Epoch 002: | Loss: 0.0023367305555492238


 30%|████████████████████████▉                                                          | 3/10 [00:44<01:43, 14.80s/it]

Epoch 003: | Loss: 0.002343840030212097


 40%|█████████████████████████████████▏                                                 | 4/10 [00:59<01:28, 14.70s/it]

Epoch 004: | Loss: 0.0023563064871837845


 50%|█████████████████████████████████████████▌                                         | 5/10 [01:13<01:13, 14.65s/it]

Epoch 005: | Loss: 0.0023492076289179415


 60%|█████████████████████████████████████████████████▊                                 | 6/10 [01:28<00:58, 14.61s/it]

Epoch 006: | Loss: 0.00233557581169669


 70%|██████████████████████████████████████████████████████████                         | 7/10 [01:43<00:43, 14.59s/it]

Epoch 007: | Loss: 0.002329222119446616


 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [01:58<00:29, 14.90s/it]

Epoch 008: | Loss: 0.0023311086465210374


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [02:14<00:15, 15.31s/it]

Epoch 009: | Loss: 0.0023144540546683225


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [02:31<00:00, 15.12s/it]

Epoch 010: | Loss: 0.0023295741343153918





In [24]:
trainer.evaluate(dataloader_test, batch_size=batch_size, n_epochs=3)

 33%|████████████████████████████                                                        | 1/3 [00:12<00:25, 12.97s/it]

Epoch 001: | Loss: 0.03691851832359307


 67%|████████████████████████████████████████████████████████                            | 2/3 [00:25<00:12, 12.86s/it]

Epoch 002: | Loss: 0.036916075319464225


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:38<00:00, 12.82s/it]

Epoch 003: | Loss: 0.03691792900587036





<h3>Classification Report</h3>

In [25]:
import sklearn.metrics

In [26]:
##in progress

In [27]:
#np.argmax(y_pred, axis=1)