In [1]:
import torch
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import random
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Stimulus (ligand) codes passed to GetData / Dataset below.
ligands = [
    "TNF", "R84", "PIC",
    "P3K", "FLA", "CpG",
    "FSL", "LPS", "UST",
]
# Polarization codes; the first entry is the empty string.
polarization = ["", "ib", "ig", "i0", "i3", "i4"]
# Forwarded unchanged to GetData / Dataset. `size` equals the row count of the
# GetData example output shown further down (1288, 98).
replicas = 2
size = 1288

In [3]:
from core.getdata import *
from core.dataset import *

<h3>Example of GetData</h3>

In [4]:
# Load a single condition: ligands[0] ("TNF") combined with polarization[1] ("ib").
# NOTE(review): GetData is presumably provided by the star import from core.getdata;
# the meaning of `replicas` and `size` should be confirmed against that module.
TNFib1 = GetData(ligands[0], polarization[1], replicas, size)
TNFib1.X.shape  # recorded output: (1288, 98)

(1288, 98)

<h3>Example of Dataset</h3>

In [5]:
%%time
# Build the dataset object from the full ligand and polarization lists; %%time reports the wall time.
# NOTE(review): `Dataset` here is presumably the class star-imported from core.dataset. A later cell
# runs `from torch.utils.data import Dataset`, which rebinds the name, so re-running this cell
# after that import would call the wrong class — confirm.
data = Dataset(ligands, polarization, replicas, size)

Wall time: 1.74 s


In [6]:
%%time
# Time one full pass over the dataset; every yielded item is discarded.
for _ in data:
    pass

Wall time: 37 ms


In [7]:
# Shapes of the sample array and the label array (recorded output: (69552, 98) (69552,)).
print(data.data.shape, data.labels.shape)

(69552, 98) (69552,)


In [8]:
# Container types backing the dataset (recorded output: both are numpy.ndarray).
print(type(data.data), type(data.labels))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [9]:
# Index a single item to inspect what the dataset returns per sample.
data[50000]  # per the original note, the returned tuple is (data, label, polarization)

(array([-0.26260754, -0.32666735, -0.51903494, -0.57806016, -0.65227568,
        -0.62785288, -0.64710913, -0.58258334, -0.20597036,  0.34502968,
         0.34122651, -0.13649954, -0.30970538, -0.58316136, -0.73815084,
        -0.83118643, -0.83058515, -0.65037751, -0.42419233,  0.26292194,
         0.47983847,  0.88921226,  0.76002823,  0.39654181,  0.08346922,
        -0.23083186, -0.52227073, -0.51832029, -0.63926856, -0.64805235,
        -0.4966326 , -0.17124523,  0.24441291,  0.55625479,  0.61545437,
         1.13355591,  0.99762206,  0.76110859,  0.66954667,  0.74998271,
         0.59190267,  0.51815661,  0.61454709,  0.97688806,  0.76297776,
         0.69921832,  0.61852591,  0.72639086,  0.71619395,  0.49048819,
         0.42432573,  0.17027825,  0.09615295,  0.22668685,  0.73120842,
         0.7855645 ,  0.98766777,  0.89235227,  0.91657479,  1.04238457,
         1.02160155,  0.83340051,  0.98858535,  0.72190469,  0.73164909,
         0.62149777,  0.73465735,  1.05664454,  1.5

<h3>Initializing Dataloaders</h3>

In [10]:
# NOTE(review): this import rebinds the name `Dataset`. The earlier cell
# `data = Dataset(ligands, polarization, replicas, size)` presumably relied on the class
# star-imported from core.dataset, so re-running that cell after this one would call
# torch's Dataset instead — confirm, and consider aliasing one of the two names.
from torch.utils.data import Dataset
# NOTE(review): the torchvision names below are not used by any of the visible cells.
from torchvision import datasets
from torchvision.transforms import ToTensor

In [11]:
# Sizes for a 75/25 hold-out split, followed by a 75/25 train/validation split of the
# retained part. The second size of each pair is derived by subtraction so the two
# always add up to their parent size; truncating both products independently with
# int() can drop a sample whenever the parent size is not a multiple of 4.
n_samples = len(data.data)
X_len = int(n_samples * 0.75)       # train + validation pool
test_len = n_samples - X_len        # matches the range(X_len, len(data.data)) test subset
train_len = int(X_len * 0.75)
val_len = X_len - train_len
print(X_len, test_len, train_len, val_len) #lengths

52164 17388 39123 13041


In [12]:
# Append a trailing singleton axis to the 2-D sample array: (rows, cols) -> (rows, cols, 1).
# The original author marked this as probably unneeded; no visible cell consumes it
# beyond the shape check that follows.
training_data = np.expand_dims(data.data, axis=-1)

In [13]:
training_data.shape  # recorded output: (69552, 98, 1)

(69552, 98, 1)

In [14]:
# Positional hold-out split: the first X_len items become the train(+validation) pool and
# everything from X_len onwards becomes the test set.
# NOTE(review): the split follows storage order with no shuffling; if `data` is stored grouped
# by ligand/polarization, the test set would cover different conditions than training — confirm.
# TODO (original author's open question): are separate data and labels needed for the LSTM?
dataset_X, dataset_test = (
    torch.utils.data.Subset(data, list(index_range))
    for index_range in (range(X_len), range(X_len, len(data.data)))
)

In [15]:
#dataset_train = torch.utils.data.Subset(dataset_X, list(range(0, train_len)))
#dataset_val = torch.utils.data.Subset(dataset_X, list(range(train_len, X_len)))

In [16]:
# Wrap both subsets in DataLoaders with identical settings. `dataloader_train` is built from
# dataset_X, i.e. the whole train+validation pool, because the validation split is disabled.
# NOTE(review): the batch size is hard-coded to 64 here and must stay in sync with the
# `batch_size` hyperparameter defined later; the test loader is shuffled as well.
dataloader_train, dataloader_test = (
    torch.utils.data.DataLoader(subset, batch_size=64, shuffle=True, num_workers=4)
    for subset in (dataset_X, dataset_test)
)
#dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size=64, shuffle=True, num_workers =2)

In [17]:
###
# Number of batches per loader and their sum (recorded output: 816 272 1088).
print(len(dataloader_train), len(dataloader_test), len(dataloader_train) + len(dataloader_test))

816 272 1088


<h3>Model Training and Evaluation</h3>

In [18]:
from core.network import *
from core.optimizer import *

In [19]:
# Use the first CUDA device when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [20]:
# --- model hyperparameters ---
# input_dim equals the second dimension of data.data (98 in the recorded output).
input_dim, output_dim = 98, 1
hidden_dim, layer_dim = 64, 3
# The DataLoaders in the earlier cell were built with a literal batch_size=64; keep in sync.
batch_size = 64
dropout = 0.2
# --- training hyperparameters ---
n_epochs = 100
learning_rate, weight_decay = 1e-3, 1e-6

In [21]:
# Instantiate the network with the hyperparameters defined in the previous cell.
# NOTE(review): `LSTM` is presumably the class star-imported from core.network — confirm.
model = LSTM(input_dim, hidden_dim, layer_dim, output_dim, dropout)
model.to(device)  # moves the model to `device`; as the last expression it also displays the module summary

LSTM(
  (lstm): LSTM(98, 64, num_layers=3, batch_first=True, dropout=0.2)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)

In [22]:
# Loss: binary cross-entropy computed on raw logits.
# NOTE(review): BCEWithLogitsLoss expects float targets with the same shape as the model
# output — confirm that data.labels is encoded accordingly.
loss_fn = nn.BCEWithLogitsLoss()
# Reference Adam through the explicitly imported `torch` package. The bare name `optim` is
# never imported in this notebook and was only available if a star import happened to leak it.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Bundle model, loss and optimizer into the project's training helper.
opt = Optimization(model=model, loss_fn=loss_fn, optimizer=optimizer)

In [23]:
# Start training through Optimization.train on the training loader.
# NOTE(review): the recorded output of this cell is
# "RuntimeError: input must have 3 dimensions, got 2". nn.LSTM with batch_first=True expects
# input shaped (batch, seq_len, input_size); presumably the batches reach the LSTM as 2-D
# (batch, 98) tensors — confirm how Optimization.train / LSTM.forward reshape them.
opt.train(device, dataloader_train, batch_size=batch_size, n_epochs=n_epochs, n_features=input_dim)

RuntimeError: input must have 3 dimensions, got 2