In [1]:
import torch
import torchvision
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from dataloader import MionicDataset
from torch.utils.data import Dataset, DataLoader, random_split

2024-02-27 10:15:35.603762: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from dataloader import MionicDataset
dataset = MionicDataset('../../Project/mionic_dataset/agg_mion_mebi.csv')

train_set, val_set = random_split(dataset, [0.8, 0.2], generator=torch.Generator().manual_seed(42))
train_loader = DataLoader(train_set, batch_size=1, shuffle=True)
val_loader = DataLoader(val_set, batch_size=1, shuffle=True)

In [3]:
import torch.nn as nn
import torch.nn.functional as F

class ProteinClassifier(nn.Module):
    def __init__(self, input_shape=1280, output_shape=10, embedding_dim=256):
        super(ProteinClassifier, self).__init__()
        self.fc1 = nn.Linear(input_shape, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, output_shape)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # shape of x is (batch)x(feature dimension)
        #print(x.shape)
        x = torch.mean(x, dim=1)
        #print(x.shape)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

model = ProteinClassifier()

In [4]:
for batch in train_loader:
    x, y = batch
    print(x.shape)
    y_hat = model(x)
    print(y)
    print(y_hat.shape)
    break

torch.Size([1, 522, 1280])
tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
torch.Size([1, 10])


In [4]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [5]:
from tqdm import tqdm

In [6]:
def train_one_epoch(epoch_index, tb_writer):
    running_loss = 0
    last_loss = 0

    for i, data in tqdm(enumerate(train_loader), total=len(train_loader)):
        
        inputs, labels = data
        labels = labels
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i%1000 == 999:
            last_loss = running_loss / 1000
            print('   batch {} loss: {}'.format(i+1, last_loss))
            tb_x = epoch_index * len(train_loader)+i+1
            tb_writer.add_scalar('Loss/train', last_loss, tb_x)
            running_loss = 0

    return last_loss

In [None]:
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/fashion_trainer_{}'.format(timestamp))
epoch_number = 0

EPOCHS = 5

best_vloss = 1_000_000

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number+1))

    model.train(True)
    avg_loss = train_one_epoch(epoch_number, writer)

    running_vloss = 0
    model.eval()

    # Disable gradient computation and reduce memory consumption.
    with torch.no_grad():
        for i, vdata in enumerate(val_loader):
            vinputs, vlabels = vdata
            voutputs = model(vinputs)
            vloss = loss_fn(voutputs, vlabels)
            running_vloss += vloss

    avg_vloss = running_vloss / (i+1)
    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

    writer.add_scalars('Training vs Validation Loss', { 'Training':avg_loss, 'Validation': avg_vloss }, epoch_number +1)
    writer.flush()

    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = 'model_{}_{}'.format(timestamp, epoch_number)
        torch.save(model.state_dict(), model_path)

    epoch_number += 1

EPOCH 1:


 20%|███████▊                               | 1000/4981 [13:09<38:15,  1.73it/s]

   batch 1000 loss: 1.1950126080513


 25%|█████████▉                             | 1267/4981 [16:29<52:50,  1.17it/s]

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class ResidueClassifier(nn.Module):
    def __init__(self, input_shape=21, output_shape=10):
        super(ResidueClassifier, self).__init__()
        self.fc1 = nn.Linear(input_shape, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, output_shape)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [17]:
for batch in dataloader:
    x, y = batch
    y_hat = model(x)
    print(y)
    print(y_hat)
    break

torch.Size([1, 256])
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
tensor([[0.5246, 0.5093, 0.5047, 0.5014, 0.5058, 0.4781, 0.5265, 0.5170, 0.5054,
         0.5339]], grad_fn=<SigmoidBackward0>)
