# MulticlassSN10 development

In [1]:
from pathlib import Path
import re
from typing import List
import json
import numpy as np
import pandas as pd
# import dovpanda
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
# import mlflow

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

import NegativeClassOptimization.config as config
import NegativeClassOptimization.utils as utils
import NegativeClassOptimization.datasets as datasets
import NegativeClassOptimization.preprocessing as preprocessing
import NegativeClassOptimization.ml as ml

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Notebook-wide configuration.
# N: number of rows sampled from the train/validation split for quick iteration.
N = 100
# NUM_CLASSES: number of output classes passed to the classifier.
NUM_CLASSES = 6
# SEED: fixed seed so that "Restart Kernel -> Run All" reproduces the same
# sampled rows and the same initial model weights.
SEED = 0

# torch's default generator drives weight initialisation; pandas'
# DataFrame.sample falls back to NumPy's global RNG when no random_state
# is passed, so both generators are seeded here.
torch.manual_seed(SEED)
np.random.seed(SEED)

Define the dataset

In [3]:
# Load the processed splits and draw N random rows from the train/validation
# split; the index is reset so rows are numbered 0..N-1.
dfs = utils.load_processed_dataframes()
df = dfs["train_val"].sample(N).reset_index(drop=True)
print(f"{df.columns=}")

# Build the feature matrix: one-hot encode (presumably this adds the
# "Slide_onehot" column read below -- confirm in `preprocessing`), stack the
# per-row lists into a 2-D array, then standardise it with StandardScaler.
df = preprocessing.onehot_encode_df(df)
arr = preprocessing.arr_from_list_series(df["Slide_onehot"])
arr = StandardScaler().fit_transform(arr)

# Integer-encode the antigen names as class labels.
# NOTE(review): the encoder is fitted on the *sampled* rows only, so the
# antigen -> label mapping depends on which antigens the sample contains.
antigens = df["Antigen"].unique().tolist()
encoder = LabelEncoder().fit(antigens)

# Fail early with a clear message: labels are 0..len(classes_)-1, and a label
# >= NUM_CLASSES would otherwise only surface later as an out-of-range target
# error inside the cross-entropy loss.
assert len(encoder.classes_) <= NUM_CLASSES, (
    f"Found {len(encoder.classes_)} antigen classes, "
    f"but NUM_CLASSES={NUM_CLASSES}"
)

# Store features (one list per row) and labels next to the raw columns.
df["X"] = arr.tolist()
df["y"] = encoder.transform(df["Antigen"])

df[["X", "y"]].head(3)

df.columns=Index(['Unnamed: 0.1', 'Unnamed: 0', 'ID_slide_Variant', 'CDR3', 'Best',
       'Slide', 'Energy', 'Structure', 'UID', 'Antigen',
       'Slide_farmhash_mod_10'],
      dtype='object')


Unnamed: 0,X,y
0,"[-0.4034732923929644, -0.6081636405595369, 0.0...",0
1,"[-0.4034732923929644, -0.6081636405595369, 0.0...",4
2,"[-0.4034732923929644, -0.6081636405595369, 0.0...",5


In [4]:
# Wrap the feature/label columns in the project's dataset class and serve
# them in fixed-order mini-batches of 10 rows (no shuffling).
dataset = datasets.MulticlassDataset(df.loc[:, ["X", "y"]])
loader = DataLoader(dataset, batch_size=10, shuffle=False)

Define the model

In [5]:
# Sanity check: run one forward pass over all rows with the untrained model
# and compute the cross-entropy loss against the integer labels.
model = ml.MulticlassSN10(num_classes=NUM_CLASSES)

# Call the module itself rather than `.forward()` so that any registered
# hooks run; build the input explicitly as a float32 tensor from the
# per-row feature lists.
y_pred = model(torch.tensor(df["X"].tolist(), dtype=torch.float32))
print(f"{y_pred.shape=}")

# Class indices must be int64 for CrossEntropyLoss; create them directly as
# long instead of going through a float tensor first.
y_true = torch.tensor(df["y"].tolist(), dtype=torch.long)
print(f"{y_true.shape=}")

loss = nn.CrossEntropyLoss()(y_pred, y_true)
print(f"{loss=}")

y_pred.shape=torch.Size([100, 6])
y_true.shape=torch.Size([100])
loss=tensor(1.8505, grad_fn=<NllLossBackward0>)


Train the model

In [6]:
# Optimizer built by the project helper from the settings below; the loss is
# standard multi-class cross-entropy.
optimizer = ml.construct_optimizer(
    model=model,
    optimizer_type="Adam",
    learning_rate=0.01,
    momentum=0.9,
    weight_decay=0,
)
loss_fn = nn.CrossEntropyLoss()

# Run 10 passes over the loader. `losses` is overwritten on every pass, so
# after the loop it holds only what the last `train_loop` call returned.
for t in range(10):
    print(f"Epoch {t+1}\n-------------------------------")
    losses = ml.train_loop(loader, model, loss_fn, optimizer)

Epoch 1
-------------------------------
loss: 1.821280  [    0/  100]
Epoch 2
-------------------------------
loss: 1.376157  [    0/  100]
Epoch 3
-------------------------------
loss: 0.914777  [    0/  100]
Epoch 4
-------------------------------
loss: 0.506919  [    0/  100]
Epoch 5
-------------------------------
loss: 0.251449  [    0/  100]
Epoch 6
-------------------------------
loss: 0.125682  [    0/  100]
Epoch 7
-------------------------------
loss: 0.063118  [    0/  100]
Epoch 8
-------------------------------
loss: 0.036617  [    0/  100]
Epoch 9
-------------------------------
loss: 0.023833  [    0/  100]
Epoch 10
-------------------------------
loss: 0.016755  [    0/  100]
