In [9]:
import sys
sys.path.append("..")

import pandas as pd

from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.tensorboard import SummaryWriter

from dataset import *
from features import *
from train import *
from model import *

In [2]:
# Load the training table and split its rows into train / validation subsets,
# stratifying on the "cell_type" column.
de_df = pd.read_parquet("../data/de_train.parquet")

# NOTE(review): 0.2 and 45 are presumably the validation fraction and the random seed —
# confirm against the signature of stratified_split.
train_index, val_index = stratified_split(de_df["cell_type"], 0.2, 45)

# Positional row selection for each split, then wrap each slice in a DataFrameDataset.
train_frame = de_df.iloc[train_index]
val_frame = de_df.iloc[val_index]
de_df_dataset_train = DataFrameDataset(train_frame, mode="df")
de_df_dataset_val = DataFrameDataset(val_frame, mode="df")

In [4]:
# Category vocabularies are sorted so their order is deterministic. A bare list(set(...))
# of strings can come out in a different order on every kernel start (string hashing is
# randomized per process), which would make any order-dependent encoding — and any weights
# trained on it — irreproducible across sessions.
mtypes = sorted(set(de_df["sm_name"].to_list()))

# Feature pipelines applied to the molecule identifier, keyed by feature name.
mol_transforms = {
    # Molecule name -> SMILES (looked up in the CSV) -> molecule object -> Morgan fingerprint.
    "morgan_fp": TransformList([Sm2Smiles("../config/sm_smiles.csv", mode="path"), Smiles2Mol(), Mol2Morgan()]),
    # Molecule name -> one-hot over the molecule names present in de_df.
    "one_hot": TransformList([Type2OneHot(mtypes)])
}

ctypes = sorted(set(de_df["cell_type"].to_list()))

# Feature pipelines applied to the cell type, keyed by feature name.
cell_transforms = {
    # Cell type -> one-hot over the cell types present in de_df.
    "one_hot": TransformList([Type2OneHot(ctypes)])
}

In [5]:
# Both splits share the same molecule / cell transform dictionaries; only the
# underlying DataFrameDataset differs.
shared_transforms = (mol_transforms, cell_transforms)
de_dataset_train = DEDataset(de_df_dataset_train, *shared_transforms)
de_dataset_val = DEDataset(de_df_dataset_val, *shared_transforms)

In [11]:
import os

# Move the working directory one level up from where the kernel started, so the relative
# paths used below ("temp/...", "./runs/...") resolve from there.
#
# A bare os.chdir("..") climbs one more level every time the cell is re-run. Remember the
# starting directory on the first run and always change to its parent instead, which makes
# the cell idempotent.
#
# NOTE: cells above this one use "../"-relative paths and therefore must be executed
# before this cell changes the working directory.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, ".."))

In [12]:
#### MODEL ####
# Layer widths, named so the regressor input width is derived instead of hard-coded.
# NOTE(review): 2048 is presumably the fingerprint length produced by Mol2Morgan — confirm.
mol_in_dim = 2048
mol_emb_dim = 200
cell_emb_dim = 10
# NOTE(review): assumes 5 columns of de_df are not regression targets — confirm against
# the parquet schema.
n_targets = len(de_df.columns) - 5

mol_enc = nn.Sequential(
    nn.Linear(mol_in_dim, mol_emb_dim)
)
cell_enc = nn.Sequential(
    nn.Linear(len(ctypes), cell_emb_dim)
)
# Input width is the sum of both embedding widths (200 + 10 = 210).
# NOTE(review): presumably CombinerModel concatenates the two encoder outputs — confirm.
regressor = nn.Sequential(
    nn.Linear(mol_emb_dim + cell_emb_dim, 100),
    nn.Tanh(),
    nn.Dropout(0.2),
    nn.Linear(100, 100),
    nn.Tanh(),
    nn.Dropout(0.2),
    nn.Linear(100, 500),
    nn.Tanh(),
    nn.Linear(500, n_targets)
)

model = CombinerModel(
        mol_encoder=mol_enc,
        cell_encoder=cell_enc,
        regressor=regressor)

# Snapshot the freshly initialised weights. Create the target directory first so the
# save does not fail on a clean checkout.
ini_model_path = "temp/ini_model.pkl"
os.makedirs(os.path.dirname(ini_model_path), exist_ok=True)
torch.save(model.state_dict(), ini_model_path)

#### LOADERS, DATA ####
# Batch size 256, default collation, no shuffling (DataLoader defaults).
train_dataloader = DataLoader(de_dataset_train, 256)
val_dataloader = DataLoader(de_dataset_val, 256)

#### CONFIG ####
lr = 0.01
epochs = 45
# Use the first GPU when one is available; otherwise fall back to the CPU instead of
# failing on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# loss_fn is used as provided by the star imports at the top of the notebook; the former
# `loss_fn = loss_fn` self-assignment was a no-op and has been dropped.
optimizer = Adam(model.parameters(), lr=lr)
# Multiply the learning rate by 0.8 after 7 epochs without improvement of the monitored
# (minimised) metric.
scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.8, patience=7)

#### TENSORBOARD ####
writer = SummaryWriter("./runs/kaggle/trying_out")

In [13]:
# Run the full training loop with the objects configured above.
#
# NOTE(review): the recorded run of this cell failed with
#   TypeError: linear(): argument 'input' (position 1) must be Tensor, not list
# i.e. an nn.Linear layer received a Python list. Presumably one of the feature
# transforms returns a plain list per sample, which the DataLoader's default collation
# turns into a list of tensors rather than a single batch tensor — verify the
# per-sample output of DEDataset and either return tensors/arrays from the transforms
# or pass a custom collate_fn to the DataLoaders.
train_many_epochs(
    model,
    train_dataloader,
    val_dataloader,
    epochs,
    loss_fn,
    optimizer,
    scheduler,
    writer=writer,
    device=device,
)

TypeError: linear(): argument 'input' (position 1) must be Tensor, not list