In [99]:
import sys
import os
from tqdm.notebook import tqdm

sys.path.append("../")

import torch
import anndata as nd
from data.original import pertdata as pt
import pandas as pd
import time
from contextlib import contextmanager


def evaluate(model, objective, data_loader, with_y = True):
    model.eval()
    total_loss: float = 0.0
    total_samples: float = 0.0
    with torch.no_grad():
        if with_y:
            for x, y in data_loader:
                out = model(x)
                loss = objective(out, y)
                total_loss += loss.item()
                total_samples += len(x)
        else:
            for x in data_loader:
                x = x[0]
                out = model(x)
                loss = objective(out, x)
                total_loss += loss.item()
                total_samples += len(x)

    return total_loss / total_samples


@contextmanager
def tracking(model, objective, validation_set, with_y=True):
    start_time = time.perf_counter()
    yield
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time
    loss = evaluate(model, objective, validation_set, with_y=with_y)
    print(f"Trained : {elapsed_time} seconds")
    print(f"Current Loss: {loss}")
    print(f"Current Accuracy: {evaluate(model, lambda y_hat, y: torch.sum(y_hat == y), validation_set, with_y=with_y)}")

In [92]:
def get_train_data():
    norman = pt.PertData.from_repo(name="norman", save_dir="../data/original")
    data = norman.adata
    train_slice = slice(0, int(0.8*len(data.X.toarray())))

    X_train = torch.from_numpy(data.X.toarray())[train_slice]
    y_train =  torch.from_numpy(pd.get_dummies(pt.generate_fixed_perturbation_labels(data.obs["condition"])).to_numpy())[train_slice]
    return X_train, y_train.to(torch.float32)


def get_val_data():
    norman = pt.PertData.from_repo(name="norman", save_dir="../data/original")
    data = norman.adata
    data_slice = slice(int(0.8*len(data.X.toarray())), )

    X_train = torch.from_numpy(data.X.toarray())[data_slice]
    y_train =  torch.from_numpy(pd.get_dummies(pt.generate_fixed_perturbation_labels(data.obs["condition"])).to_numpy())[data_slice]
    return X_train, y_train.to(torch.float32)

In [101]:
class AE(torch.nn.Module):
    def __init__(self):
        super().__init__()

        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(5045, 5045),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(5045, 5045),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(5045, 5045),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(5045, 5045),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(5045, 1000),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(1000, 300),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(300, 30),
        )

        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(30, 300),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(300, 1000),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(1000, 5045),
            torch.nn.ReLU(),
        )


    def forward(self, x):
        z = self.encoder(x)
        y = self.decoder(z)
        return y


class Classifier(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(5045, 1000),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(1000, 1000),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(1000, 1000),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(1000, 1000),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(1000, 1000),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(1000, 1000),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(1000, 1000),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(1000, 1000),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(1000, 1000),
            torch.nn.LeakyReLU(0.1),
            torch.nn.Linear(1000, 237)
        )


    def forward(self, x):
        return self.layers(x)

In [79]:
get_train_data()[1].dtype

Dataset directory already exists: ../data/original/norman
Loading dataset: norman


torch.float32

In [80]:
EPOCHS = 3
BATCH_SIZE = 128

In [81]:
train_set_superv = torch.utils.data.TensorDataset(*get_train_data())
val_set_superv = torch.utils.data.TensorDataset(*get_val_data())
train_set_superv = torch.utils.data.DataLoader(train_set_superv, BATCH_SIZE, shuffle=True)
val_set_superv= torch.utils.data.DataLoader(val_set_superv, BATCH_SIZE, shuffle=False)

model = Classifier()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.00001)
objective = torch.nn.CrossEntropyLoss()

for i in range(EPOCHS):
    print(f"Epoch {i}:")
    with tracking(model, objective, val_set_superv):
        model.train()
        for x, y in tqdm(train_set_superv):
            out = model(x)
            loss = objective(out, y)
            loss.backward()
            optimizer.step()

del train_set_superv
del val_set_superv

Dataset directory already exists: ../data/original/norman
Loading dataset: norman
Dataset directory already exists: ../data/original/norman
Loading dataset: norman
Epoch 0:


  0%|          | 0/571 [00:00<?, ?it/s]

Trained : 14.622327082994161 seconds
Current Loss: 0.04219052391769554
Current Accuracy: 0.0
Epoch 1:


  0%|          | 0/571 [00:00<?, ?it/s]

Trained : 13.834802249999484 seconds
Current Loss: 0.04200136187724734
Current Accuracy: 0.0
Epoch 2:


  0%|          | 0/571 [00:00<?, ?it/s]

Trained : 13.799005041000783 seconds
Current Loss: 0.042162621317317066
Current Accuracy: 0.0


In [102]:
BATCH_SIZE= 2556
train_set_unsuperv = torch.utils.data.TensorDataset(get_train_data()[0])
val_set_unsuperv = torch.utils.data.TensorDataset(get_val_data()[0])

train_set_unsuperv = torch.utils.data.DataLoader(train_set_unsuperv, BATCH_SIZE, shuffle=True)
val_set_unsuperv = torch.utils.data.DataLoader(val_set_unsuperv, BATCH_SIZE, shuffle=False)

model = AE()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.00001)
objective = torch.nn.MSELoss()

for i in range(EPOCHS):
    print(f"Epoch {i}:")
    with tracking(model, objective, val_set_unsuperv, with_y=False):
        model.train()
        for x in tqdm(train_set_unsuperv):
            x = x[0]
            out = model(x)
            loss = objective(out, x)
            loss.backward()
            optimizer.step()

del train_set_unsuperv
del val_set_unsuperv


Dataset directory already exists: ../data/original/norman
Loading dataset: norman
Dataset directory already exists: ../data/original/norman
Loading dataset: norman
Epoch 0:


  0%|          | 0/29 [00:00<?, ?it/s]

Trained : 43.03427679199376 seconds
Current Loss: 8.323808554938785e-05
Current Accuracy: 2419.055657584562
Epoch 1:


  0%|          | 0/29 [00:00<?, ?it/s]

Trained : 42.83595691599476 seconds
Current Loss: 7.741444078749663e-05
Current Accuracy: 2736.025999122855
Epoch 2:


  0%|          | 0/29 [00:00<?, ?it/s]

Trained : 42.91800879201037 seconds
Current Loss: 7.83318012330323e-05
Current Accuracy: 3410.1599144783727
