# Run in Google Colab (Google Drive must be mounted at /content/drive)

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Copy the archive of normalized tables from Google Drive into the working
# directory and unpack it. Per the recorded output it contains four CSVs:
# DepMap and HAP1 expression tables plus DepMap and HAP1 CRISPR tables.
# NOTE(review): assumes Drive is already mounted at /content/drive; no mount
# cell is visible in this notebook.
! cp /content/drive/MyDrive/gin_depmap_transfer_learning/normalized_no_leakage.zip .
! unzip normalized_no_leakage.zip

Archive:  normalized_no_leakage.zip
  inflating: depmap_crispr_zscore.csv  
  inflating: depmap_expression_lfc_zscore.csv  
  inflating: hap1_expression_lfc.csv  
  inflating: hap1_crispr.csv         


In [3]:
# Prefix prepended to every CSV filename; the empty string means "current directory".
norm_dir = ""


def _load_table(filename):
    """Read one CSV located under norm_dir, using its first column as the row index."""
    return pd.read_csv(norm_dir + filename, index_col=0)


hap1_expression_lfc = _load_table("hap1_expression_lfc.csv")
hap1_crispr = _load_table("hap1_crispr.csv")
depmap_expression_lfc_zscore = _load_table("depmap_expression_lfc_zscore.csv")
depmap_crispr_zscore = _load_table("depmap_crispr_zscore.csv")

# Cell output: the (rows, columns) shape of each table, in load order.
tuple(
    table.shape
    for table in (
        hap1_expression_lfc,
        hap1_crispr,
        depmap_expression_lfc_zscore,
        depmap_crispr_zscore,
    )
)

((60, 16372), (60, 16432), (1021, 16372), (1021, 16432))

In [4]:
# Copy the zipped TCGA expression table from Google Drive and unpack it into
# the working directory (yields tcga_expression_lfc_zscore.csv per the output).
# NOTE(review): assumes Drive is already mounted at /content/drive.
! cp /content/drive/MyDrive/gin_depmap_transfer_learning/tcga_expression_lfc_zscore.csv.zip .
! unzip tcga_expression_lfc_zscore.csv.zip

Archive:  tcga_expression_lfc_zscore.csv.zip
  inflating: tcga_expression_lfc_zscore.csv  


In [5]:
# Load the TCGA expression table, using its first column as the row index.
tcga_csv_path = norm_dir + "tcga_expression_lfc_zscore.csv"
tcga_expression_lfc_zscore = pd.read_csv(tcga_csv_path, index_col=0)

In [6]:
# Sanity check: the column count should equal that of the DepMap/HAP1
# expression tables (16372 in the recorded run), since the three frames are
# later stacked row-wise for training.
tcga_expression_lfc_zscore.shape

(10534, 16372)

In [7]:
# Fetch the project code and change into its src/ directory so that the
# `utils` and `vae` modules can be imported from the working directory.
# NOTE(review): the clone is not pinned to a commit or tag, so a later run may
# pick up different code; re-running this cell in the same session will also
# report that the destination directory already exists.
! git clone https://github.com/danielchang2002/GI_transfer_learning
%cd GI_transfer_learning/src
# NOTE(review): wildcard import hides which names come from `utils`; `zscore`
# (used below) is presumably one of them — consider importing it explicitly.
from utils import *
from vae import VAE

Cloning into 'GI_transfer_learning'...
remote: Enumerating objects: 9, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 9 (delta 0), reused 9 (delta 0), pack-reused 0[K
Receiving objects: 100% (9/9), done.
/content/GI_transfer_learning/src


In [14]:
from vae import loss_function

In [8]:
# HAP1 expression is the only expression table loaded without a `_zscore`
# suffix, so it is standardised here before being combined with the others.
# NOTE(review): `zscore` is not defined in this notebook (presumably it comes
# from `from utils import *`); which axis it normalises over is not visible
# here — confirm it matches how the DepMap/TCGA tables were z-scored.
hap1_expression_lfc_zscore = zscore(hap1_expression_lfc)

In [9]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

batch_size = 64  # mini-batch size shared by every loader built with get_dataloader


def get_dataloader(df, shuffle=True, device=None):
    """Wrap a DataFrame's values in a DataLoader of float32 mini-batches.

    The whole table is converted to a single tensor and moved to ``device`` up
    front, so batches are already on that device when the loader is iterated.
    Each batch is a 1-tuple ``(tensor,)`` because the underlying TensorDataset
    holds exactly one tensor.

    Args:
        df: DataFrame of numeric values; each row is one sample.
        shuffle: Whether the loader reshuffles rows at the start of every epoch.
        device: Device to place the data on. When omitted, CUDA is used if it
            is available and the CPU otherwise, so the helper no longer fails
            on machines without a GPU.

    Returns:
        A DataLoader yielding batches of at most ``batch_size`` rows.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    values = torch.tensor(df.values).float().to(device)
    return DataLoader(TensorDataset(values), batch_size=batch_size, shuffle=shuffle)

# Training data: TCGA, DepMap and HAP1 expression rows stacked into one table
# and reshuffled every epoch.
# NOTE(review): the HAP1 rows are part of the training set AND make up the
# whole of test_loader, so the "test" metrics printed during training are not
# measured on held-out data — they track reconstruction quality on HAP1 only.
train_loader = get_dataloader(pd.concat([tcga_expression_lfc_zscore, depmap_expression_lfc_zscore, hap1_expression_lfc_zscore]), shuffle=True)
# Fixed order (no shuffling) so evaluation batches are the same every epoch.
test_loader = get_dataloader(hap1_expression_lfc_zscore, shuffle=False)

In [10]:
# Reconstruction metric reported alongside the VAE loss. With its default
# reduction, nn.MSELoss returns the mean squared error over all elements.
mse = nn.MSELoss()

In [11]:
# Release unused cached GPU memory held by PyTorch's caching allocator before
# the model is allocated (useful when this notebook is re-run in one session).
torch.cuda.empty_cache()

In [12]:
# Seed PyTorch's random number generators before anything stochastic happens,
# so weight initialisation, DataLoader shuffling and any sampling the model
# draws from the global RNG repeat across "Restart & Run All".
SEED = 0
torch.manual_seed(SEED)

# The first VAE argument is the number of expression columns; 4096 and 128 are
# passed through as-is.
# NOTE(review): 4096 / 128 are presumably the hidden and latent sizes — confirm
# against the VAE constructor in vae.py.
model = VAE(depmap_expression_lfc_zscore.shape[1], 4096, 128).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 300          # total passes over train_loader
num_warmup_epochs = 100   # epochs over which beta ramps from 0 to 1 in the training loop

In [15]:
# Train the VAE; after every epoch, report the loss and the reconstruction MSE
# on both train_loader and test_loader.
for epoch in range(num_epochs):
    # beta ramps linearly from 0 to 1 over the first num_warmup_epochs epochs.
    # NOTE(review): presumably the weight of the KL term inside loss_function —
    # confirm in vae.py.
    beta = min(1.0, float(epoch) / num_warmup_epochs)

    model.train()
    train_loss = 0.0
    train_mse = 0.0
    for (data_,) in train_loader:
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data_)
        loss = loss_function(recon_batch, data_, mu, logvar, beta)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        # mse() is a per-batch mean, so weight it by the batch size; otherwise a
        # short final batch counts as much as a full one. detach() keeps this
        # metric from building an autograd graph.
        train_mse += mse(recon_batch.detach(), data_).item() * data_.size(0)

    # NOTE(review): dividing the loss by the sample count assumes loss_function
    # sums over the batch rather than averaging — confirm in vae.py.
    train_loss /= len(train_loader.dataset)
    train_mse /= len(train_loader.dataset)

    model.eval()
    test_loss = 0.0
    test_mse = 0.0
    with torch.no_grad():
        for (data_,) in test_loader:
            recon_batch, mu, logvar = model(data_)
            # Same beta as the training pass, so the two losses are comparable.
            loss = loss_function(recon_batch, data_, mu, logvar, beta)
            test_loss += loss.item()
            test_mse += mse(recon_batch, data_).item() * data_.size(0)

    test_loss /= len(test_loader.dataset)
    test_mse /= len(test_loader.dataset)

    print('Epoch: {} \tTrain loss: {:.3f} \t Test loss: {:.3f}'.format(epoch, train_loss, test_loss))
    print('Epoch: {} \tTrain mse: {:.3f} \t Test mse: {:.3f}'.format(epoch, train_mse, test_mse))

Epoch: 0 	Train loss: 13521.797 	 Test loss: 17751.369
Epoch: 0 	Train mse: 0.825 	 Test mse: 1.084
Epoch: 1 	Train loss: 10061.823 	 Test loss: 17238.431
Epoch: 1 	Train mse: 0.615 	 Test mse: 1.053
Epoch: 2 	Train loss: 9256.011 	 Test loss: 16827.140
Epoch: 2 	Train mse: 0.565 	 Test mse: 1.028
Epoch: 3 	Train loss: 8730.225 	 Test loss: 16465.939
Epoch: 3 	Train mse: 0.533 	 Test mse: 1.006
Epoch: 4 	Train loss: 8326.342 	 Test loss: 15916.825
Epoch: 4 	Train mse: 0.508 	 Test mse: 0.972
Epoch: 5 	Train loss: 8022.223 	 Test loss: 15613.763
Epoch: 5 	Train mse: 0.490 	 Test mse: 0.954
Epoch: 6 	Train loss: 7713.692 	 Test loss: 15383.811
Epoch: 6 	Train mse: 0.471 	 Test mse: 0.940
Epoch: 7 	Train loss: 7475.024 	 Test loss: 14953.206
Epoch: 7 	Train mse: 0.456 	 Test mse: 0.913
Epoch: 8 	Train loss: 7274.270 	 Test loss: 14782.945
Epoch: 8 	Train mse: 0.444 	 Test mse: 0.903
Epoch: 9 	Train loss: 7088.612 	 Test loss: 14517.009
Epoch: 9 	Train mse: 0.432 	 Test mse: 0.886
Epoch: 1

KeyboardInterrupt: 

In [17]:
# Save the model's state dict (parameters and buffers only, not the optimizer)
# to a file in the current working directory.
# NOTE(review): the recorded output of the training cell ends in a
# KeyboardInterrupt around epoch 10, so this checkpoint may not correspond to
# the 300 epochs its filename claims — confirm before reusing it.
torch.save(model.state_dict(), 'vae_300_epochs.pt')