In [37]:
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm

import torch
from torch import nn

seed = 200
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [38]:
expr = pd.read_csv("data/expresion_matrix.csv")
expr.head()

Unnamed: 0,WASH7P,RP11-34P13.7,CICP27,AL627309.1,RP11-34P13.15,RP11-34P13.14,RP11-34P13.13,AP006222.2,RP4-669L17.10,RP4-669L17.8,...,MT-CO2,MT-ATP8,MT-ATP6,MT-CO3,MT-ND3,MT-ND4L,MT-ND4,MT-ND5,MT-ND6,MT-CYB
ACH-000956,1.134038,2.145153,1.396459,1.735286,0.0,0.018505,1.835159,2.400994,4.177105,0.551202,...,0.678072,1.169925,0.0,0.584963,3.499527,0.910733,0.443607,1.555816,2.432959,3.174726
ACH-000323,1.865275,2.51338,0.559418,1.051912,0.0,0.096828,1.799706,0.608955,2.571064,1.051802,...,0.505891,0.176323,0.214125,0.250962,1.411426,0.15056,0.565597,0.432959,0.815575,1.454176
ACH-000905,2.692535,2.769543,0.647189,1.954041,0.15056,0.354248,1.755331,0.543573,2.778809,0.905089,...,0.263034,0.163499,0.275007,0.263034,1.063503,0.0,0.0,0.176323,0.475085,2.049631
ACH-000973,2.16583,2.782041,0.566833,1.044218,0.0,1.141376,2.174128,1.196978,4.05251,0.552341,...,0.526069,0.0,0.0,0.214125,1.744161,0.124328,0.263034,0.584963,1.570463,1.970854
ACH-000070,0.050187,2.037069,0.808068,1.544984,0.0,0.05331,0.090498,1.00992,4.296848,0.005366,...,0.0,0.286881,0.0,0.014355,2.204767,0.0,0.214125,0.963474,1.636915,1.978196


In [39]:
sen = pd.read_csv("data/sensitivity_matrix_Activity_Area.csv", index_col = 0)
sen.head()

Unnamed: 0,AEW541,Nilotinib,17-AAG,PHA-665752,Lapatinib,Nutlin-3,AZD0530,PF2341066,L-685458,ZD-6474,...,LBW242,PD-0325901,PD-0332991,Paclitaxel,AZD6244,PLX4720,RAF265,TAE684,TKI258,Erlotinib
ACH-000956,1.6723,0.1278,3.0622,0.9168,0.5246,0.7904,0.6198,0.8545,0.08479,0.4765,...,0.4906,2.8355,0.6272,5.7297,2.8096,0.9326,0.8884,1.3712,0.8447,0.245
ACH-000323,1.1852,,5.0587,1.0796,0.6577,0.8625,1.9068,0.9563,1.1448,2.8994,...,1.3794,1.294,,5.9528,1.3092,1.4854,,1.7799,1.8371,0.8748
ACH-000905,0.9948,0.5269,3.535,0.0,1.089,0.0,0.8491,0.1933,0.1657,0.8562,...,0.2789,0.9413,0.0,7.0226,0.5872,0.5155,1.6212,1.1746,0.6844,1.92
ACH-000973,1.5436,0.5869,3.782,0.3701,0.1402,0.2235,1.2147,0.9721,0.3791,1.7839,...,0.0185,2.221,0.7454,5.5687,1.1495,0.07659,1.2339,1.3417,1.3518,0.2919
ACH-000070,1.7665,2.1147,3.6605,1.8775,1.5649,2.772,1.8525,2.0731,3.6396,2.0536,...,1.4433,2.5862,2.4569,7.7957,2.5412,1.5218,3.4219,3.2246,2.6279,1.6441


In [40]:
X = np.array(expr)
y = np.array(sen['Erlotinib'])
X.shape, y.shape

((467, 21703), (467,))

In [41]:
X = X[~np.isnan(y)]
y = y[~np.isnan(y)]
X.shape, y.shape

((466, 21703), (466,))

In [42]:
X_norm = (X - X.mean(axis=0)) / X.std(axis=0)
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, train_size = 0.75)

In [43]:
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(dim=1)
y_test = torch.tensor(y_test, dtype=torch.float32)

In [44]:
X_train.shape, X_test.shape

(torch.Size([349, 21703]), torch.Size([117, 21703]))

In [45]:
## Define our MLP model
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.linear1 = nn.Linear(input_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
        self.linear3 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        y = self.linear1(x)
        y = self.relu(y)
        y = self.linear2(y)
        y = self.relu(y)
        y = self.linear3(y)
        return y

In [83]:
n_genes = X.shape[1]

model = MLP(input_dim = n_genes, hidden_dim = 50, output_dim = 1)

In [84]:
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [85]:
max_epock = 10

model.train()
for i in range(max_epock):
    optimizer.zero_grad()
    y_pred = model(X_train)
    loss = loss_function(y_pred, y_train)
    loss.backward()
    optimizer.step()
    print(loss)

tensor(0.6977, grad_fn=<MseLossBackward0>)
tensor(674.0610, grad_fn=<MseLossBackward0>)
tensor(90.3344, grad_fn=<MseLossBackward0>)
tensor(26.9443, grad_fn=<MseLossBackward0>)
tensor(19.8018, grad_fn=<MseLossBackward0>)
tensor(7.0374, grad_fn=<MseLossBackward0>)
tensor(36.7250, grad_fn=<MseLossBackward0>)
tensor(22.5659, grad_fn=<MseLossBackward0>)
tensor(5.0121, grad_fn=<MseLossBackward0>)
tensor(22.1889, grad_fn=<MseLossBackward0>)


In [86]:
model.eval()
y_hat = model(X_test)

In [87]:
y_hat = y_hat.squeeze().detach().numpy()
# y_test = y_test.numpy()

In [88]:
mse = np.mean((y_test - y_hat)**2)
cor = np.corrcoef(y_test, y_hat)[0,1]

print(mse)
print(cor)

6.82287
0.0656267498571769
