In [1]:
import torch
import numpy as np
import pandas as pd
import scipy.sparse as sp

import sys
sys.path.append("../..")

from metrics import print_binary_classification_metrics

from MOFGCN.model import GModel
from MOFGCN.optimizer import Optimizer
from MOFGCN.Entire_Drug_Cell.sampler import Sampler
from MOFGCN.myutils import roc_auc, translate_result, dir_path, exp_similarity, full_kernel, sparse_kernel, jaccard_coef, torch_corr_x_y

In [2]:
data_dir = '../../../../MOFGCN_/'

cna = pd.read_csv(data_dir + "nci60GeneCop.csv", index_col=0).T.fillna(0)
mutation = pd.read_csv(data_dir + "nci60GeneMut.csv", index_col=0).T.fillna(0)

cna = np.array(cna, dtype=np.float32)
mutation = np.array(mutation, dtype=np.float32)

In [3]:
gene = pd.read_csv('/home/kuangr/inoue019/drGAT/baseline/nci60_gene_exp.csv', index_col=0).T
drug_response = pd.read_csv('/home/kuangr/inoue019/drGAT/baseline/nci60Act.csv', index_col=0)
drug_response.columns = gene.index
gene = np.array(gene, dtype=np.float32)

In [4]:
nsc_class = pd.read_csv('/home/kuangr/inoue019/drGAT/results/nsc_cid_smiles_class_name.csv', index_col=0)[['NSC', 'MECHANISM']]
nsc_class = nsc_class[nsc_class.MECHANISM == 'DNA']
nsc_class.shape

(269, 2)

In [5]:
drug_response = drug_response[drug_response.index.isin(list(nsc_class.NSC))]
drug_response

Unnamed: 0,MCF7,MDA_MB_231,HS578T,BT_549,T47D,SF_268,SF_295,SF_539,SNB_19,SNB_75,...,PC_3,DU_145,786_0,A498,ACHN,CAKI_1,RXF_393,SN12C,TK_10,UO_31
740,0.703626,-1.219032,-1.892792,-0.877267,-1.156158,0.510978,0.536910,0.589970,-0.356387,-1.460314,...,0.626838,0.507493,0.682664,-0.891882,0.499337,0.541612,-1.400771,0.602244,-1.641942,0.231533
752,0.475296,-0.312852,-1.089067,-0.441030,-0.058619,0.057507,0.125700,0.111693,-3.285729,-0.114051,...,-0.321691,0.507798,0.384102,-1.314527,-0.318444,0.557175,0.345056,-0.047731,0.155244,-0.160223
755,0.704027,-0.438857,-0.548744,-1.441942,0.496864,0.096265,-0.082186,0.417634,-1.927502,-0.372021,...,0.123647,0.543639,0.623318,-1.374212,-0.173024,0.314436,-1.002183,-0.881252,0.491364,-0.183200
762,0.547964,-1.033803,-1.399273,-0.538268,1.137432,0.135942,-0.094460,0.562628,-1.398911,-1.050409,...,-0.294845,0.934592,0.591263,-0.552673,1.797227,1.260987,0.172806,0.869675,-0.529810,-0.634413
1390,0.517269,0.960399,-1.710657,-0.260192,-0.428596,-2.369012,0.224249,-1.481654,-2.369012,-0.950229,...,0.534589,0.819455,0.091197,-0.188655,0.800745,1.468891,0.182377,-0.100527,0.519999,-0.167908
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
772992,-0.672839,-0.009522,1.593912,-0.015442,-0.336637,0.263434,0.524381,1.450999,-0.073388,1.877515,...,0.140564,-0.863786,0.409866,2.250914,0.739791,1.910757,2.324851,0.728314,1.252344,0.959549
783107,0.780763,0.353024,-1.901341,-1.812769,0.402564,0.004460,0.329454,-0.072378,-2.294693,1.159211,...,0.031677,0.463469,0.134452,0.537280,0.483558,0.484789,0.790988,0.112736,-0.212100,0.516311
784722,1.544864,-0.722438,-0.591117,-0.292344,0.491538,0.250683,0.672894,0.135474,0.555915,0.754361,...,-0.565436,0.387815,0.702974,0.713895,0.149978,-1.265221,-0.104946,0.714125,-0.574573,-1.220123
789797,-0.561025,-0.217302,-0.638256,-0.638256,1.112001,-0.638256,-0.674451,-0.638256,-0.638256,0.861387,...,-0.266452,-0.638256,-0.638256,1.989888,-0.521750,2.481620,-0.331090,1.672402,-0.602061,2.006186


In [6]:
drug_feature = pd.read_csv('/home/kuangr/inoue019/Drug_Response_Prediction_using_GNN/drug cell association/mfp.csv', index_col=0)
feature_drug = np.array(drug_feature, dtype=np.float32)
feature_drug

array([[0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [7]:
train = pd.read_csv('/home/kuangr/inoue019/Drug_Response_Prediction_using_GNN/drug cell association/train.csv', index_col=0)
val = pd.read_csv('/home/kuangr/inoue019/Drug_Response_Prediction_using_GNN/drug cell association/val.csv', index_col=0)
test = pd.read_csv('/home/kuangr/inoue019/Drug_Response_Prediction_using_GNN/drug cell association/test.csv', index_col=0)

In [9]:
idxs, cols = list(drug_response.index), list(drug_response.columns)

In [10]:
def mask(data, rows, cols):
    for i, j in zip(rows, cols):
        data[j, i] = 0 
    return data

In [11]:
cell_drug_ =  drug_response.T.fillna(0)
cell_drug = np.array(cell_drug_, dtype=np.float32)

In [12]:
# remove test
rows_indices_test = [idxs.index(i) for i in test['Drug']]
cols_indices_test = [cols.index(i) for i in test['Cell']]
cell_drug_ = mask(cell_drug, rows_indices_test, cols_indices_test)

In [13]:
# remove val
rows_indices_val = [idxs.index(i) for i in val['Drug']]
cols_indices_val = [cols.index(i) for i in val['Cell']]
cell_drug_ = mask(cell_drug, rows_indices_val, cols_indices_val)

In [14]:
# remove val
train_data = (cell_drug > 0).astype(np.float32)
np.sum(train_data)

4560.0

In [15]:
np.sum((drug_response > 0).values)

7624

In [16]:
def get_mask(data, rows, cols):
    tmp = np.zeros(data.shape)
    for i, j in zip(rows, cols):
        tmp[i, j] = 1

    return torch.tensor(tmp.astype(bool))

In [17]:
test_mask = get_mask(drug_response, rows_indices_test, cols_indices_test).T
test_mask

tensor([[ True, False, False,  ...,  True, False, False],
        [False, False,  True,  ..., False,  True, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [ True, False, False,  ..., False, False, False],
        [False,  True,  True,  ..., False,  True,  True],
        [ True,  True,  True,  ..., False, False, False]])

In [18]:
test_data = (torch.tensor(drug_response.values > 0, dtype=int).T * test_mask)
test_data

tensor([[1, 0, 0,  ..., 1, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [1, 0, 0,  ..., 0, 0, 0],
        [0, 1, 1,  ..., 0, 0, 0],
        [1, 0, 0,  ..., 0, 0, 0]])

In [19]:
val_mask = get_mask(drug_response, rows_indices_val, cols_indices_val).T
val_mask

tensor([[False, False, False,  ..., False,  True, False],
        [False,  True, False,  ...,  True, False, False],
        [False, False,  True,  ...,  True, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False,  True, False]])

In [20]:
val_data = (torch.tensor(drug_response.values > 0, dtype=int).T * val_mask)
val_data

tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 1, 0]])

In [21]:
len(torch.masked_select(test_data, test_mask))

3114

In [22]:
rows_indices_train = [idxs.index(i) for i in train['Drug']]
cols_indices_train = [cols.index(i) for i in train['Cell']]

In [23]:
train_mask = get_mask(drug_response, rows_indices_train, cols_indices_train).T
train_mask

tensor([[False,  True,  True,  ..., False, False,  True],
        [ True, False, False,  ..., False, False,  True],
        [ True,  True, False,  ..., False,  True,  True],
        ...,
        [False,  True,  True,  ...,  True,  True,  True],
        [ True, False, False,  ...,  True, False, False],
        [False, False, False,  ...,  True, False,  True]])

In [24]:
model = GModel(adj_mat=torch.tensor(train_data), gene=gene, cna=cna, mutation=mutation, sigma=2, k=2, iterates=3,
               feature_drug=feature_drug, n_hid1=192, n_hid2=64, alpha=8.70, device="cuda")

In [25]:
opt = Optimizer(model, torch.tensor(train_data), train_mask, val_data, val_mask, 
                test_data, test_mask, roc_auc, lr=1e-3, epochs=1000, device="cuda").to("cuda")

In [26]:
epoch, true_data, predict_data, test = opt()

epoch:   0 loss:0.823421 auc:0.5234
epoch:  20 loss:0.345163 auc:0.8448
epoch:  40 loss:0.211115 auc:0.8570
epoch:  60 loss:0.160556 auc:0.8664
epoch:  80 loss:0.140144 auc:0.8703
epoch: 100 loss:0.129941 auc:0.8721
epoch: 120 loss:0.124026 auc:0.8732
epoch: 140 loss:0.120231 auc:0.8737
epoch: 160 loss:0.117616 auc:0.8740
epoch: 180 loss:0.115730 auc:0.8742
epoch: 200 loss:0.114319 auc:0.8743
epoch: 220 loss:0.113237 auc:0.8744
epoch: 240 loss:0.112389 auc:0.8744
epoch: 260 loss:0.111711 auc:0.8744
epoch: 280 loss:0.111160 auc:0.8744
epoch: 300 loss:0.110708 auc:0.8743
epoch: 320 loss:0.110334 auc:0.8743
epoch: 340 loss:0.110023 auc:0.8743
epoch: 360 loss:0.109763 auc:0.8743
Fit finished.


# Val

In [41]:
predict = torch.round(predict_data).squeeze()
res = print_binary_classification_metrics(
    true_data.cpu().detach().numpy(), predict.cpu().detach().numpy()
)
res

Unnamed: 0,Accuracy,Precision,Recall,F1 Score,True Positive,True Negative,False Positive,False Negative
0,0.794155,0.786531,0.802333,0.794354,1238,1235,336,305


# Test

In [45]:
predict = torch.round(test).squeeze()
test_y = np.load('/home/kuangr/inoue019/Drug_Response_Prediction_using_GNN/drug cell association/test_labels.npy')
res = print_binary_classification_metrics(
    test_y, predict.cpu().detach().numpy()
)
res

Unnamed: 0,Accuracy,Precision,Recall,F1 Score,True Positive,True Negative,False Positive,False Negative
0,0.5,0.488064,0.483892,0.485969,736,821,772,785


In [31]:
len(test)

3114