In [1]:
import numpy as np
import pandas as pd
import scipy.sparse as sp

import sys
sys.path.append("../..")

from MOFGCN.model import GModel
from MOFGCN.optimizer import Optimizer
from sklearn.model_selection import KFold
from MOFGCN.Entire_Drug_Cell.sampler import Sampler
from MOFGCN.myutils import roc_auc, translate_result, dir_path

In [2]:
data_dir = '../../processed_data/'

In [3]:
# 加载细胞系-药物矩阵
cell_drug = pd.read_csv(data_dir + "cell_drug_binary.csv", index_col=0, header=0)
cell_drug = np.array(cell_drug, dtype=np.float32)
adj_mat_coo_data = sp.coo_matrix(cell_drug).data

# 加载药物-指纹特征矩阵
drug_feature = pd.read_csv(data_dir + "drug_feature.csv", index_col=0, header=0)
feature_drug = np.array(drug_feature, dtype=np.float32)

# 加载细胞系-基因特征矩阵
gene = pd.read_csv(data_dir + "gene_feature.csv.zip", index_col=0, header=0)
gene = np.array(gene, dtype=np.float32)

# 加载细胞系-cna特征矩阵
cna = pd.read_csv(data_dir + "cna_feature.csv.zip", index_col=0, header=0)
cna = np.array(cna, dtype=np.float32)

# 加载细胞系-mutaion特征矩阵
mutation = pd.read_csv("../../../data/CCLE/cell_mutation.csv", index_col=0, header=0)
mutation = np.array(mutation, dtype=np.float32)

# 加载null_mask
null_mask = np.zeros(cell_drug.shape, dtype=np.float32)

In [4]:
k = 5
kfold = KFold(n_splits=k, shuffle=True, random_state=11)

epochs = []
true_datas = pd.DataFrame()
predict_datas = pd.DataFrame()

for fold in range(k):
    for train_index, test_index in kfold.split(np.arange(adj_mat_coo_data.shape[0])):
        sampler = Sampler(cell_drug, train_index, test_index, null_mask)
        model = GModel(adj_mat=sampler.train_data, gene=gene, cna=cna, mutation=mutation, sigma=2, k=2, iterates=3,
                       feature_drug=feature_drug, n_hid1=192, n_hid2=64, alpha=8.70, device="cuda")
        opt = Optimizer(model, sampler.train_data, sampler.test_data, sampler.test_mask, sampler.train_mask,
                        roc_auc, lr=1e-3, epochs=1000, device="cuda").to("cuda")
        epoch, true_data, predict_data = opt()

        epochs.append(epoch)
        true_datas = true_datas.append(translate_result(true_data))
        predict_datas = predict_datas.append(translate_result(predict_data))

torch.Size([436, 24])
epoch:   0 loss:0.623105 auc:0.5237
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
epoch:  20 loss:0.303179 auc:0.7746
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
epoch:  40 loss:0.257120 a

epoch: 360 loss:0.059336 auc:0.8463
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
epoch: 380 loss:0.059090 auc:0.8465
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
torch.Size([436, 24])
epoch: 400 loss:0.058762 auc:0.8469
torch.Size([

AttributeError: 'DataFrame' object has no attribute 'append'

In [22]:
sampler.test_data.shape

torch.Size([436, 24])

In [17]:
sampler.train_data

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [10]:
sampler.train_data

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [24]:
sampler.test_mask.shape

torch.Size([436, 24])

In [27]:
sampler.test_mask

(436, 24)

In [13]:
sampler.train_mask

tensor([[1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 0, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        ...,
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1]])

In [None]:
file = open("./result_data/epochs.txt", "w")
file.write(str(epochs))
file.close()

In [None]:
pd.DataFrame(true_datas)
# .to_csv("./result_data/true_data.csv")

In [None]:
pd.DataFrame(predict_datas)
# .to_csv("./result_data/predict_data.csv")