In [1]:
import pandas as pd
import numpy as np
import random
import scanpy as sc
import os
import sys

from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import roc_auc_score

sys.path.insert(0, '../repos/GNNImpute/')
from GNNImpute.api import GNNImpute

In [2]:
def get_data_for_i(i, data_dir='../../data/cell_simulation_10000'):
    """Load one dropout level of the simulated data and build its evaluation mask.

    Parameters
    ----------
    i : int
        Dropout level. Selects the file ``drp_{i}0.csv.gz`` and sets the number
        of masked matrix positions to ``int(n_entries / 10 * i)``.
    data_dir : str, optional
        Directory containing ``data.csv.gz`` and the ``drp_*0.csv.gz`` files.
        Defaults to the path this notebook originally hard-coded.

    Returns
    -------
    df : pandas.DataFrame
        ``log(x + 1)`` of the dropout matrix, restricted to the kept columns.
    mask : list of tuple of int
        ``(row, column)`` pairs drawn from a seed-42 shuffle of all matrix
        positions, excluding pairs whose column was dropped.
    original : pandas.DataFrame
        ``log(x + 1)`` of the reference matrix, with the same index and columns
        as ``df``.

    Notes
    -----
    A column is kept when the sum of the signs of its dropout values is
    positive (for non-negative data: when it has at least one non-zero entry).

    NOTE(review): ``mask`` holds positional indices of the unfiltered matrix,
    while dropped columns are identified by integer label. The two agree only
    if the CSV labels are 0..n-1 - confirm against the data files.
    """
    original_ = pd.read_csv(os.path.join(data_dir, 'data.csv.gz'), index_col=0)
    df_ = pd.read_csv(os.path.join(data_dir, 'drp_{}0.csv.gz'.format(i)), index_col=0)
    df_.index = [int(label) for label in df_.index]
    df_.columns = [int(label) for label in df_.columns]

    # The reference matrix is relabelled positionally to match the dropout matrix.
    original_.columns = df_.columns
    original_.index = df_.index

    # Fixed seed: the same positions are selected on every call for a given shape.
    # The float expression for the mask size is kept as-is so that masks stay
    # identical to those produced by earlier runs.
    positions = list(np.ndindex(original_.shape))
    random.Random(42).shuffle(positions)
    mask = positions[:int(len(positions) / 10 * i)]

    # Explicit axis=0: np.sum(DataFrame) forwards axis=None, whose column-wise
    # meaning is deprecated in pandas 2.x and slated to become a scalar reduction.
    keep = (np.sign(df_).sum(axis=0) > 0).to_numpy()
    # A set keeps the per-position membership test below O(1).
    dropped_columns = {int(label) for label in df_.columns[~keep]}

    original = np.log(original_.loc[:, keep] + 1)
    df = np.log(df_.loc[:, keep] + 1)

    mask = [pos for pos in mask if pos[1] not in dropped_columns]

    return df, mask, original

In [3]:
# Per-iteration scores, keyed by the loop index i (which loads dropout level i + 1).
# All scores are computed on the masked entries only:
#   mses / corrs     - every masked entry
#   mses_ / corrs_   - masked entries whose reference value is non-zero
#   mses__           - masked entries whose reference value is zero
#   corrs__          - declared but never filled by this cell
#   aucs             - ROC AUC of prediction ranks against sign(reference value)
mses = {}
corrs = {}
mses_ = {}
corrs_ = {}
mses__ = {}
corrs__ = {}
aucs = {}
method = 'GNNImpute'

for i in (range(9)):
    print(i)
    df, mask, original = get_data_for_i(i+1)

    # NOTE(review): the recorded run of this cell printed 0 and then stopped with
    # a CUDA out-of-memory error, so none of the score dicts were filled.
    # `no_cuda` presumably selects CPU execution - confirm against the GNNImpute API.
    adata = sc.AnnData(df.values)
    adata = GNNImpute(
        adata=adata, layer='GATConv',
        no_cuda=False,
        d = '/export/scratch/inoue019/gnn_10/',
        epochs=3000, 
        lr=0.001, weight_decay=0.0005,
        hidden=50, patience=200,
        fastmode=True, heads=3,
        use_raw=False,
        verbose=True
    )

    pred = adata.X
    pred = pd.DataFrame(pred, columns=df.columns, index=df.index)
    # pred.to_csv('result/{}_{}.csv.gz'.format(method, i), compression='gzip')

    # Translate the (row label, column label) pairs to positions once, then gather
    # all masked entries in a single indexing step instead of one `.loc` call per
    # entry. `original` and `pred` share the index and columns of `df`.
    mask_rc = np.asarray(mask, dtype=np.intp).reshape(-1, 2)
    row_pos = df.index.get_indexer(mask_rc[:, 0])
    col_pos = df.columns.get_indexer(mask_rc[:, 1])
    if (row_pos < 0).any() or (col_pos < 0).any():
        # get_indexer marks missing labels with -1; label-based `.loc` lookups
        # raise KeyError in that situation, so do the same here.
        raise KeyError('mask contains labels that are missing from the data')

    origin = original.to_numpy()[row_pos, col_pos]
    predict = pred.to_numpy()[row_pos, col_pos]

    is_nonzero = origin != 0
    is_zero = origin == 0

    mses[i] = mse(origin, predict)
    corrs[i] = np.corrcoef(origin, predict)[0][1]
    mses_[i] = mse(origin[is_nonzero], predict[is_nonzero])
    corrs_[i] = np.corrcoef(origin[is_nonzero], predict[is_nonzero])[0][1]
    mses__[i] = mse(origin[is_zero], predict[is_zero])

    # Rank the predictions in a local Series so that `df` (the model input) is
    # not overwritten by a scratch frame.
    predict_rank = pd.Series(predict).rank()
    aucs[i] = roc_auc_score(np.sign(origin), predict_rank)

0


OutOfMemoryError: CUDA out of memory. Tried to allocate 192.00 MiB (GPU 0; 10.75 GiB total capacity; 9.67 GiB already allocated; 152.69 MiB free; 9.76 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Summary table: one row per metric. Each row is built from the values of the
# corresponding score dict, so columns are positional (0, 1, ...) in the order
# the scores were inserted.
metric_table = {
    'mse': mses,
    'mse (nonzero)': mses_,
    'mse (zero)': mses__,
    'corr': corrs,
    'corrs (nonzero)': corrs_,
    'auc': aucs,
}
pd.DataFrame(
    [scores.values() for scores in metric_table.values()],
    index=list(metric_table),
)