In [1]:
import pandas as pd
import numpy as np
import random
import scanpy as sc
import os
import sys

# Ask XLA not to preallocate GPU memory up front.
os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = 'false'
# Expose only the GPU with index 1 to CUDA-based libraries in this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import mean_squared_error as mse

# GNNImpute is imported from a local checkout (relative path) rather than an
# installed package, so that directory is put first on the import path.
sys.path.insert(0, '../repos/GNNImpute/')
from GNNImpute.api import GNNImpute

In [2]:
def get_data_for_i(i, data_dir='../../data/simulation'):
    """Load one simulated dropout dataset plus the cells held out for scoring.

    Reads ``data.csv`` (complete matrix) and ``drp_{i}0.csv`` (matrix after
    dropout) from ``data_dir``. Columns of the dropout matrix whose sign-sum
    is not positive are removed from both matrices, and both are returned
    as ``log(x + 1)``.

    Parameters
    ----------
    i : int
        Dropout level. Selects the file ``drp_{i}0.csv`` and fixes the
        held-out share at ``i / 10`` of all matrix cells.
    data_dir : str, optional
        Directory containing the CSV files. Defaults to the location the
        notebook has always read from.

    Returns
    -------
    df : pandas.DataFrame
        Log-transformed dropout matrix restricted to the kept columns.
    mask : list of tuple
        ``(row, column)`` pairs drawn from a seeded shuffle of all cell
        positions of the unfiltered matrix, minus pairs that fall in a
        removed column.
    original : pandas.DataFrame
        Log-transformed complete matrix with the same labels as ``df``.
    """
    original_ = pd.read_csv(os.path.join(data_dir, 'data.csv'), index_col=0)
    df_ = pd.read_csv(os.path.join(data_dir, 'drp_{}0.csv'.format(i)), index_col=0)

    # Normalise labels to int and make both frames share exactly the same ones.
    df_.index = [int(label) for label in df_.index]
    df_.columns = [int(label) for label in df_.columns]
    original_.columns = df_.columns
    original_.index = df_.index

    # Fixed seed: the same cells are held out on every run for a given `i`.
    cells = list(np.ndindex(original_.shape))
    random.Random(42).shuffle(cells)
    mask = cells[:int(len(cells) / 10 * i)]

    # Explicit axis=0: np.sum(DataFrame) depends on the deprecated axis=None
    # behaviour and would collapse to a scalar once that default changes.
    keep = np.sign(df_).sum(axis=0) > 0
    keep_flags = keep.to_numpy()
    original_ = original_.loc[:, keep_flags]
    df_ = df_.loc[:, keep_flags]

    original = np.log(original_ + 1)
    df = np.log(df_ + 1)

    # Mask entries are positions while `removed` holds column labels.
    # NOTE(review): this assumes integer column labels equal their position
    # (0..n-1) in the CSV header - confirm against the data files.
    removed = {int(label) for label in keep.index[~keep_flags]}
    mask = [cell for cell in mask if cell[1] not in removed]

    return df, mask, original

In [4]:
# Impute every simulated dropout level with GNNImpute and score the result on
# the held-out cells (MSE and Pearson correlation against the complete data).
mses = {}
corrs = {}
method = 'GNNImpute'

# to_csv does not create missing directories.
os.makedirs('result', exist_ok=True)

for i in range(9):
    print(i)
    # Files are numbered from 1 (drp_10 ... drp_90); results are keyed by i.
    df, mask, original = get_data_for_i(i+1)

    adata = sc.AnnData(df.values)
    adata = GNNImpute(
        adata=adata, layer='GATConv',
        no_cuda=False, epochs=3000,
        lr=0.001, weight_decay=0.0005,
        hidden=50, patience=200,
        fastmode=True, heads=3,
        use_raw=False,
        verbose=True
    )

    pred = pd.DataFrame(adata.X, columns=df.columns, index=df.index)
    pred.to_csv('result/{}_{}.csv.gz'.format(method, i), compression='gzip')

    # Resolve the (row label, column label) pairs to positions once and read
    # all values in one indexing step, instead of one .loc lookup per cell.
    # `pred` reuses df's labels, which get_data_for_i keeps identical to
    # `original`'s, so the same positions are valid for both frames.
    held_out = np.asarray(mask).reshape(-1, 2)
    row_pos = original.index.get_indexer(held_out[:, 0])
    col_pos = original.columns.get_indexer(held_out[:, 1])
    if (row_pos < 0).any() or (col_pos < 0).any():
        # get_indexer marks unknown labels with -1; fail loudly like .loc did.
        raise KeyError('mask refers to labels missing from the filtered matrix')
    origin = original.to_numpy()[row_pos, col_pos]
    predict = pred.to_numpy()[row_pos, col_pos]

    mses[i] = mse(origin, predict)
    corrs[i] = np.corrcoef(origin, predict)[0, 1]

0
Epoch: 0010 loss_train: 1.0070 loss_val: 1.0055
Epoch: 0020 loss_train: 1.0045 loss_val: 1.0034
Epoch: 0030 loss_train: 0.9947 loss_val: 0.9968
Epoch: 0040 loss_train: 0.9764 loss_val: 0.9789
Epoch: 0050 loss_train: 0.9566 loss_val: 0.9628
Epoch: 0060 loss_train: 0.9416 loss_val: 0.9491
Epoch: 0070 loss_train: 0.9271 loss_val: 0.9377
Epoch: 0080 loss_train: 0.9156 loss_val: 0.9294
Epoch: 0090 loss_train: 0.9138 loss_val: 0.9245
Epoch: 0100 loss_train: 0.9111 loss_val: 0.9202
Epoch: 0110 loss_train: 0.9079 loss_val: 0.9192
Epoch: 0120 loss_train: 0.9062 loss_val: 0.9205
Epoch: 0130 loss_train: 0.9034 loss_val: 0.9158
Epoch: 0140 loss_train: 0.9045 loss_val: 0.9178
Epoch: 0150 loss_train: 0.9022 loss_val: 0.9132
Epoch: 0160 loss_train: 0.9023 loss_val: 0.9138
Epoch: 0170 loss_train: 0.9014 loss_val: 0.9161
Epoch: 0180 loss_train: 0.9022 loss_val: 0.9176
Epoch: 0190 loss_train: 0.9011 loss_val: 0.9138
Epoch: 0200 loss_train: 0.9012 loss_val: 0.9122
Epoch: 0210 loss_train: 0.9004 loss_va

RuntimeError: File 167.pkl cannot be opened.

In [5]:
# Single-row table of the MSE values, one column per completed run (insertion order).
pd.DataFrame(list(mses.values())).transpose()

Unnamed: 0,0,1,2
0,0.339038,0.34054,0.341631


In [6]:
# Single-row table of the correlation values, one column per completed run (insertion order).
pd.DataFrame(list(corrs.values())).transpose()

Unnamed: 0,0,1,2
0,0.24876,0.240786,0.234031


In [3]:
mses = {}
corrs = {}
method = 'GNNImpute'

for i in (range(3, 9)):
    print(i)
    df, mask, original = get_data_for_i(i+1)

    adata = sc.AnnData(df.values)
    adata = GNNImpute(
        adata=adata, layer='GATConv', no_cuda=False,
        d = '/export/scratch/inoue019/gnnimpute/', epochs=3000, 
        lr=0.001, weight_decay=0.0005,
        hidden=50, patience=200,
        fastmode=True, heads=3,
        use_raw=False,
        verbose=True
    )
    
    pred = adata.X
    pred = pd.DataFrame(pred, columns=df.columns, index=df.index)
    pred.to_csv('result/{}_{}.csv.gz'.format(method, i), compression='gzip')

    origin = np.array([original.loc[i] for i in mask])
    predict = np.array([pred.loc[i] for i in mask])

    mses[i] = mse(origin, predict)
    corrs[i] = np.corrcoef(origin, predict)[0][1]
    !rm -rf *.pkl

3
Epoch: 0010 loss_train: 1.0006 loss_val: 1.0159
Epoch: 0020 loss_train: 0.9991 loss_val: 1.0150
Epoch: 0030 loss_train: 0.9958 loss_val: 1.0114
Epoch: 0040 loss_train: 0.9871 loss_val: 1.0062
Epoch: 0050 loss_train: 0.9775 loss_val: 0.9946
Epoch: 0060 loss_train: 0.9625 loss_val: 0.9836
Epoch: 0070 loss_train: 0.9552 loss_val: 0.9753
Epoch: 0080 loss_train: 0.9469 loss_val: 0.9695
Epoch: 0090 loss_train: 0.9447 loss_val: 0.9648
Epoch: 0100 loss_train: 0.9399 loss_val: 0.9620
Epoch: 0110 loss_train: 0.9384 loss_val: 0.9604
Epoch: 0120 loss_train: 0.9383 loss_val: 0.9613
Epoch: 0130 loss_train: 0.9359 loss_val: 0.9612
Epoch: 0140 loss_train: 0.9355 loss_val: 0.9577
Epoch: 0150 loss_train: 0.9353 loss_val: 0.9601
Epoch: 0160 loss_train: 0.9334 loss_val: 0.9587
Epoch: 0170 loss_train: 0.9329 loss_val: 0.9596
Epoch: 0180 loss_train: 0.9338 loss_val: 0.9571
Epoch: 0190 loss_train: 0.9333 loss_val: 0.9598
Epoch: 0200 loss_train: 0.9338 loss_val: 0.9598
Epoch: 0210 loss_train: 0.9320 loss_va

Total time elapsed: 39.9103s
4
Epoch: 0010 loss_train: 1.0028 loss_val: 0.9977
Epoch: 0020 loss_train: 1.0016 loss_val: 0.9969
Epoch: 0030 loss_train: 0.9993 loss_val: 0.9965
Epoch: 0040 loss_train: 0.9959 loss_val: 0.9935
Epoch: 0050 loss_train: 0.9897 loss_val: 0.9884
Epoch: 0060 loss_train: 0.9803 loss_val: 0.9812
Epoch: 0070 loss_train: 0.9679 loss_val: 0.9725
Epoch: 0080 loss_train: 0.9607 loss_val: 0.9665
Epoch: 0090 loss_train: 0.9555 loss_val: 0.9629
Epoch: 0100 loss_train: 0.9530 loss_val: 0.9604
Epoch: 0110 loss_train: 0.9501 loss_val: 0.9597
Epoch: 0120 loss_train: 0.9498 loss_val: 0.9592
Epoch: 0130 loss_train: 0.9490 loss_val: 0.9561
Epoch: 0140 loss_train: 0.9487 loss_val: 0.9563
Epoch: 0150 loss_train: 0.9482 loss_val: 0.9587
Epoch: 0160 loss_train: 0.9467 loss_val: 0.9570
Epoch: 0170 loss_train: 0.9472 loss_val: 0.9550
Epoch: 0180 loss_train: 0.9458 loss_val: 0.9570
Epoch: 0190 loss_train: 0.9447 loss_val: 0.9556
Epoch: 0200 loss_train: 0.9450 loss_val: 0.9564
Epoch: 02

Epoch: 0470 loss_train: 0.9585 loss_val: 0.9543
Epoch: 0480 loss_train: 0.9574 loss_val: 0.9542
Epoch: 0490 loss_train: 0.9571 loss_val: 0.9540
Epoch: 0500 loss_train: 0.9580 loss_val: 0.9542
Epoch: 0510 loss_train: 0.9575 loss_val: 0.9543
Epoch: 0520 loss_train: 0.9576 loss_val: 0.9545
Epoch: 0530 loss_train: 0.9588 loss_val: 0.9541
Epoch: 0540 loss_train: 0.9580 loss_val: 0.9551
Epoch: 0550 loss_train: 0.9579 loss_val: 0.9540
Epoch: 0560 loss_train: 0.9574 loss_val: 0.9543
Epoch: 0570 loss_train: 0.9584 loss_val: 0.9537
Epoch: 0580 loss_train: 0.9582 loss_val: 0.9536
Epoch: 0590 loss_train: 0.9575 loss_val: 0.9548
Epoch: 0600 loss_train: 0.9587 loss_val: 0.9538
Epoch: 0610 loss_train: 0.9598 loss_val: 0.9539
Epoch: 0620 loss_train: 0.9588 loss_val: 0.9542
Epoch: 0630 loss_train: 0.9590 loss_val: 0.9545
Epoch: 0640 loss_train: 0.9587 loss_val: 0.9539
['/export/scratch/inoue019/gnnimpute/85.pkl', '/export/scratch/inoue019/gnnimpute/52.pkl', '/export/scratch/inoue019/gnnimpute/45.pkl', 

Total time elapsed: 21.3243s
7
Epoch: 0010 loss_train: 0.9969 loss_val: 1.0009
Epoch: 0020 loss_train: 0.9964 loss_val: 1.0005
Epoch: 0030 loss_train: 0.9959 loss_val: 1.0008
Epoch: 0040 loss_train: 0.9948 loss_val: 1.0003
Epoch: 0050 loss_train: 0.9924 loss_val: 0.9995
Epoch: 0060 loss_train: 0.9883 loss_val: 0.9987
Epoch: 0070 loss_train: 0.9844 loss_val: 0.9950
Epoch: 0080 loss_train: 0.9801 loss_val: 0.9941
Epoch: 0090 loss_train: 0.9781 loss_val: 0.9925
Epoch: 0100 loss_train: 0.9761 loss_val: 0.9929
Epoch: 0110 loss_train: 0.9744 loss_val: 0.9921
Epoch: 0120 loss_train: 0.9739 loss_val: 0.9920
Epoch: 0130 loss_train: 0.9737 loss_val: 0.9920
Epoch: 0140 loss_train: 0.9728 loss_val: 0.9913
Epoch: 0150 loss_train: 0.9720 loss_val: 0.9919
Epoch: 0160 loss_train: 0.9711 loss_val: 0.9914
Epoch: 0170 loss_train: 0.9696 loss_val: 0.9910
Epoch: 0180 loss_train: 0.9694 loss_val: 0.9917
Epoch: 0190 loss_train: 0.9691 loss_val: 0.9944
Epoch: 0200 loss_train: 0.9682 loss_val: 0.9913
Epoch: 02

Total time elapsed: 9.2642s


In [6]:
# Single-row table of the MSE values; columns are renumbered from 0, not labelled by loop index.
pd.DataFrame(list(mses.values())).transpose()

Unnamed: 0,0,1,2,3,4,5
0,0.343796,0.345365,0.350325,0.356773,0.367477,0.38254


In [7]:
# Single-row table of the correlation values; columns are renumbered from 0, not labelled by loop index.
pd.DataFrame(list(corrs.values())).transpose()

Unnamed: 0,0,1,2,3,4,5
0,0.227037,0.226545,0.216302,0.206688,0.181122,0.155269
