In [1]:
import sys
# ^^^ pyforest auto-imports - don't write above this line
import numpy as np
import pandas as pd
import scanpy as sc

from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score, normalized_mutual_info_score
from sklearn.cluster import KMeans, SpectralClustering
from sklearn.cluster import SpectralClustering

from sklearn.decomposition import PCA, SparsePCA, KernelPCA
from sklearn.manifold import TSNE

from rpy2.robjects import r, pandas2ri
from rpy2.robjects.vectors import StrVector

pandas2ri.activate()

# import magic
import scprep

%matplotlib inline

# from sklearnex import patch_sklearn
# patch_sklearn()

import warnings

from sklearn.cluster import KMeans
from tqdm import tqdm

sys.path.insert(0, '../../imputation2/notebooks/repos/GNNImpute/')
from GNNImpute.api import GNNImpute

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def get_data(i):
    df = pd.read_csv('../data/{}/data.csv.gz'.format(i), index_col=0)
    tmp = np.sign(df)
    cols = (np.sum(tmp) > int((df.shape[0])*0.05))
    rows = (np.sum(tmp, axis=1) > int((df.shape[1])*0.05))
    df = np.log(df.loc[rows, cols] + 1)
    df_norm = df.copy()
    df_norm = scprep.normalize.library_size_normalize(df_norm)    
    df_norm = scprep.transform.sqrt(df_norm)
    X_norm = pd.DataFrame(df_norm, columns=df.columns)
    labels = df.index
    return X_norm, labels

In [3]:
!mkdir /export/scratch/inoue019/gnn_speed/

mkdir: cannot create directory ‘/export/scratch/inoue019/gnn_speed/’: File exists


In [4]:
import time

res = []
dir_list = ['baron', 'brosens', 'carey', 'hcabm40k', 'chang']
for i in tqdm(dir_list):
    X_norm, labels = get_data(i)
    start_time = time.time()
    adata = sc.AnnData(X_norm.values)
    adata = GNNImpute(
        adata=adata, layer='GATConv',
        no_cuda=False,
        d = '/export/scratch/inoue019/gnn_speed/',
        epochs=3000, 
        lr=0.001, weight_decay=0.0005,
        hidden=50, patience=200,
        fastmode=False, heads=3,
        use_raw=False,
        verbose=True
    )
    end_time = time.time()
    res.append(end_time - start_time)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 0010 loss_train: 1.0002 loss_val: 0.9957
Epoch: 0020 loss_train: 0.9982 loss_val: 0.9948
Epoch: 0030 loss_train: 0.9961 loss_val: 0.9937
Epoch: 0040 loss_train: 0.9905 loss_val: 0.9906
Epoch: 0050 loss_train: 0.9847 loss_val: 0.9871
Epoch: 0060 loss_train: 0.9819 loss_val: 0.9848
Epoch: 0070 loss_train: 0.9767 loss_val: 0.9811
Epoch: 0080 loss_train: 0.9726 loss_val: 0.9785
Epoch: 0090 loss_train: 0.9691 loss_val: 0.9740
Epoch: 0100 loss_train: 0.9652 loss_val: 0.9689
Epoch: 0110 loss_train: 0.9645 loss_val: 0.9682
Epoch: 0120 loss_train: 0.9629 loss_val: 0.9674
Epoch: 0130 loss_train: 0.9619 loss_val: 0.9662
Epoch: 0140 loss_train: 0.9611 loss_val: 0.9649
Epoch: 0150 loss_train: 0.9607 loss_val: 0.9650
Epoch: 0160 loss_train: 0.9598 loss_val: 0.9643
Epoch: 0170 loss_train: 0.9594 loss_val: 0.9646
Epoch: 0180 loss_train: 0.9597 loss_val: 0.9641
Epoch: 0190 loss_train: 0.9598 loss_val: 0.9643
Epoch: 0200 loss_train: 0.9594 loss_val: 0.9640
Epoch: 0210 loss_train: 0.9591 loss_val:

 20%|██        | 1/5 [00:45<03:00, 45.23s/it]

Total time elapsed: 29.6530s
Epoch: 0010 loss_train: 0.9996 loss_val: 0.9980
Epoch: 0020 loss_train: 0.9984 loss_val: 0.9967
Epoch: 0030 loss_train: 0.9915 loss_val: 0.9898
Epoch: 0040 loss_train: 0.9812 loss_val: 0.9823
Epoch: 0050 loss_train: 0.9723 loss_val: 0.9765
Epoch: 0060 loss_train: 0.9669 loss_val: 0.9703
Epoch: 0070 loss_train: 0.9640 loss_val: 0.9676
Epoch: 0080 loss_train: 0.9624 loss_val: 0.9651
Epoch: 0090 loss_train: 0.9623 loss_val: 0.9632
Epoch: 0100 loss_train: 0.9612 loss_val: 0.9630
Epoch: 0110 loss_train: 0.9605 loss_val: 0.9627
Epoch: 0120 loss_train: 0.9604 loss_val: 0.9619
Epoch: 0130 loss_train: 0.9599 loss_val: 0.9621
Epoch: 0140 loss_train: 0.9596 loss_val: 0.9617
Epoch: 0150 loss_train: 0.9596 loss_val: 0.9612
Epoch: 0160 loss_train: 0.9589 loss_val: 0.9615
Epoch: 0170 loss_train: 0.9591 loss_val: 0.9615
Epoch: 0180 loss_train: 0.9588 loss_val: 0.9613
Epoch: 0190 loss_train: 0.9589 loss_val: 0.9609
Epoch: 0200 loss_train: 0.9584 loss_val: 0.9607
Epoch: 0210

 40%|████      | 2/5 [02:10<03:25, 68.64s/it]

Total time elapsed: 48.7694s
Epoch: 0010 loss_train: 0.9997 loss_val: 0.9994
Epoch: 0020 loss_train: 0.9982 loss_val: 0.9983
Epoch: 0030 loss_train: 0.9931 loss_val: 0.9945
Epoch: 0040 loss_train: 0.9805 loss_val: 0.9828
Epoch: 0050 loss_train: 0.9704 loss_val: 0.9749
Epoch: 0060 loss_train: 0.9627 loss_val: 0.9689
Epoch: 0070 loss_train: 0.9573 loss_val: 0.9610
Epoch: 0080 loss_train: 0.9536 loss_val: 0.9571
Epoch: 0090 loss_train: 0.9518 loss_val: 0.9548
Epoch: 0100 loss_train: 0.9507 loss_val: 0.9520
Epoch: 0110 loss_train: 0.9493 loss_val: 0.9502
Epoch: 0120 loss_train: 0.9483 loss_val: 0.9498
Epoch: 0130 loss_train: 0.9481 loss_val: 0.9492
Epoch: 0140 loss_train: 0.9479 loss_val: 0.9492
Epoch: 0150 loss_train: 0.9474 loss_val: 0.9489
Epoch: 0160 loss_train: 0.9472 loss_val: 0.9496
Epoch: 0170 loss_train: 0.9468 loss_val: 0.9490
Epoch: 0180 loss_train: 0.9466 loss_val: 0.9488
Epoch: 0190 loss_train: 0.9462 loss_val: 0.9486
Epoch: 0200 loss_train: 0.9462 loss_val: 0.9475
Epoch: 0210

 60%|██████    | 3/5 [04:57<03:46, 113.45s/it]

Total time elapsed: 93.3781s
Epoch: 0010 loss_train: 0.9960 loss_val: 0.9986
Epoch: 0020 loss_train: 0.9886 loss_val: 0.9908
Epoch: 0030 loss_train: 0.9775 loss_val: 0.9842
Epoch: 0040 loss_train: 0.9657 loss_val: 0.9764
Epoch: 0050 loss_train: 0.9542 loss_val: 0.9666
Epoch: 0060 loss_train: 0.9443 loss_val: 0.9565
Epoch: 0070 loss_train: 0.9377 loss_val: 0.9497
Epoch: 0080 loss_train: 0.9330 loss_val: 0.9456
Epoch: 0090 loss_train: 0.9301 loss_val: 0.9426
Epoch: 0100 loss_train: 0.9277 loss_val: 0.9397
Epoch: 0110 loss_train: 0.9263 loss_val: 0.9377
Epoch: 0120 loss_train: 0.9256 loss_val: 0.9367
Epoch: 0130 loss_train: 0.9249 loss_val: 0.9363
Epoch: 0140 loss_train: 0.9243 loss_val: 0.9361
Epoch: 0150 loss_train: 0.9239 loss_val: 0.9353
Epoch: 0160 loss_train: 0.9236 loss_val: 0.9352
Epoch: 0170 loss_train: 0.9236 loss_val: 0.9347
Epoch: 0180 loss_train: 0.9233 loss_val: 0.9351
Epoch: 0190 loss_train: 0.9233 loss_val: 0.9354
Epoch: 0200 loss_train: 0.9230 loss_val: 0.9347
Epoch: 0210

 80%|████████  | 4/5 [06:40<01:49, 109.39s/it]

Total time elapsed: 44.5032s
Epoch: 0010 loss_train: 0.9956 loss_val: 1.0136
Epoch: 0020 loss_train: 0.9934 loss_val: 1.0111
Epoch: 0030 loss_train: 0.9874 loss_val: 1.0047
Epoch: 0040 loss_train: 0.9775 loss_val: 0.9965
Epoch: 0050 loss_train: 0.9629 loss_val: 0.9834
Epoch: 0060 loss_train: 0.9467 loss_val: 0.9669
Epoch: 0070 loss_train: 0.9315 loss_val: 0.9519
Epoch: 0080 loss_train: 0.9174 loss_val: 0.9391
Epoch: 0090 loss_train: 0.9075 loss_val: 0.9284
Epoch: 0100 loss_train: 0.8977 loss_val: 0.9207
Epoch: 0110 loss_train: 0.8890 loss_val: 0.9111
Epoch: 0120 loss_train: 0.8825 loss_val: 0.9042
Epoch: 0130 loss_train: 0.8773 loss_val: 0.9007
Epoch: 0140 loss_train: 0.8740 loss_val: 0.8973
Epoch: 0150 loss_train: 0.8704 loss_val: 0.8950
Epoch: 0160 loss_train: 0.8677 loss_val: 0.8925
Epoch: 0170 loss_train: 0.8641 loss_val: 0.8890
Epoch: 0180 loss_train: 0.8648 loss_val: 0.8883
Epoch: 0190 loss_train: 0.8607 loss_val: 0.8856
Epoch: 0200 loss_train: 0.8604 loss_val: 0.8845
Epoch: 0210

100%|██████████| 5/5 [07:31<00:00, 90.33s/it] 

Total time elapsed: 12.0114s





In [5]:
pd.DataFrame(res)

Unnamed: 0,0
0,33.375801
1,50.337179
2,96.288406
3,45.904006
4,12.467141
