Original implementation of Contrastive-sc method
(https://github.com/ciortanmadalina/contrastive-sc)

In [1]:
import sys
sys.path.append("..")
import argparse
import numpy as np
import dgl
from dgl import DGLGraph
import torch
import torch.nn.functional as F
import time
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from collections import Counter
import pickle
import h5py
import random
import glob2
import seaborn as sns

import train
import models

%load_ext autoreload
%autoreload 2

random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)
device = train.get_device()

Using backend: pytorch

In a future version of Scanpy, `scanpy.api` will be removed.
Simply use `import scanpy as sc` and `import scanpy.external as sce` instead.



In [2]:
# Experiment configuration.
category = "real_data"

epochs = 10
batch_size = 128
pca_size = 50
path = "../"
nb_genes = 2000

# Dataset names are the .h5 file names without the directory prefix and without
# the extension. The prefix is spelled out explicitly (including the trailing
# "/") instead of relying on a stray quote character to make the length match.
data_prefix = f"{path}real_data/"
files = glob2.glob(f"{data_prefix}*.h5")
files = [f[len(data_prefix):-len(".h5")] for f in files]
print(files)

['Quake_Smart-seq2_Trachea', 'Quake_Smart-seq2_Diaphragm', 'Quake_10x_Spleen', 'Young', 'mouse_ES_cell', 'Adam', 'Quake_10x_Bladder', 'Quake_Smart-seq2_Lung', 'Quake_10x_Limb_Muscle', 'worm_neuron_cell', 'mouse_bladder_cell', 'Romanov', 'Quake_Smart-seq2_Limb_Muscle', 'Muraro', '10X_PBMC']


In [3]:
# Scalability study: only the computational cost is of interest here, so a
# single dataset is selected; the benchmark cells resize it with np.tile.
path, dataset = "../", "Quake_Smart-seq2_Diaphragm"
pca_size, batch_size, epochs = 50, 128, 10
device = train.get_device()

In [None]:
# Scalability benchmark: cost as the number of cells grows.
# A slice of at most 1000 cells x 1000 genes of `dataset` is stacked `factor`
# times along the cell axis. For every size the graph is built once and the
# autoencoder is trained for 3 seeded runs; wall-clock timings are stored per run.
results = pd.DataFrame()
# One record per (factor, run). The DataFrame is rebuilt from this list because
# DataFrame.append was removed in pandas 2.0 (and copied the frame on each call).
records = []

model_name = "GraphConv"
normalize_weights = "log_per_cell"
node_features = "scale"
same_edge_values = False
edge_norm = True
hidden_relu = False
hidden_bn = False
n_layers = 1
hidden_dim = 200
hidden = [300]
nb_genes = 3000
activation = F.relu

# The slice does not depend on `factor`: read it once and close the file.
with h5py.File(f"{path}/real_data/{dataset}.h5", "r") as data_mat:
    Y_o = np.array(data_mat['Y'])[:1000]
    X_o = np.array(data_mat['X'])[:1000][:,: 1000]

for factor in [50, 30, 20, 10, 5, 1]:
    # Repeat the cells (rows of X, entries of Y) `factor` times.
    X = np.tile(X_o.T, factor).T
    Y = np.tile(Y_o.T, factor).T
    print(">> ",X_o.shape, X.shape)

    genes_idx, cells_idx = train.filter_data(X, highly_genes=nb_genes)
    X = X[cells_idx][:, genes_idx]
    Y = Y[cells_idx]
    n_clusters = len(np.unique(Y))

    # t0..t1 covers graph construction and data-loader setup.
    t0 = time.time()
    graph = train.make_graph(
        X,
        Y,
        dense_dim=pca_size,
        node_features=node_features,
        normalize_weights=normalize_weights,
    )

    labels = graph.ndata["label"]
    # Keep the nodes whose label is not -1.
    train_ids = np.where(labels != -1)[0]

    sampler = dgl.dataloading.MultiLayerFullNeighborSampler(n_layers)
    # Smaller batches above 10000 rows. A cell-local name is used so the
    # notebook-wide `batch_size` read by other cells is not overwritten.
    loader_batch_size = 64 if X.shape[0] > 10000 else 128
    dataloader = dgl.dataloading.NodeDataLoader(
        graph,
        train_ids,
        sampler,
        batch_size=loader_batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=1,
    )
    print(
        f"INPUT: {model_name}  {hidden_dim}, {hidden}, {same_edge_values}, {edge_norm}"
    )
    t1 = time.time()
    time_graph = t1 - t0

    for run in range(3):
        # t_start precedes seeding and model construction, so both are counted
        # in the training time.
        t_start = time.time()
        torch.manual_seed(run)
        torch.cuda.manual_seed_all(run)
        np.random.seed(run)
        random.seed(run)

        model = models.GCNAE(
            in_feats=pca_size,
            n_hidden=hidden_dim,
            n_layers=n_layers,
            activation=activation,
            dropout=0.1,
            hidden=hidden,
            hidden_relu=hidden_relu,
            hidden_bn=hidden_bn,
        ).to(device)
        if run == 0:
            print(">", model)

        optim = torch.optim.Adam(model.parameters(), lr=1e-5)

        # NOTE(review): assumes train.train returns a flat mapping of scalars
        # (the arithmetic on scores['ae_end'] / scores['kmeans_time'] below
        # suggests so) -- confirm against train.train.
        scores = train.train(model, optim, epochs, dataloader, n_clusters, plot=False,
                            cluster=["KMeans"])
        time_training = scores['ae_end'] - t_start
        scores["dataset"] = dataset
        scores["run"] = run
        scores["nb_genes"] = nb_genes
        scores["hidden"] = str(hidden)
        scores["hidden_dim"] = str(hidden_dim)
        # Only KMeans is requested in this cell, so only its total is recorded.
        scores["tot_kmeans_time"] = time_graph + time_training + scores['kmeans_time']
        scores["time_graph"] = time_graph
        scores["time_training"] = time_training
        scores["nb_cells"] = X.shape[0]
        records.append(dict(scores))
        results = pd.DataFrame(records)

        # Checkpoint after every run; the same file is overwritten each time.
        results.to_pickle(
            f"../output/pickle_results/{category}/{category}_scalability_cells.pkl")
        print("Done")

In [9]:
# Scalability benchmark: cost as the number of input genes grows, while
# `train.filter_data` is always asked for `nb_genes` genes.
# A slice of at most 1000 cells x 1000 genes of `dataset` is repeated `factor`
# times along the gene axis. For every size the graph is built once and the
# autoencoder is trained for 10 seeded runs; wall-clock timings are stored per run.
results = pd.DataFrame()
# One record per (factor, run). The DataFrame is rebuilt from this list because
# DataFrame.append was removed in pandas 2.0 (and copied the frame on each call).
records = []

model_name = "GraphConv"
normalize_weights = "log_per_cell"
node_features = "scale"
same_edge_values = False
edge_norm = True
hidden_relu = False
hidden_bn = False
n_layers = 1
hidden_dim = 200
hidden = [300]
nb_genes = 3000
pca_size = 50
activation = F.relu

# The slice does not depend on `factor`: read it once and close the file.
with h5py.File(f"{path}/real_data/{dataset}.h5", "r") as data_mat:
    Y_o = np.array(data_mat['Y'])[:1000]
    X_o = np.array(data_mat['X'])[:1000][:,: 1000]

for factor in [25, 20, 15, 10, 5, 1]:
    # Repeat the genes (columns) `factor` times; the cells and labels are unchanged.
    X = np.tile(X_o, factor)
    Y = Y_o
    print(">> ",X_o.shape, X.shape)
    # Gene count before filtering; this is the value reported as "nb_genes".
    nb_input_genes = X.shape[1]

    # Use the configured `nb_genes` instead of repeating the literal 3000.
    genes_idx, cells_idx = train.filter_data(X, highly_genes=nb_genes)
    X = X[cells_idx][:, genes_idx]
    Y = Y[cells_idx]
    n_clusters = len(np.unique(Y))

    # t0..t1 covers graph construction and data-loader setup.
    t0 = time.time()
    graph = train.make_graph(
        X,
        Y,
        dense_dim=pca_size,
        node_features=node_features,
        normalize_weights=normalize_weights,
    )

    labels = graph.ndata["label"]
    # Keep the nodes whose label is not -1.
    train_ids = np.where(labels != -1)[0]

    sampler = dgl.dataloading.MultiLayerFullNeighborSampler(n_layers)

    dataloader = dgl.dataloading.NodeDataLoader(
        graph,
        train_ids,
        sampler,
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=1,
    )
    print(
        f"INPUT: {model_name}  {hidden_dim}, {hidden}, {same_edge_values}, {edge_norm}"
    )
    t1 = time.time()
    time_graph = t1 - t0

    for run in range(10):
        # t_start precedes seeding and model construction, so both are counted
        # in the training time.
        t_start = time.time()
        torch.manual_seed(run)
        torch.cuda.manual_seed_all(run)
        np.random.seed(run)
        random.seed(run)

        model = models.GCNAE(
            in_feats=pca_size,
            n_hidden=hidden_dim,
            n_layers=n_layers,
            activation=activation,
            dropout=0.1,
            hidden=hidden,
            hidden_relu=hidden_relu,
            hidden_bn=hidden_bn,
        ).to(device)
        if run == 0:
            print(">", model)

        optim = torch.optim.Adam(model.parameters(), lr=1e-5)

        # NOTE(review): assumes train.train returns a flat mapping of scalars
        # (the arithmetic on scores['ae_end'] / scores['kmeans_time'] below
        # suggests so) -- confirm against train.train.
        scores = train.train(model, optim, epochs, dataloader, n_clusters, plot=False,
                            cluster=["KMeans", "Leiden"])
        time_training = scores['ae_end'] - t_start
        scores["dataset"] = dataset
        scores["run"] = run
        scores["hidden"] = str(hidden)
        scores["hidden_dim"] = str(hidden_dim)
        scores["tot_kmeans_time"] = time_graph + time_training + scores['kmeans_time']
        scores["tot_leiden_time"] = time_graph + time_training + scores['leiden_time']
        scores["time_graph"] = time_graph
        scores["time_training"] = time_training
        scores["nb_cells"] = X.shape[0]
        scores["nb_genes"] = nb_input_genes

        records.append(dict(scores))
        results = pd.DataFrame(records)

        # Checkpoint after every run; the same file is overwritten each time.
        results.to_pickle(
            f"../output/pickle_results/{category}/{category}_scalability_genes_pca.pkl")
        print("Done")

>>  (870, 1000) (870, 25000)




INPUT: GraphConv  200, [300], False, True
> GCNAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (layer1): WeightedGraphConv(in=50, out=200, normalization=both, activation=<function relu at 0x7f30300075f0>)
  (decoder): InnerProductDecoder()
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
  )
)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.0997
ARI 0.0997, 0.010687904432415962
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1394
ARI 0.1394, 0.04373324289917946
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1103
ARI 0.1103, 0.05161091685295105
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1317
ARI 0.1317, 0.04270748794078827
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1265
ARI 0.1265, 0.030594130977988243
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.0906
ARI 0.0906, 0.04912831261754036
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.0835
ARI 0.0835, 0.016826625913381577
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1463
ARI 0.1463, 0.035340603440999985
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1271
ARI 0.1271, 0.04270720109343529
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1129
ARI 0.1129, 0.03930862247943878
Done
>>  (870, 1000) (870, 20000)




INPUT: GraphConv  200, [300], False, True
> GCNAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (layer1): WeightedGraphConv(in=50, out=200, normalization=both, activation=<function relu at 0x7f30300075f0>)
  (decoder): InnerProductDecoder()
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
  )
)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1144
ARI 0.1144, 0.01829533651471138
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1424
ARI 0.1424, 0.07113183289766312
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1204
ARI 0.1204, 0.025410864502191544
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1335
ARI 0.1335, 0.04548206552863121
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1259
ARI 0.1259, 0.02403167635202408
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.0938
ARI 0.0938, 0.0167570598423481
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.0676
ARI 0.0676, 0.01105012558400631
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1756
ARI 0.1756, 0.059373121708631516
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1541
ARI 0.1541, 0.0342535562813282
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1993
ARI 0.1993, 0.06484012305736542
Done
>>  (870, 1000) (870, 15000)




INPUT: GraphConv  200, [300], False, True
> GCNAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (layer1): WeightedGraphConv(in=50, out=200, normalization=both, activation=<function relu at 0x7f30300075f0>)
  (decoder): InnerProductDecoder()
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
  )
)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.132
ARI 0.132, 0.03695393726229668
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1747
ARI 0.1747, 0.054461412131786346
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.0746
ARI 0.0746, 0.012572257779538631
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1413
ARI 0.1413, 0.12269509583711624
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1153
ARI 0.1153, 0.00011354794696671888
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1292
ARI 0.1292, 0.02878418006002903
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1819
ARI 0.1819, 0.09983208030462265
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1744
ARI 0.1744, 0.05270462855696678
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.125
ARI 0.125, 0.04888371378183365
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1123
ARI 0.1123, 0.027656421065330505
Done
>>  (870, 1000) (870, 10000)




INPUT: GraphConv  200, [300], False, True
> GCNAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (layer1): WeightedGraphConv(in=50, out=200, normalization=both, activation=<function relu at 0x7f30300075f0>)
  (decoder): InnerProductDecoder()
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
  )
)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1878
ARI 0.1878, 0.044311970472335815
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1259
ARI 0.1259, 0.015819726511836052
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1305
ARI 0.1305, 0.03950841352343559
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1396
ARI 0.1396, 0.05003150925040245
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.175
ARI 0.175, 0.06153019517660141
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1267
ARI 0.1267, 0.047370702028274536
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1445
ARI 0.1445, 0.10377127677202225
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1807
ARI 0.1807, 0.03686369210481644
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1484
ARI 0.1484, 0.05211671441793442
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.1749
ARI 0.1749, 0.10652348399162292
Done
>>  (870, 1000) (870, 5000)




INPUT: GraphConv  200, [300], False, True
> GCNAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (layer1): WeightedGraphConv(in=50, out=200, normalization=both, activation=<function relu at 0x7f30300075f0>)
  (decoder): InnerProductDecoder()
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
  )
)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.3854
ARI 0.3854, 0.007124831900000572
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.4333
ARI 0.4333, 0.009405144490301609
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.41
ARI 0.41, 0.008441297337412834
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.477
ARI 0.477, 0.025491412729024887
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.3968
ARI 0.3968, 0.01893128827214241
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.4269
ARI 0.4269, 0.027016568928956985
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.3069
ARI 0.3069, 0.019938774406909943
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.3229
ARI 0.3229, 0.019710488617420197
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.5455
ARI 0.5455, 0.019562972709536552
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.4525
ARI 0.4525, 0.023802848532795906
Done
>>  (870, 1000) (870, 1000)




INPUT: GraphConv  200, [300], False, True
> GCNAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (layer1): WeightedGraphConv(in=50, out=200, normalization=both, activation=<function relu at 0x7f30300075f0>)
  (decoder): InnerProductDecoder()
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
  )
)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.5736
ARI 0.5736, 0.036762963980436325
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.6018
ARI 0.6018, 0.029916848987340927
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.6036
ARI 0.6036, 0.033609788864851
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.5128
ARI 0.5128, 0.0321778878569603
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.4065
ARI 0.4065, 0.025381188839673996
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.4483
ARI 0.4483, 0.02108037658035755
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.4941
ARI 0.4941, 0.02341347001492977
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.4952
ARI 0.4952, 0.021397128701210022
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.5618
ARI 0.5618, 0.03282079100608826
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


Coputing Kmeans 0.5753
ARI 0.5753, 0.031402718275785446
Done


In [4]:
# Scalability benchmark: cost as the number of input genes grows, with
# `train.filter_data` asked for as many genes as the input has.
# A slice of at most 1000 cells x 1000 genes of `dataset` is repeated `factor`
# times along the gene axis. For every size the graph is built once and the
# autoencoder is trained for 3 seeded runs; wall-clock timings are stored per run.
results = pd.DataFrame()
# One record per (factor, run). The DataFrame is rebuilt from this list because
# DataFrame.append was removed in pandas 2.0 (and copied the frame on each call).
records = []

model_name = "GraphConv"
normalize_weights = "log_per_cell"
node_features = "scale"
same_edge_values = False
edge_norm = True
hidden_relu = False
hidden_bn = False
n_layers = 1
hidden_dim = 200
hidden = [300]
nb_genes = 3000
activation = F.relu

# The slice does not depend on `factor`: read it once and close the file.
with h5py.File(f"{path}/real_data/{dataset}.h5", "r") as data_mat:
    Y_o = np.array(data_mat['Y'])[:1000]
    X_o = np.array(data_mat['X'])[:1000][:,: 1000]

for factor in [25, 20, 15, 10, 5, 1]:
    # Repeat the genes (columns) `factor` times; the cells and labels are unchanged.
    X = np.tile(X_o, factor)
    Y = Y_o
    print(">> ",X_o.shape, X.shape)
    # Gene count before filtering; also passed to filter_data and reported as
    # "nb_genes".
    nb_input_genes = X.shape[1]

    genes_idx, cells_idx = train.filter_data(X, highly_genes=nb_input_genes)
    X = X[cells_idx][:, genes_idx]
    Y = Y[cells_idx]
    n_clusters = len(np.unique(Y))

    # t0..t1 covers graph construction and data-loader setup.
    t0 = time.time()
    graph = train.make_graph(
        X,
        Y,
        dense_dim=pca_size,
        node_features=node_features,
        normalize_weights=normalize_weights,
    )

    labels = graph.ndata["label"]
    # Keep the nodes whose label is not -1.
    train_ids = np.where(labels != -1)[0]

    sampler = dgl.dataloading.MultiLayerFullNeighborSampler(n_layers)

    dataloader = dgl.dataloading.NodeDataLoader(
        graph,
        train_ids,
        sampler,
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=1,
    )
    print(
        f"INPUT: {model_name}  {hidden_dim}, {hidden}, {same_edge_values}, {edge_norm}"
    )
    t1 = time.time()
    time_graph = t1 - t0

    for run in range(3):
        # t_start precedes seeding and model construction, so both are counted
        # in the training time.
        t_start = time.time()
        torch.manual_seed(run)
        torch.cuda.manual_seed_all(run)
        np.random.seed(run)
        random.seed(run)

        model = models.GCNAE(
            in_feats=pca_size,
            n_hidden=hidden_dim,
            n_layers=n_layers,
            activation=activation,
            dropout=0.1,
            hidden=hidden,
            hidden_relu=hidden_relu,
            hidden_bn=hidden_bn,
        ).to(device)
        if run == 0:
            print(">", model)

        optim = torch.optim.Adam(model.parameters(), lr=1e-5)

        # NOTE(review): assumes train.train returns a flat mapping of scalars
        # (the arithmetic on scores['ae_end'] / scores['kmeans_time'] below
        # suggests so) -- confirm against train.train.
        scores = train.train(model, optim, epochs, dataloader, n_clusters, plot=False,
                            cluster=["KMeans", "Leiden"])
        time_training = scores['ae_end'] - t_start
        scores["dataset"] = dataset
        scores["run"] = run
        scores["hidden"] = str(hidden)
        scores["hidden_dim"] = str(hidden_dim)
        scores["tot_kmeans_time"] = time_graph + time_training + scores['kmeans_time']
        scores["tot_leiden_time"] = time_graph + time_training + scores['leiden_time']
        scores["time_graph"] = time_graph
        scores["time_training"] = time_training
        scores["nb_cells"] = X.shape[0]
        scores["nb_genes"] = nb_input_genes

        records.append(dict(scores))
        results = pd.DataFrame(records)

        # Checkpoint after every run; the same file is overwritten each time.
        results.to_pickle(
            f"../output/pickle_results/{category}/{category}_scalability_genes.pkl")
        print("Done")

>>  (870, 1000) (870, 25000)




INPUT: GraphConv  200, [300], False, True
> GCNAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (layer1): WeightedGraphConv(in=50, out=200, normalization=both, activation=<function relu at 0x7f5d4535f5f0>)
  (decoder): InnerProductDecoder()
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
  )
)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.6031, 0.03938838094472885
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.5272, 0.0332205556333065
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.4593, 0.02030372992157936
Done
>>  (870, 1000) (870, 20000)




INPUT: GraphConv  200, [300], False, True
> GCNAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (layer1): WeightedGraphConv(in=50, out=200, normalization=both, activation=<function relu at 0x7f5d4535f5f0>)
  (decoder): InnerProductDecoder()
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
  )
)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.5732, 0.029891682788729668
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.5272, 0.03322254493832588
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.4895, 0.022878339514136314
Done
>>  (870, 1000) (870, 15000)




INPUT: GraphConv  200, [300], False, True
> GCNAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (layer1): WeightedGraphConv(in=50, out=200, normalization=both, activation=<function relu at 0x7f5d4535f5f0>)
  (decoder): InnerProductDecoder()
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
  )
)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.6059, 0.036705292761325836
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.5217, 0.03324050083756447
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.4378, 0.021201910451054573
Done
>>  (870, 1000) (870, 10000)




INPUT: GraphConv  200, [300], False, True
> GCNAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (layer1): WeightedGraphConv(in=50, out=200, normalization=both, activation=<function relu at 0x7f5d4535f5f0>)
  (decoder): InnerProductDecoder()
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
  )
)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.6093, 0.03673866391181946
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.5211, 0.032419078052043915
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.3333, 0.008299482055008411
Done
>>  (870, 1000) (870, 5000)




INPUT: GraphConv  200, [300], False, True
> GCNAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (layer1): WeightedGraphConv(in=50, out=200, normalization=both, activation=<function relu at 0x7f5d4535f5f0>)
  (decoder): InnerProductDecoder()
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
  )
)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.5765, 0.03513171151280403
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.5156, 0.025298887863755226
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.4289, 0.027904486283659935
Done
>>  (870, 1000) (870, 1000)




INPUT: GraphConv  200, [300], False, True
> GCNAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (layer1): WeightedGraphConv(in=50, out=200, normalization=both, activation=<function relu at 0x7f5d4535f5f0>)
  (decoder): InnerProductDecoder()
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=300, bias=True)
  )
)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.5736, 0.036762963980436325
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.6018, 0.029916848987340927
Done


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI 0.6036, 0.0336097851395607
Done


In [5]:
# Mean of every numeric score/timing per input-gene count. `numeric_only=True`
# skips the string columns (dataset, hidden, hidden_dim); without it,
# pandas >= 2.0 raises a TypeError instead of silently dropping them.
results.groupby("nb_genes").mean(numeric_only=True)

Unnamed: 0_level_0,ae_end,kmeans_ari,kmeans_cal,kmeans_nmi,kmeans_sil,kmeans_time,leiden_ari,leiden_cal,leiden_nmi,leiden_sil,leiden_time,nb_cells,run,time_graph,time_training,tot_kmeans_time,tot_leiden_time
nb_genes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1000.0,1615631000.0,0.593,32.743877,0.496133,0.03343,0.227337,0.409067,27.020869,0.430267,0.031177,1.945721,870.0,1.0,0.294626,3.575556,4.097519,5.815903
5000.0,1615631000.0,0.507,31.548197,0.441833,0.029445,0.238846,0.381767,23.463431,0.407433,0.011417,1.936401,870.0,1.0,0.651714,5.146484,6.037043,7.734599
10000.0,1615631000.0,0.4879,30.482692,0.426633,0.025819,0.262302,0.3897,22.690557,0.4151,0.019137,1.842108,870.0,1.0,1.079715,6.088245,7.430261,9.010068
15000.0,1615631000.0,0.5218,31.140607,0.452667,0.030383,0.269096,0.3896,22.913496,0.405133,0.01985,1.704811,870.0,1.0,1.500887,8.160946,9.930929,11.366645
20000.0,1615631000.0,0.529967,30.730484,0.4516,0.028664,0.259834,0.412333,23.645408,0.436133,0.008255,1.795936,870.0,1.0,1.918511,10.010649,12.188995,13.725096
25000.0,1615631000.0,0.529867,30.66818,0.4526,0.030971,0.251649,0.399667,22.92317,0.423,0.011237,2.443501,870.0,1.0,2.214043,12.555265,15.020957,17.212809


In [7]:
# Count the trainable parameters (requires_grad=True) of the most recently
# built `model`, i.e. the one left over from the benchmark loop.
pytorch_total_params = sum(
    map(
        lambda param: param.numel(),
        filter(lambda param: param.requires_grad, model.parameters()),
    )
)
print(pytorch_total_params)

70500
