In [12]:
import torch

In [13]:
torch.__version__

'1.4.0'

In [1]:
import numpy as np
import dgl
from dgl import DGLGraph
import torch
import torch.nn.functional as F
import time
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from collections import Counter
import pickle
import h5py
import random
import glob2
import seaborn as sns
import scanpy.api as sc
import train
import models
from sklearn.metrics import (accuracy_score, adjusted_rand_score,
                             calinski_harabasz_score,
                             normalized_mutual_info_score, silhouette_score)
%load_ext autoreload
%autoreload 2

random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)
device = train.get_device()

DGL backend not selected or invalid.  Assuming PyTorch for now.
Using backend: pytorch


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)



In a future version of Scanpy, `scanpy.api` will be removed.
Simply use `import scanpy as sc` and `import scanpy.external as sce` instead.



In [8]:
results = pd.DataFrame()
model_name = "GraphConv"
normalize_weights = "log_per_cell"
node_features = "scale"
same_edge_values = False
edge_norm = True
hidden_relu = False
hidden_bn = False
n_layers = 1
hidden_dim = 200
hidden = [300]
nb_genes = 3000
activation = F.relu
epochs = 10
batch_size = 128 
pca_size = 50
run = 0

In [6]:
data_mat = h5py.File(f"../../data/worm_neuron_cell.h5", "r")

In [9]:
Y = np.array(data_mat['Y'])
X = np.array(data_mat['X'])
n_clusters = len(np.unique(Y))

genes_idx, cells_idx = train.filter_data(X, highly_genes=nb_genes)
X = X[cells_idx][:, genes_idx]
Y = Y[cells_idx]

t0 = time.time()
graph = train.make_graph(
    X,
    Y,
    dense_dim=pca_size,
    node_features=node_features,
    normalize_weights=normalize_weights,
)

labels = graph.ndata["label"]
train_ids = np.where(labels != -1)[0]

sampler = dgl.dataloading.MultiLayerFullNeighborSampler(n_layers)

dataloader = dgl.dataloading.NodeDataLoader(
    graph,
    train_ids,
    sampler,
    batch_size=batch_size,
    shuffle=True,
    drop_last=False,
    num_workers=1,
)
print(
    f"INPUT: {model_name}  {hidden_dim}, {hidden}, {same_edge_values}, {edge_norm}"
)
t1 = time.time()
resolution = 0.05



INPUT: GraphConv  200, [300], False, True


In [10]:
t_start = time.time()
torch.manual_seed(run)
torch.cuda.manual_seed_all(run)
np.random.seed(run)
random.seed(run)

model = models.GCNAE(
    in_feats=pca_size,
    n_hidden=hidden_dim,
    n_layers=n_layers,
    activation=activation,
    dropout=0.1,
    hidden=hidden,
    hidden_relu=hidden_relu,
    hidden_bn=hidden_bn,
).to(device)

optim = torch.optim.Adam(model.parameters(), lr=1e-5)

scores = train.train(
    model,
    optim,
    epochs,
    dataloader,
    n_clusters,
    plot=False,
    save = False,
    cluster=["Leiden"],
    cluster_params={"Leiden": {
        "resolution": resolution

    }})



HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


ARI None, None


In [11]:
print(scores)

{'ae_end': 1629375557.994572, 'leiden_ari': 0.1722, 'leiden_nmi': 0.5122, 'leiden_sil': 0.19860229, 'leiden_cal': 329.4650670307674, 'leiden_pred': [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 1, 0, 0, 0, 0, 1, 0, 3, 0, 3, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 1, 5, 0, 0, 1, 0, 0, 0, 0, 0, 6, 7, 3, 0, 0, 5, 3, 0, 0, 7, 0, 2, 0, 1, 3, 6, 0, 1, 0, 2, 1, 6, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0, 4, 1, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 6, 3, 1, 1, 0, 2, 6, 1, 7, 1, 3, 0, 7, 2, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 1, 0, 0, 7, 0, 0, 6, 2, 0, 0, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 3, 0, 0, 0, 3, 0, 2, 3, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 7, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 2, 3, 6, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 6, 0, 2, 0, 0, 0, 0, 7, 1, 6, 2, 0, 0, 0, 0, 1, 2, 0, 2, 3, 0, 1, 0, 0, 0, 0, 0, 4, 0, 6, 5, 3, 0, 0, 0, 0, 1, 2, 0, 0, 3, 0, 2, 3, 0, 0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,