In [198]:
import numpy as np
import pandas as pd
import scanpy as sc
import math
import anndata2ri
from rpy2.robjects import r
from rpy2.robjects.conversion import localconverter
import GenSA
import scipy
from scipy.optimize import dual_annealing
from annoy import AnnoyIndex
import random
from igraph import *
from sklearn.preprocessing import normalize
import sys
import leidenalg

### Required Functions

In [215]:
def optimised_param(partition, nsamples = 500):
    """Tune (pinit, pfin, K) so the per-cluster sampling rule yields ~nsamples items.

    For a cluster of size ``f`` the rule keeps
    ``round((pinit - exp(-f / K) * (pinit - pfin)) * f)`` items. The objective
    minimised here is the absolute difference between ``nsamples`` and the
    total number of items kept over all clusters.

    The search uses ``scipy.optimize.dual_annealing`` (generalized simulated
    annealing) and stops as soon as the objective drops to ``global_min + tol``
    or ``max_time`` seconds have elapsed, whichever comes first.

    Parameters
    ----------
    partition : array-like, shape (n_items, 2)
        Column 0 holds item ids, column 1 the cluster label of each item.
    nsamples : int, default 500
        Desired total sample size; clamped to ``n_items`` when larger.

    Returns
    -------
    numpy.ndarray of dtype object, shape (3,)
        ``[best objective value, best parameter vector [pinit, pfin, K],
        number of objective evaluations]``.
    """
    import time  # local import: only needed for the wall-clock budget below

    partition = np.asarray(partition)
    n_items = partition.shape[0]
    if nsamples > n_items:
        nsamples = n_items

    global_min = 0
    tol = 1e-3
    max_time = 20
    lower = np.array([0.05, 0.9, 500])
    upper = np.array([0.1, 0.95, 4000])

    # Cluster sizes do not depend on the searched parameters: count them once.
    if n_items:
        cluster_freq = np.unique(partition[:, 1], return_counts=True)[1].astype(float)
    else:
        cluster_freq = np.zeros(0)

    class _SearchFinished(Exception):
        """Raised from the objective to end the annealing run early."""

    # Best point seen so far, tracked here because the early exit below
    # prevents dual_annealing from returning its own result object.
    best = {"fun": np.inf, "x": None, "nfev": 0}
    deadline = time.monotonic() + max_time

    def pin_find(params, max_c = nsamples):
        pinit, pfin, K = params
        proportion = pinit - np.exp(-cluster_freq / K) * (pinit - pfin)
        # Sampling without replacement can never take more than the cluster
        # holds, so the number of items drawn is the (clipped) sum of `prop`;
        # there is no need to actually draw them to evaluate the objective.
        prop = np.clip(np.round(proportion * cluster_freq), 0, cluster_freq)
        value = float(np.abs(max_c - prop.sum()))

        best["nfev"] += 1
        if value < best["fun"]:
            best["fun"] = value
            best["x"] = np.array(params, dtype=float)

        # Threshold stop and time budget.
        if value <= global_min + tol or time.monotonic() >= deadline:
            raise _SearchFinished
        return value

    try:
        dual_annealing(pin_find, bounds=list(zip(lower, upper)))
    except _SearchFinished:
        pass

    # Built element by element: a plain np.asarray on (scalar, vector, scalar)
    # is ragged and is rejected by current NumPy.
    result = np.empty(3, dtype=object)
    result[0] = best["fun"]
    result[1] = best["x"]
    result[2] = best["nfev"]
    return result

def annPartition(data, n_trees = 30, n_neighbors = 6):
    """Cluster the columns of ``data`` via an approximate-nearest-neighbour graph.

    Every column of ``data`` is added to an Annoy index (angular distance) as
    one item. Each item is then linked to the ``n_neighbors`` items Annoy
    returns for it, and the resulting graph is partitioned with igraph's
    Leiden implementation using the modularity objective.

    Parameters
    ----------
    data : numpy.ndarray, shape (n_features, n_items)
        Items are columns; rows are the vector components.
    n_trees : int, default 30
        Number of trees passed to ``AnnoyIndex.build``.
    n_neighbors : int, default 6
        Number of neighbours requested from ``get_nns_by_item`` per item.

    Returns
    -------
    numpy.ndarray, shape (n_items, 2)
        Column 0 is the item (column) index, column 1 its cluster label.
    """
    f = data.shape[0]
    n_items = data.shape[1]
    print("datashpae",data.shape)
    print("Building graphs with", n_items, "nodes...")

    index = AnnoyIndex(f, 'angular')
    for i in range(n_items):
        index.add_item(i, data[:, i])
    index.build(n_trees)

    # One (item, neighbour) edge per returned neighbour. Built straight from
    # the per-item lists so a short neighbour list cannot produce a ragged array.
    # NOTE(review): Annoy presumably returns the query item among its own
    # neighbours, which would create self-loops here — confirm.
    edges = [
        (i, int(j))
        for i in range(n_items)
        for j in index.get_nns_by_item(i, n_neighbors)
    ]

    # Passing n explicitly keeps one vertex per item even if no edge mentions it.
    g = Graph(n=n_items, edges=edges)
    # With multiple=False and loops=False nothing is removed, so duplicate
    # edges and self-loops are kept as in the original call.
    G = g.simplify(multiple=False, loops=False)

    print("Leiden Partition...")
    partition = G.community_leiden(objective_function="modularity")

    dataMatrix = np.c_[np.arange(n_items), np.array(partition.membership)]
    return dataMatrix
        
    

### Script Begins here

In [217]:
# Read the 10x-format matrix directory 'hg19/' into `adata`, naming the
# variables by gene symbol.
adata = sc.read_10x_mtx(
    'hg19/',
    var_names='gene_symbols',
    cache=True,
)

In [218]:
# Dense copy of the expression matrix. Sparse input of any format is
# converted through its own toarray(); dense input is passed through.
ob = adata.X.toarray() if scipy.sparse.issparse(adata.X) else np.asarray(adata.X)
print(ob.shape)


(2700, 32738)


In [219]:
# (adata)
nsamples=500
method = "sps"
optm_parameters=False
pinit=0.195
pfin = 0.9
K=500

In [220]:
# TODO: port the guards that are still disabled in this notebook:
#   - hand back `adata` unchanged when nsamples > adata.shape[1]
#   - reject any `method` other than "random" or "sps" ("Method not found")

# NOTE(review): `ob.shape` printed as (2700, 32738) above. If rows are the
# observations (cells), shape[1] counts genes rather than samples — confirm
# whether `ob` should be transposed before sampling columns.
no_samples = ob.shape[1]
init = no_samples if no_samples < 20000 else min(20000, round(no_samples / 3))
print(init)

# Draw `init` distinct ids from 0 .. no_samples - 1. replace=False is required:
# the default draws with replacement, so the same column could be selected
# several times and enter the neighbour graph as duplicate nodes.
sample_ids = np.random.choice(no_samples, init, replace=False)
print(sample_ids.shape)
print(sample_ids)

10913
(10913,)
[ 9022 14083 27597 ... 14156  8145 25504]


In [221]:
if method == "sps":
    # R reference for building `data` (the HVG selection and Log2Normalize
    # step are not ported):
    #     if(!any(reducedDimNames(object)=="CComponents"))
    #         data = Log2Normalize(normcounts(object)[SingleCellExperiment::rowData(object)$HVG, sample_ids],return.sparse = FALSE)
    #     else
    #         data = as.matrix(normcounts(object)[, sample_ids])

    # NOTE(review): `data` is taken from the raw `ob`; no normalisation is
    # applied. A `normalize(ob)` result was previously computed here but never
    # used, so that dead call is dropped. The R reference uses Log2Normalize —
    # confirm which normalisation is intended and apply it to `data` explicitly.
    data = np.take(ob, sample_ids, axis=1)
    print(data.shape)

    # Two-column array: item index, cluster label.
    partition = annPartition(data)

    # ---- How to convert partition-----
    # data = pd.Series([1, 1, 1, 2, 3, 3, 3, 3, 4, 4, 5])
    # data.value_counts()

    if optm_parameters:
        # optimised_param returns (objective value, parameter vector,
        # evaluation count); the parameter vector is [pinit, pfin, K].
        param = optimised_param(partition, nsamples)
        pinit, pfin, K = param[1]
        print("Optimized parameters:\n", param,"\n")

    # TODO: port the per-cluster sampling step. The draft below is kept for
    # reference only and is NOT runnable as written. Known problems:
    #   - `partition[:][1]` selects row 1; the label column is `partition[:, 1]`
    #   - `np.unique(..., return_counts=True)` returns a tuple (no `.T`)
    #   - `np.vstack` expects a single sequence of arrays
    #   - the loop variable is `i` but the body indexes `prop[k]`
    #   - a list cannot be extended with `+ ndarray`
    #   - the result is named `subsample` but printed as `subsamples`
    #
    #     #old seed
    #
    #     # frequeny table of partition[:][1]
    #     cluster_freq = np.array(np.unique(partition[:][1], return_counts=True).T)
    #     prop = np.round((pinit - np.exp(-cluster_freq/K) * (pinit - pfin) )* cluster_freq)
    #     cluster_freq = np.vstack(cluster_freq, prop).T
    #
    #     # FIX ME: numpy.reshape() of some kind to be done here
    #     # prop = reshape2::melt(prop)$value
    #
    #     subsample = []
    #
    #     for i in range(len(prop)):
    #         subsample = subsample + np.random.choice(partition[np.nonzero(partition[:][1]==i)], size = prop[k], replace = False)
    #
    #     subsample = np.asarray(subsample)
    #
    #     .Random.seed = oldseed
    #     SummarizedExperiment::colData(object)$Sampling = rep(FALSE, ncol(object))
    #     SummarizedExperiment::colData(object)$Sampling[sample_ids[subsamples]] =  TRUE
    #
    #     print(len(subsamples), "Samples extracted.\n")

    # Code below needs to be converted first

    # elif(method=="random"):
    #     oldseed = .Random.seed
    #     subsamples = sample(sample_ids, nsamples)
    #     .Random.seed = oldseed
    #     SummarizedExperiment::colData(object)$Sampling = rep(FALSE, ncol(object))
    #     SummarizedExperiment::colData(object)$Sampling[subsamples] =  TRUE
    # else:
    #     print("Invalid Sampling. Fallback to all samples")

    #object@metadata[["dropClust"]] = c(unlist(object@metadata[["dropClust"]]),"Sampling")



(2700, 10913)
datashpae (2700, 10913)
Building graphs with 10913 nodes...
Louvain Partition...
[[ 0  0]
 [ 1  0]
 [ 2  0]
 [ 3  0]
 [ 4  0]
 [ 5  7]
 [ 6  0]
 [ 7  0]
 [ 8  7]
 [ 9  0]
 [10 10]
 [11  0]
 [12 10]
 [13  0]
 [14  0]
 [15  0]
 [16  0]
 [17 10]
 [18  7]
 [19  0]
 [20  0]
 [21  1]
 [22  0]
 [23 10]
 [24  1]
 [25  0]
 [26  0]
 [27  0]
 [28  0]
 [29 10]
 [30  0]
 [31 10]
 [32  1]
 [33  0]
 [34  1]
 [35  1]
 [36  0]
 [37  0]
 [38  0]
 [39  1]
 [40  0]
 [41 10]
 [42  0]
 [43  0]
 [44  9]
 [45  1]
 [46  1]
 [47  0]
 [48  0]
 [49  0]
 [50  1]
 [51  0]
 [52  0]
 [53  3]
 [54  8]
 [55  0]
 [56  9]
 [57  1]
 [58  9]
 [59 10]
 [60  0]
 [61 10]
 [62  3]
 [63  0]
 [64 10]
 [65  0]
 [66  0]
 [67 10]
 [68 10]
 [69 10]
 [70  1]
 [71  0]
 [72  0]
 [73  0]
 [74  0]
 [75  0]
 [76  0]
 [77 10]
 [78 10]
 [79  4]
 [80  0]
 [81 10]
 [82  0]
 [83  0]
 [84  7]
 [85 10]
 [86  0]
 [87  6]
 [88  0]
 [89  5]
 [90 10]
 [91  1]
 [92 10]
 [93 10]
 [94  0]
 [95  4]
 [96  0]
 [97  7]
 [98 10]
 [99 10]]
