In [None]:
#install.packages("singleCellHaystack")

In [None]:
# Show the current working directory, move one level up, and show it again.
# The relative "datasets/extract/..." paths used below are resolved against
# the directory selected here.
getwd()
setwd("..")
getwd()

In [None]:
library(singleCellHaystack)

In [None]:
# Fix the RNG state so that later random draws are repeatable.
set.seed(seed = 1234)

In [None]:
library(Matrix)
library(Seurat)
library(mclust)
library(SingleCellExperiment)
#library(clustree)
#citation("mclust")

In [None]:
# Load one extracted dataset from datasets/extract/ and wrap it in a
# SingleCellExperiment.
#
# Arguments:
#   save_name  dataset identifier that is substituted into every file name.
#
# Returns a SingleCellExperiment whose "counts" assay is the count matrix
# converted to a dense matrix, with rowData / colData taken from the two
# metadata tables; colData additionally carries a `size_factor` column read
# from <save_name>.size_factor.tsv.
read_in_data <- function(save_name) {
    # Substitute the dataset name into a path template.
    path_for <- function(template) sprintf(template, save_name)

    # Count matrix in MatrixMarket format plus its row and column labels.
    count_matrix <- readMM(path_for("datasets/extract/%s.data.counts.mm"))
    row_labels <- read.table(path_for('datasets/extract/%s.data.row'))$V1
    col_labels <- read.table(path_for('datasets/extract/%s.data.col'))$V1
    rownames(count_matrix) <- row_labels
    colnames(count_matrix) <- col_labels

    # Tab-separated metadata tables for the rows and the columns.
    row_meta <- read.table(path_for('datasets/extract/%s.metadatarow.tsv'), sep = '\t')
    col_meta <- read.table(path_for('datasets/extract/%s.metadatacol.tsv'), sep = '\t')

    # Size factors are stored in a separate file; attach them as a column.
    col_meta['size_factor'] <- read.table(
        path_for('datasets/extract/%s.size_factor.tsv'),
        sep = '\t'
    )$V1

    SingleCellExperiment(
        list(counts = as.matrix(count_matrix)),
        rowData = row_meta,
        colData = col_meta
    )
}

# Compute PCA and t-SNE embeddings with Seurat and store them on the
# SingleCellExperiment.
#
# Arguments:
#   sce      SingleCellExperiment with a "counts" assay.
#   pca_dim  number of leading principal components that are fed to t-SNE and
#            kept in the stored PCA embedding (default 10).
#
# Returns `sce` with its reducedDims replaced by list(PCA = ..., TSNE = ...).
calculate_low_dim <- function(sce, pca_dim = 10) {
    raw_counts <- assay(sce, "counts")
    # min.cells / min.features are 0, so no filtering thresholds are applied.
    so <- CreateSeuratObject(
        counts = raw_counts,
        project = "scRNAseq",
        assay = "RNA",
        min.cells = 0,
        min.features = 0
    )

    # Seurat steps, one per line, in the same order as before.
    so <- NormalizeData(so)
    so <- ScaleData(so, features = rownames(so))
    so <- FindVariableFeatures(so, selection.method = "vst", nfeatures = 2000)
    so <- RunPCA(so, features = VariableFeatures(object = so))
    so <- RunTSNE(so, dims = 1:pca_dim)

    # Keep only the first `pca_dim` principal components alongside the t-SNE.
    pca_coords <- Embeddings(so, reduction = "pca")[, 1:pca_dim]
    tsne_coords <- Embeddings(so, reduction = "tsne")
    reducedDims(sce) <- list(PCA = pca_coords, TSNE = tsne_coords)
    sce
}

In [None]:
# Names of the real datasets, stored as a list of character scalars.
dataset_name_all <- as.list(c(
    "Kohinbulk_filtered",
    "HumanLiver_filtered",
    "Zhengmix8eq_filtered"
))

# Show how many datasets were listed.
length(dataset_name_all)

In [None]:
# Names of all simulated datasets: every combination of total cell count,
# proportion label and replicate index 1..10, formatted as
# "Simul_<ncells_total>_<prop>_<i>_filtered".
#
# expand.grid() varies its first argument fastest, so listing the replicate
# first and the cell count last reproduces the order of the nested loops
# (cell count outermost, replicate innermost) without growing a list one
# element at a time.
simul_grid <- expand.grid(
    replicate = 1:10,
    prop = c('1e-2', '5e-3', '1e-3', '5e-4'),
    ncells_total = c('1000', '2000', '5000', '10000'),
    stringsAsFactors = FALSE
)
dataset_name_all <- as.list(sprintf(
    'Simul_%s_%s_%s_filtered',
    simul_grid$ncells_total,
    simul_grid$prop,
    simul_grid$replicate
))

In [None]:
# Display the current contents of dataset_name_all for inspection.
dataset_name_all

In [None]:
# Names of the Tabula datasets, stored as a list of character scalars.
tabula_name_all <- as.list(c(
    "TabulaAorta_filtered",
    "TabulaBladder_filtered",
    "TabulaBrainMyeloid_filtered",
    "TabulaBrainNonMyeloid_filtered",
    "TabulaDiaphragm_filtered",
    "TabulaFat_filtered",
    "TabulaHeart_filtered",
    "TabulaKidney_filtered",
    "TabulaLargeIntestine_filtered",
    "TabulaLimbMuscle_filtered",
    "TabulaLiver_filtered",
    "TabulaLung_filtered",
    "TabulaMammaryGland_filtered",
    "TabulaMarrow_filtered",
    "TabulaPancreas_filtered",
    "TabulaSkin_filtered",
    "TabulaSpleen_filtered",
    "TabulaThymus_filtered",
    "TabulaTongue_filtered",
    "TabulaTrachea_filtered"
))

In [None]:
#taskset -c 12,13,14,15 Rscript 4_exp1_step2_clustering_1-Copy2.r
#taskset -c 16,17,18,19 Rscript 4_exp1_step2_clustering_1-Copy3.r

In [None]:
# Preview the first few entries of dataset_name_all.
head(dataset_name_all)

In [None]:
# Select the Tabula datasets as the set to process; this replaces any
# dataset list assigned to dataset_name_all earlier.
dataset_name_all <- tabula_name_all

In [None]:
# Display the dataset list that the processing loop below iterates over.
dataset_name_all

In [None]:
# Run singleCellHaystack on every dataset in `dataset_name_all`.
#
# For each dataset this writes two files into datasets/extract/:
#   <name>.hvg.ht.pca.tsv          haystack result table, sorted by ascending
#                                  `log.p.adj`
#   <name>.hvg.ht.pca.runtime.tsv  total wall-clock seconds (preprocessing
#                                  plus the haystack call)
#
# Iterating the list directly (rather than indexing it with 1:length(...))
# makes the loop a no-op when the list is empty.
for (dataset_name in dataset_name_all) {
    print(dataset_name)

    # --- Preprocessing: load the data and attach PCA / t-SNE embeddings. ---
    start_time <- as.numeric(Sys.time())

    # The object must be named `sce`: the haystack call below reads it under
    # that name.
    sce <- read_in_data(dataset_name)
    sce <- calculate_low_dim(sce, pca_dim = 10)

    end_time <- as.numeric(Sys.time())
    time_preprocessing <- end_time - start_time

    # --- Haystack, using each row's median count as its detection cutoff. ---
    # NOTE(review): the output files are labelled "pca", but no `coord`
    # argument is passed here, so haystack uses its default embedding --
    # confirm this is the intended one.
    start_time <- as.numeric(Sys.time())
    res <- haystack(sce, cutoff = apply(assay(sce, "counts"), 1, 'median'))
    end_time <- as.numeric(Sys.time())
    time <- end_time - start_time

    # `$result` relies on `$` partial matching of the element name
    # (presumably `results` -- confirm against the installed package version).
    result_table <- res$result
    # Order by the column vector itself; calling order() on a one-column
    # data frame goes through xtfrm() on a data frame.
    sorted_rows <- order(result_table[["log.p.adj"]])

    write.table(result_table[sorted_rows, ],
                file = sprintf('datasets/extract/%s.hvg.ht.pca.tsv', dataset_name),
                sep = '\t',
                row.names = TRUE,
                col.names = TRUE)

    write.table(time_preprocessing + time,
                file = sprintf('datasets/extract/%s.hvg.ht.pca.runtime.tsv', dataset_name),
                sep = '\t',
                row.names = FALSE,
                col.names = FALSE)
}