# Comparison in single-cell data



## Data preprocessing

In [3]:
# Load data and processing

# Write a matrix or data frame as a tab-separated table whose first column,
# named "probe", holds the row identifiers of `x`.
#
# This replaces the previous "write.table() then patch the header with
# `sed -i`" approach: `sed -i` without a backup suffix is a GNU extension
# (it fails on BSD/macOS sed and is unavailable on Windows), and the exit
# status of system() was never checked, so a failure went unnoticed.
#
# x:    matrix or data frame with features in rows and samples in columns.
# path: destination file.
write_probe_table <- function(x, path) {
    ids <- rownames(x)
    if (is.null(ids)) {
        # Same fallback write.table() uses for unnamed rows
        ids <- seq_len(nrow(x))
    }
    # Drop the row names so they are only emitted once, as the "probe" column
    rownames(x) <- NULL
    out <- data.frame(probe = ids, x, check.names = FALSE,
                      stringsAsFactors = FALSE)
    write.table(out, path, sep = "\t", col.names = TRUE, row.names = FALSE)
    invisible(path)
}

# Load RNA-seq data
exp <- readRDS("../data/single-cell/CellLines_RNAseqCounts.RDS", refhook = NULL) #ENS for genes and counts
# Apply log2 on RNA-seq data (+1 pseudo-count so that zero counts map to 0)
exp <- log2(exp + 1)
# Load ATAC-seq data
atac_counts <- readRDS("../data/single-cell/CellLines_ATACseqCounts.RDS", refhook = NULL) # peaks counts
# Load metadata
metadata <- readRDS("../data/single-cell/CellLines_metadata.RDS", refhook = NULL)
# Rename the ATAC-seq columns with the first metadata column
# NOTE(review): this assumes the metadata rows are in the same order as the
# ATAC-seq columns and that column 1 holds the RNA-seq sample ids - confirm.
colnames(atac_counts) <- metadata[, 1]

# Export RNA-seq data as tab-separated table with a leading "probe" column
write_probe_table(exp, "../data/single-cell/CellLines_RNAseqCounts.txt")
# Export ATAC-seq data as tab-separated table with a leading "probe" column
write_probe_table(atac_counts, "../data/single-cell/CellLines_ATACseqCounts.txt")

## Running comparison

In [4]:
#save.image(file="state_env_scdata.RData")

In [5]:
library("ggplot2")
library("clusterCrit")
source("runfactorization.R")

# Parameters for the plots
dot_size <- 1.5
dot_alpha <- 1.0
xlabel <- "Factor 1"
ylabel <- "Factor 2"

# Sample annotations: column 1 = sample id, column 2 = cell type
sample_annot <- metadata[, c("sample.rna", "celltype")]

# Run factorization methods (2 factors) on the exported RNA-seq / ATAC-seq tables
out <- runfactorization("../data/single-cell/",
                        c("CellLines_RNAseqCounts.txt", "CellLines_ATACseqCounts.txt"),
                        2,
                        sep = "\t",
                        filtering = "stringent")

# One C-index per factorization method; preallocated instead of grown with rbind()
c_index <- rep(NA_real_, length(out$factorizations))

# Make sure the output directory exists before plots/tables are written
dir.create("../results", showWarnings = FALSE, recursive = TRUE)

# For each factorization method
for (i in seq_along(out$factorizations)) {

    # Get factorization result (first element of the i-th entry)
    factors <- out$factorizations[[i]][[1]]
    # Work on a per-iteration copy so that the filtering done for one method
    # never leaks into the next one through the global `sample_annot`
    annot <- sample_annot

    # Delete NAs
    factors_ok <- complete.cases(factors[, 1:2, drop = FALSE])
    annot_ok <- complete.cases(annot[, 1:2, drop = FALSE])
    if (nrow(factors) == nrow(annot)) {
        # Rows are paired by position: drop a sample from BOTH tables whenever
        # either its factors or its annotation is missing, so that they stay
        # aligned (filtering each table on its own shifts the pairing).
        keep <- factors_ok & annot_ok
        factors <- factors[keep, , drop = FALSE]
        annot <- annot[keep, , drop = FALSE]
    } else {
        # Row counts already differ: fall back to independent filtering
        factors <- factors[factors_ok, , drop = FALSE]
        annot <- annot[annot_ok, , drop = FALSE]
    }

    # Data to be plotted
    df <- data.frame(x = factors[, 1], y = factors[, 2], color_by = annot[, 2])
    # Plot results
    p <- ggplot(df, aes(x = x, y = y)) +
        geom_point(aes(color = color_by), size = dot_size, alpha = dot_alpha) +
        xlab(xlabel) + ylab(ylabel) +
        theme(plot.margin = margin(20, 20, 10, 10),
              axis.text = element_text(size = rel(1), color = "black"),
              axis.title = element_text(size = 16),
              axis.title.y = element_text(size = rel(1.1), margin = margin(0, 10, 0, 0)),
              axis.title.x = element_text(size = rel(1.1), margin = margin(10, 0, 0, 0)),
              axis.line = element_line(color = "black", size = 0.5),
              axis.ticks = element_line(color = "black", size = 0.5),
              panel.border = element_blank(),
              panel.grid.major = element_blank(),
              panel.grid.minor = element_blank(),
              panel.background = element_blank(),
              legend.key = element_rect(fill = "white"),
              legend.text = element_text(size = 16),
              legend.title = element_text(size = 16)
        ) +
        scale_color_manual(values = c("#0072B2", "#D55E00", "#CC79A7"))
    # Export plot as JPEG image; the plot is passed explicitly rather than
    # relying on ggsave()'s implicit last_plot() default
    ggsave(paste0("../results/plot_", out$method[i], ".jpg"), plot = p)

    # Encode cell type annotations by integer codes 1..K.
    # Whole labels are mapped (not substrings), so a label such as "HCT116"
    # cannot be turned into 1116 the way a gsub("HCT", 1, ...) would do.
    ann <- as.integer(factor(as.character(annot[, 2])))
    # Compare factors and annotations; intCriteria() returns a list, from
    # which the single requested criterion is extracted as a plain number
    crit <- intCriteria(factors, ann, crit = c("C_index"))
    c_index[i] <- as.numeric(unlist(crit, use.names = FALSE))[1]

}

# One row per method: method name, C-index.
# (`out$method` is the accessor already used for the plot file names; the
# method column is bound once instead of once per spelling of the name.)
report_cindex <- data.frame(method = out$method, c_index = c_index,
                            stringsAsFactors = FALSE)

# Export results as one tab-separated table
write.table(report_cindex, "../results/singlecell_cindex.txt",
            sep = "\t", col.names = FALSE, row.names = FALSE)


Loading required package: MASS
Loading required package: NMF
Loading required package: pkgmaker
Loading required package: registry

Attaching package: ‘pkgmaker’

The following object is masked from ‘package:base’:

    isFALSE

Loading required package: rngtools
Loading required package: cluster
NMF - BioConductor layer [OK] | Shared memory capabilities [NO: bigmemory] | Cores 7/8
  To enable shared memory capabilities, try: install.extras('
NMF
')
Loading required package: mclust
Package 'mclust' version 5.4.5
Type 'citation("mclust")' for citing this R package in publications.
Loading required package: InterSIM
Loading required package: tools
Loading required package: ade4

Attaching package: ‘ade4’

The following object is masked from ‘package:BiocGenerics’:

    score


Attaching package: ‘GPArotation’

The following object is masked from ‘package:NMF’:

    entropy


Attaching package: ‘MOFAtools’

The following objects are masked from ‘package:NMF’:

    featureNames, featureNam

[1] "No output file provided, using a temporary file..."


ERROR: Error in py_call_impl(callable, dots$args, dots$keywords): TypeError: 'NoneType' object is not callable

Detailed traceback: 
  File "/home/chernand/anaconda3/envs/momix/lib/python3.6/site-packages/mofa/core/entry_point.py", line 411, in train_model
    self.model = runMOFA(self.data, self.data_opts, self.model_opts, self.train_opts, self.train_opts['seed'])
  File "/home/chernand/anaconda3/envs/momix/lib/python3.6/site-packages/mofa/core/build_model.py", line 143, in runMOFA
    net.iterate()
  File "/home/chernand/anaconda3/envs/momix/lib/python3.6/site-packages/mofa/core/BayesNet.py", line 148, in iterate
    self.nodes[node].update()
  File "/home/chernand/anaconda3/envs/momix/lib/python3.6/site-packages/mofa/core/multiview_nodes.py", line 114, in update
    self.nodes[m].updateParameters()
  File "/home/chernand/anaconda3/envs/momix/lib/python3.6/site-packages/mofa/core/updates.py", line 252, in updateParameters
    term3 = 0.5*s.log(s.dot(ZZ[:,k], tau) + alpha[k])

