# Comparison in single-cell data



## Data preprocessing

In [11]:
#### Loading and preprocessing the data

# Write `x` to `path` as a tab-separated table with row and column names.
# write.table() emits no header field for the row-name column, so the header
# row is prefixed with a "probe" field to line it up with the data rows.
# The prefix and the table go through the same open connection, which avoids
# shelling out to `sed -i` (GNU-specific, and a failure inside system() does
# not raise an R error).
write_probe_table <- function(x, path) {
  con <- file(path, open = "wt")
  on.exit(close(con), add = TRUE)
  cat("probe\t", file = con)
  write.table(x, file = con, sep = "\t", col.names = TRUE, row.names = TRUE)
  invisible(path)
}

# RNA-seq counts, transformed to log2(count + 1).
exp <- readRDS("../data/single-cell/CellLines_RNAseqCounts.RDS", refhook = NULL)
exp <- log2(exp + 1)

# ATAC-seq peak counts; columns are renamed with the first metadata column.
# NOTE(review): assumes metadata rows are in the same order as the
# atac_counts columns and that metadata[, 1] holds sample identifiers -- confirm.
atac_counts <- readRDS("../data/single-cell/CellLines_ATACseqCounts.RDS", refhook = NULL)
metadata <- readRDS("../data/single-cell/CellLines_metadata.RDS", refhook = NULL)
colnames(atac_counts) <- metadata[, 1]

# Export both matrices as text files.
write_probe_table(exp, "../data/single-cell/CellLines_RNAseqCounts.txt")
write_probe_table(atac_counts, "../data/single-cell/CellLines_ATACseqCounts.txt")

## Running comparison

In [12]:
library("ggplot2")
library("clusterCrit")
source("runfactorization.R")

## Plot parameters
dot_size <- 1.5
dot_alpha <- 1.0
xlabel <- "Factor 1"
ylabel <- "Factor 2"

## Sample annotation: column 1 is the first metadata column, column 2 the
## cell type.
## NOTE(review): assumes the rows of each factor matrix returned below are in
## the same order as the metadata rows -- confirm against runfactorization().
metadata <- readRDS("../data/single-cell/CellLines_metadata.RDS", refhook = NULL)
sample_annot <- as.matrix(cbind(metadata[, 1], metadata$celltype))

### Factorization
out <- runfactorization(
  "../data/single-cell/",
  c("CellLines_RNAseqCounts.txt", "CellLines_ATACseqCounts.txt"),
  2,
  sep = "\t",
  filtering = "stringent"
)

n_methods <- length(out$factorizations)
# One C-index per factorization method, preallocated instead of grown by rbind.
c_index <- rep(NA_real_, n_methods)

for (i in seq_len(n_methods)) {
  factors <- out$factorizations[[i]][[1]]
  stopifnot(nrow(factors) == nrow(sample_annot))

  ## Drop samples with an NA in either of the first two factors.
  ## The mask is computed once on the unfiltered matrix and applied to both
  ## the factors and a per-iteration copy of the annotation, so the two stay
  ## row-aligned and filtering for one method never leaks into the next.
  keep <- !is.na(factors[, 1]) & !is.na(factors[, 2])
  factors_i <- factors[keep, , drop = FALSE]
  annot_i <- sample_annot[keep, , drop = FALSE]

  ## Plot the first two factors, coloured by cell type
  df <- data.frame(
    x = factors_i[, 1],
    y = factors_i[, 2],
    color_by = annot_i[, 2]
  )

  p <- ggplot(df, aes(x = x, y = y)) +
    geom_point(aes(color = color_by), size = dot_size, alpha = dot_alpha) +
    xlab(xlabel) + ylab(ylabel) +
    # scale_shape_manual(values=c(19,1,2:18)[seq_along(unique(shape_by))]) +
    theme(
      plot.margin = margin(20, 20, 10, 10),
      axis.text = element_text(size = rel(1), color = "black"),
      axis.title = element_text(size = 16),
      axis.title.y = element_text(size = rel(1.1), margin = margin(0, 10, 0, 0)),
      axis.title.x = element_text(size = rel(1.1), margin = margin(10, 0, 0, 0)),
      axis.line = element_line(color = "black", size = 0.5),
      axis.ticks = element_line(color = "black", size = 0.5),
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),
      legend.key = element_rect(fill = "white"),
      legend.text = element_text(size = 16),
      legend.title = element_text(size = 16)
    ) +
    scale_color_manual(values = c("#0072B2", "#D55E00", "#CC79A7"))

  ## Pass the plot explicitly rather than relying on last_plot().
  ggsave(paste0("../results/plot_", out$method[i], ".jpg"), plot = p)

  ## Integer cluster labels, one per distinct cell type. Factor codes are
  ## used instead of substituting digits into the label text, which would
  ## yield arbitrary numbers for labels that merely contain a pattern
  ## (e.g. "HCT116" -> "1116") and NA for unexpected labels.
  ann <- as.integer(factor(annot_i[, 2]))

  ## intCriteria() returns a list; a single criterion is requested, so the
  ## first element is the C-index.
  c_index[i] <- intCriteria(factors_i, ann, crit = c("C_index"))[[1]]
}

## Two-column report: method name, C-index. Fields are written unquoted.
report_cindex <- cbind(out$method, c_index)
write.table(
  report_cindex,
  "../results/singlecell_cindex.txt",
  sep = "\t",
  quote = FALSE,
  col.names = FALSE,
  row.names = FALSE
)


Saving 6.67 x 6.67 in image
Saving 6.67 x 6.67 in image
Saving 6.67 x 6.67 in image
Saving 6.67 x 6.67 in image
Saving 6.67 x 6.67 in image
