# CCNMF: analysis of real data

In [2]:
library(NMF)
library(dplyr)
library(tidyr)

library(Matrix)
library(devtools)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(org.Hs.eg.db)

library(pheatmap)
library(ggplot2)
library(Seurat)
library(stringr)
library(Rtsne)
library(uwot)
library(mclust)
library(biomaRt)
library(cowplot)
library(ggplotify)
library(RColorBrewer)
library(minerva)

Download the [ovarian cancer cell line data](https://zenodo.org/record/2363826#.XjkYohNKhE4) published with [clonealign: statistical integration of independent single-cell RNA and DNA sequencing data from human cancers](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1645-z).

Load the single-cell RNA data and the copy number data.

In [None]:
# Input locations: the copy number calls and the two filtered gene-barcode
# matrix directories (TOV2295 and OV2295n2, hg19).
pathDNA <- '/Users/Amssbaixiangqi/Desktop/Data/clonealign-processed-data/T_OV2295/cnv'
pathRNA_TOV <- '/Users/Amssbaixiangqi/Desktop/Data/clonealign-processed-data/T_OV2295/10X/TOV2295/outs/filtered_gene_bc_matrices/hg19'
pathRNA_OV <- '/Users/Amssbaixiangqi/Desktop/Data/clonealign-processed-data/T_OV2295/10X/OV2295n2/outs/filtered_gene_bc_matrices/hg19'

### Read all CNV data
OV_CNV <- read.csv(file.path(pathDNA, 'cnv_data.csv'))

TOV_gene <- read.table(file.path(pathRNA_TOV, 'genes.tsv'))
OV_gene <- read.table(file.path(pathRNA_OV, 'genes.tsv'))

# Load single-cell data: 4918 cells with 32738 genes
TOV_RNA <- InputRNA(pathRNA_TOV)
# 1717 cells with 32738 genes
OV_RNA <- InputRNA(pathRNA_OV)

# Barcodes present in both samples would give duplicated column names once the
# matrices are combined, so the TOV copies get a 'T' suffix. The vectorised
# lookup is a no-op when the samples share no barcode (the former
# 1:length(...) loop failed in that case).
commonBarcodes <- intersect(colnames(OV_RNA), colnames(TOV_RNA))
index <- which(colnames(TOV_RNA) %in% commonBarcodes)
colnames(TOV_RNA)[index] <- paste0(colnames(TOV_RNA)[index], 'T')
### Combine these two datasets
AllRNA <- cbind(TOV_RNA, OV_RNA)


In [None]:
# Function for reshaping long-format copy number data into the default input
#
# Data: data frame with one row per (cell, bin) and the columns
#   single_cell_id, chr, start, end and copy_number. Every cell must have the
#   same number of rows, with the bins listed in the same order for every cell
#   (the bin coordinates are taken from the first cell only).
# Returns: data frame with the columns chr, start, end, width (end - start),
#   followed by one copy-number column per cell, named by cell id in order of
#   first appearance in Data.
ProcessOriginalData <- function(Data){
  # Work on character ids so the result does not depend on whether the column
  # was read as a factor or as character.
  singlecellid <- as.character(Data$single_cell_id)
  if (length(singlecellid) == 0) {
    stop("Data has no rows to reshape", call. = FALSE)
  }
  if (anyNA(singlecellid)) {
    stop("single_cell_id contains missing values", call. = FALSE)
  }

  # unique() keeps first-appearance order, which is the order the per-cell
  # columns are built in; sorted factor levels could label the wrong cell.
  cellIds <- unique(singlecellid)

  firstCellRows <- which(singlecellid == singlecellid[1])
  Outputdata <- data.frame(
    chr = Data$chr[firstCellRows],
    start = Data$start[firstCellRows],
    end = Data$end[firstCellRows]
  )
  Outputdata$width <- Outputdata$end - Outputdata$start
  N <- length(firstCellRows)

  # Group the copy numbers by cell id instead of by row position, and fail
  # loudly if a cell does not cover the same number of bins as the first one.
  perCell <- split(Data$copy_number, factor(singlecellid, levels = cellIds))
  if (any(lengths(perCell) != N)) {
    stop("every cell must have the same number of bins as the first cell",
         call. = FALSE)
  }

  # Bind all cell columns in one step rather than growing the frame per cell.
  cellColumns <- as.data.frame(do.call(cbind, unname(perCell)))
  Outputdata <- cbind(Outputdata, cellColumns)
  colnames(Outputdata)[4 + seq_along(cellIds)] <- cellIds
  Outputdata
}

In [None]:
# Reshape the long-format CNV table and pass the combined RNA matrix through
# run_Seurat_RNA (defined elsewhere).
CNVmatrix <- ProcessOriginalData(OV_CNV)
RNAobject <- run_Seurat_RNA(AllRNA)

# Logistic function: maps any real value into (0, 1).
sigmoid <- function(x) {
  1 / (1 + exp(-x))
}

# Rows of the scaled data selected by the assay's vst.variable flag.
RNAmatrix1 <- RNAobject@assays$RNA@scale.data[
  RNAobject@assays$RNA@meta.features$vst.variable,
]

# Negative scaled values are replaced by their sigmoid, so RNAmatrix2 has no
# negative entries; all other entries are kept as they are.
replaceindex <- which(RNAmatrix1 < 0)
RNAmatrix2 <- RNAmatrix1
RNAmatrix2[replaceindex] <- sigmoid(RNAmatrix1[replaceindex])

Estimate the corresponding gene regions and chromosome bins using the reference genome 'hg19'.

In [None]:
# Relate the RNA matrix to the copy-number matrix against the hg19 reference.
# The result is a list whose elements are read by position afterwards.
InterVariable <- Estimate_Coupled_matrix(
  RNAmatrix2,
  CNVmatrix,
  reference_name = 'hg19'
)

In [None]:
# Unpack the result of Estimate_Coupled_matrix by position.
CNVmatrix <- InterVariable[[1]]
CoupledMatrix <- InterVariable[[3]]

# Restrict the raw counts to the rows kept in CNVmatrix.
RNAmatrix <- AllRNA[rownames(CNVmatrix), , drop = FALSE]
# RNAscale has no earlier assignment in this notebook, so subsetting it from
# itself fails with "object 'RNAscale' not found" in a fresh session. It is
# built here from RNAmatrix2, the scaled matrix that was passed to
# Estimate_Coupled_matrix.
# NOTE(review): confirm RNAmatrix2 is the intended source for RNAscale.
RNAscale <- RNAmatrix2[rownames(CNVmatrix), , drop = FALSE]

# Replace the row identifiers with the names returned by ConvertGenenames,
# using the TOV genes.tsv table as the lookup.
Gene <- read.table(file.path(pathRNA_TOV, 'genes.tsv'))
Genename <- ConvertGenenames(rownames(CNVmatrix), Gene, Logic = FALSE)
rownames(CNVmatrix) <- Genename
rownames(RNAmatrix) <- Genename
rownames(RNAscale) <- Genename

# Run CCNMF with two clusters; elements 5 and 6 of the result are kept as S1
# and S2.
ResultsCCNMF <- run_CCNMF(ncluster = 2, CNVmatrix, RNAscale, CoupledMatrix,
                          lambda1 = 1, lambda2 = 1, mu = 1)
S1 <- ResultsCCNMF[[5]]
S2 <- ResultsCCNMF[[6]]

# Differential expression on the raw counts, using S2.
RNADE <- DiffExp(RNAmatrix, S2)

In [None]:
# Rescale a numeric matrix around its overall median.
#
# Entries above the median are mapped linearly so that the maximum becomes 2;
# entries at or below the median are mapped linearly so that the minimum
# becomes -2 and the median becomes 0.
#
# M: numeric matrix.
# Returns: matrix with the same dimensions as M holding the rescaled values.
RescaleAroundMedian <- function(M) {
  center <- median(M)
  upper <- max(M)
  lower <- min(M)

  above <- which(M > center)
  below <- which(M <= center)

  scaled <- M
  scaled[above] <- (M[above] - center) / (upper - center) * 2

  # If the median equals the minimum, every entry in `below` equals the
  # median; map those entries to 0 instead of computing 0 / 0 (NaN).
  lowerSpan <- center - lower
  if (isTRUE(lowerSpan > 0)) {
    scaled[below] <- (M[below] - center) / lowerSpan * 2
  } else {
    scaled[below] <- 0
  }
  scaled
}

X <- RescaleAroundMedian(CNVmatrix)

PlotMainResult(X, RNAscale, ResultsCCNMF, RNADE)

![](TOV_2clusters.pdf)