# Setup

In [1]:
suppressPackageStartupMessages({
    library(scater)
    library(scran)
    library(SingleCellExperiment)
    library(tidyverse)
    library(reticulate)
    library(BiocParallel)
    library(logger)
    library(showtext)
    library(batchelor)
})
options(repr.plot.width = 10, repr.plot.height = 8, repr.plot.res = 300)

In [2]:
set.seed(319)
cols = sample(colorRampPalette(ggsci::pal_futurama()(12))(30))

# Epithelial

## Read SCE

In [59]:
sce <- readRDS("../../data/expression/sce/sce_Smartseq2_scHCC-CD45_featureCounts_qc_clustered_epithelial.rds")
sce <- sce[grepl("protein_coding", rowData(sce)$Biotype),]
sce <- sce[grepl("^chr", rowData(sce)$Chr), ]

In [60]:
## sce <- sce[, !(sce$leiden_sub %in% c("C08", "C10"))]
sce <- sce[, !(sce$donor %in% c("D20171229"))]
sce <- sce[, !(sce$leiden_sub %in% c("C12"))]


In [61]:
keep <- calculateAverage(sce) > 1
sce <- sce[keep , ]

In [62]:
table(sce$donor)


D20170227 D20170322 D20170327 D20170412 D20171109 D20171215 D20171220 D20180108 
        2        31       205        17        18        69       181        23 
D20180110 
      268 

## Generate single cell count matrix


In [63]:
ct <- as.matrix(counts(sce))
#adat_counts_filtered = pd.DataFrame(adat_sub_filtered.layers['counts'].T.toarray(), index=overlap_genes, columns=adat_sub_filtered.obs.index)

In [64]:
write.table(ct, "./data_for_run/counts_hepatocytes_fromEpi_20210114.tsv", sep = '\t', quote = F, row.names = T)
#counts.to_csv(path_or_buf=os.path.join(BASE_DIR, 'analyses/inferCNV/data_for_run/count_SC_and_GTEx.tsv'), sep = '\t')

# Generate single cell annotation

In [65]:
annotation = colData(sce)[, c("cell.id", "donor", "leiden_sub")]
colnames(annotation) <- c("sample", "annotation", "cluster")
annotation <- as.data.frame(annotation)
#annotation$annotation <- sapply(strsplit(x = annotation$annotation, split = "-"), `[`, 1)
#annotation$annotation <- ifelse(grepl("Normal", annotation$annotation), "Normal", annotation$annotation)

In [66]:
annotation$annotation <- ifelse(grepl("C08|C10", annotation$cluster), annotation$cluster, annotation$annotation)

In [67]:
table(annotation$annotation)


      C08       C10 D20170227 D20170322 D20170327 D20170412 D20171109 D20171220 
       67        53         2        31       203        17        14       158 
D20180108 D20180110 
        2       267 

In [68]:
readr::write_tsv(annotation, file = "./data_for_run/annotation_hepatocytes_fromEpi_20210114.tsv", col_names = F)

## Generate gene info

In [69]:
geneorder <- cbind(gene = rownames(sce), as.data.frame(rowData(sce)[, c("Chr", "Start", "End")]))

In [70]:
geneorder$Start <- sapply(strsplit(geneorder$Start, ";"), `[[`, 1)
geneorder$End <- sapply(strsplit(geneorder$End, ";"), `[[`, 1)

In [71]:
readr::write_tsv(geneorder, path = "./data_for_run/gene_info.tsv", col_names = F)

# Stromal and Epi

In [4]:
sce1 <- readRDS("../../data/expression/sce/sce_Smartseq2_scHCC-CD45_featureCounts_qc_clustered_epithelial.rds")
sce1 <- sce1[grepl("protein_coding", rowData(sce1)$Biotype),]
sce1 <- sce1[grepl("^chr", rowData(sce1)$Chr), ]

In [5]:
sce2 <- readRDS("../../data/expression/sce/sce_Smartseq2_scHCC-CD45_featureCounts_qc_clustered_stromal.rds")
sce2 <- sce2[grepl("protein_coding", rowData(sce2)$Biotype),]
sce2 <- sce2[grepl("^chr", rowData(sce2)$Chr), ]

In [6]:
ct <- cbind(as.matrix(counts(sce1)), as.matrix(counts(sce2)))
#adat_counts_filtered = pd.DataFrame(adat_sub_filtered.layers['counts'].T.toarray(), index=overlap_genes, columns=adat_sub_filtered.obs.index)

In [7]:
write.table(ct, "./data_for_run/counts_stroma_20210421.tsv", sep = '\t', quote = F, row.names = T)
#counts.to_csv(path_or_buf=os.path.join(BASE_DIR, 'analyses/inferCNV/data_for_run/count_SC_and_GTEx.tsv'), sep = '\t')

# Generate single cell annotation

In [9]:
annotation = rbind(colData(sce1)[, c("cell.id", "donor", "leiden_sub")], colData(sce2)[, c("cell.id", "donor", "leiden_sub")])
colnames(annotation) <- c("sample", "annotation", "cluster")
annotation <- as.data.frame(annotation)

In [15]:
readr::write_tsv(annotation, file = "./data_for_run/annotation_stroma_20210421.tsv", col_names = F)

## Generate gene info

In [16]:
geneorder <- cbind(gene = rownames(sce1), as.data.frame(rowData(sce1)[, c("Chr", "Start", "End")]))

In [17]:
geneorder$Start <- sapply(strsplit(geneorder$Start, ";"), `[[`, 1)
geneorder$End <- sapply(strsplit(geneorder$End, ";"), `[[`, 1)

In [18]:
readr::write_tsv(geneorder, path = "./data_for_run/gene_info_stroma.tsv", col_names = F)

“The `path` argument of `write_tsv()` is deprecated as of readr 1.4.0.
Please use the `file` argument instead.
