# Setup

In [1]:
suppressPackageStartupMessages({
    library(scater)
    library(scran)
    library(SingleCellExperiment)
    library(tidyverse)
    library(reticulate)
    library(BiocParallel)
    library(logger)
    library(showtext)
    library(batchelor)
})
options(repr.plot.width = 10, repr.plot.height = 8, repr.plot.res = 300)

In [2]:
set.seed(319)
cols = sample(colorRampPalette(ggsci::pal_futurama()(12))(30))

# Read SCE

In [3]:
sce <- readRDS("../../data/expression/sce/sce_Smartseq2_scHCC-CD45_featureCounts_qc_clustered_epithelial.rds")
sce <- sce[grepl("protein_coding", rowData(sce)$Biotype),]
sce <- sce[grepl("^chr", rowData(sce)$Chr), ]

In [4]:
#sce <- sce[, !(sce$leiden_sub %in% c("C08", "C10"))]
sce <- sce[, !(sce$donor %in% c("D20170227", "D20171215", "D20171229"))]
sce <- sce[, !(sce$leiden_sub %in% c("C12"))]


In [5]:
keep <- calculateAverage(sce) > 1
sce <- sce[keep , ]

# Generate single cell count matrix


In [7]:
ct <- as.matrix(counts(sce))
#adat_counts_filtered = pd.DataFrame(adat_sub_filtered.layers['counts'].T.toarray(), index=overlap_genes, columns=adat_sub_filtered.obs.index)

In [8]:
write.table(ct, "./data_for_run/counts_hepatocytes_fromEpi_20210114.tsv", sep = '\t', quote = F, row.names = T)
#counts.to_csv(path_or_buf=os.path.join(BASE_DIR, 'analyses/inferCNV/data_for_run/count_SC_and_GTEx.tsv'), sep = '\t')

# Generate single cell annotation

In [9]:
annotation = colData(sce)[, c("cell.id", "leiden_sub")]
colnames(annotation) <- c("sample", "annotation")
annotation <- as.data.frame(annotation)
#annotation$annotation <- sapply(strsplit(x = annotation$annotation, split = "-"), `[`, 1)
#annotation$annotation <- ifelse(grepl("Normal", annotation$annotation), "Normal", annotation$annotation)

In [10]:
table(annotation$annotation)


C03 C04 C05 C06 C07 C08 C09 C10 C11 C12 C14 C15 C16 
122 119  98  89  80  19  65  32  60   1  26  18  15 

In [12]:
readr::write_tsv(annotation, file = "./data_for_run/annotation_hepatocytes_fromEpi_20210114.tsv", col_names = F)

# Generate gene info

In [13]:
geneorder <- cbind(gene = rownames(sce), as.data.frame(rowData(sce)[, c("Chr", "Start", "End")]))

In [14]:
geneorder$Start <- sapply(strsplit(geneorder$Start, ";"), `[[`, 1)
geneorder$End <- sapply(strsplit(geneorder$End, ";"), `[[`, 1)

In [15]:
readr::write_tsv(geneorder, path = "./data_for_run/gene_info.tsv", col_names = F)

“The `path` argument of `write_tsv()` is deprecated as of readr 1.4.0.
Please use the `file` argument instead.
