# Setup

In [None]:
dotenv::load_dot_env()

In [6]:
library(Seurat)
library(tidyverse)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.0     [32m✔[39m [34mpurrr  [39m 0.3.3
[32m✔[39m [34mtibble [39m 3.0.0     [32m✔[39m [34mdplyr  [39m 0.8.5
[32m✔[39m [34mtidyr  [39m 1.0.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.4.0

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [7]:
# Load the FG13 Seurat object and replace its cell metadata with the curated
# annotation table (one row per cell, keyed by the CellID column).
fg13 = readRDS(Sys.getenv("FG13srobj"))
fgmeta = read.table(Sys.getenv("FG13metafile"), sep="\t", header=TRUE) %>%
    column_to_rownames('CellID')
# Indexing by a cell name that is absent from the table would silently yield
# an all-NA row, so stop before the object's metadata gets corrupted.
if (!all(colnames(fg13) %in% rownames(fgmeta))) {
    stop("FG13 metadata file lacks rows for ",
         sum(!(colnames(fg13) %in% rownames(fgmeta))),
         " cell(s) present in the Seurat object")
}
# Reorder rows to the object's cell order; drop=FALSE keeps a data.frame even
# if the table has a single annotation column.
fg13@meta.data = fgmeta[colnames(fg13), , drop=FALSE]
# Re-prefix tier-1 names from "Hs." to "FG.".
fg13$tier1name = gsub("Hs\\.", "FG\\.", fg13$tier1name)

In [8]:
# Load the CD14 Seurat object and replace its cell metadata with the curated
# annotation table (one row per cell, keyed by the CellID column).
cd14 = readRDS(Sys.getenv("CD14srobj"))
cdmeta = read.table(Sys.getenv("CD14metafile"), sep="\t", header=TRUE) %>%
    column_to_rownames('CellID')
# Indexing by a cell name that is absent from the table would silently yield
# an all-NA row, so stop before the object's metadata gets corrupted.
if (!all(colnames(cd14) %in% rownames(cdmeta))) {
    stop("CD14 metadata file lacks rows for ",
         sum(!(colnames(cd14) %in% rownames(cdmeta))),
         " cell(s) present in the Seurat object")
}
# Reorder rows to the object's cell order; drop=FALSE keeps a data.frame even
# if the table has a single annotation column.
cd14@meta.data = cdmeta[colnames(cd14), , drop=FALSE]

In [15]:
# Keep only the cells whose `type` annotation matches neither "Doub" nor
# "LowQual"; the same filter is applied to both datasets.
fg13 <- fg13[, !grepl(pattern = "(Doub)|(LowQual)", x = fg13$type)]
cd14 <- cd14[, !grepl(pattern = "(Doub)|(LowQual)", x = cd14$type)]

In [17]:
# Print the FG13 object summary (feature/cell counts, assays, reductions).
fg13

An object of class Seurat 
44291 features across 89849 samples within 2 assays 
Active assay: SCT (21591 features)
 1 other assay present: RNA
 2 dimensional reductions calculated: pca, umap

In [18]:
# Print the CD14 object summary (feature/cell counts, assays, reductions).
cd14

An object of class Seurat 
44884 features across 107432 samples within 2 assays 
Active assay: SCT (21865 features)
 1 other assay present: RNA
 2 dimensional reductions calculated: pca, umap

In [1]:
# Directory that receives every marker table written by this notebook, taken
# from the CONSENSUS_MARKERS_DIR environment variable.
# NOTE(review): Sys.getenv() returns "" when the variable is unset, in which
# case file.path(OUTDIR, ...) resolves to the filesystem root -- confirm the
# variable is defined before running the long marker computations.
OUTDIR = Sys.getenv("CONSENSUS_MARKERS_DIR")

# Find all markers (Cell Type)

In [None]:
# Use the tier-1 cell-type name as the identity class of every FG13 cell.
Idents(fg13) <- fg13$tier1name
# Markers for each identity on the RNA assay: Wilcoxon test, positive markers
# only, with the per-identity cell cap set to 10000.
fgmarks <- FindAllMarkers(
    object = fg13,
    assay = "RNA",
    test.use = "wilcox",
    only.pos = TRUE,
    max.cells.per.ident = 10000
)

In [None]:
# Use the tier-1 cell-type name as the identity class of every CD14 cell.
Idents(cd14) <- cd14$tier1name
# Markers for each identity on the RNA assay: Wilcoxon test, positive markers
# only, with the per-identity cell cap set to 10000.
cdmarks <- FindAllMarkers(
    object = cd14,
    assay = "RNA",
    test.use = "wilcox",
    only.pos = TRUE,
    max.cells.per.ident = 10000
)

In [None]:
# Save the cell-type marker tables as tab-separated files without row names.
write.table(
    cdmarks,
    file = file.path(OUTDIR, "CD14_celltype_markers.tsv"),
    sep = "\t",
    row.names = FALSE
)
write.table(
    fgmarks,
    file = file.path(OUTDIR, "FG13_celltype_markers.tsv"),
    sep = "\t",
    row.names = FALSE
)

# Consensus markers (cell group)

In [20]:
# Build one merged display name per level of `fctr`.
#
# For every group (factor level) the distinct entries of `names` that belong
# to it are reduced to their second "."-separated component (for example
# "Hs.T.CD4" -> "T"). The distinct components are ordered by decreasing
# frequency and joined with "_". A name that contains no "." is used unchanged
# instead of triggering a "subscript out of bounds" error.
#
# Args:
#   names: character vector (or factor) of dot-separated type names.
#   fctr:  factor of the same length assigning each entry to a group.
# Returns:
#   Character vector with one element per level of `fctr`, in level order,
#   made unique with make.unique().
get_merged_typenames = function(names, fctr) {
    names = as.character(names)
    merged = vapply(levels(fctr), function(grp) {
        # which() discards entries whose group is NA.
        members = unique(names[which(fctr == grp)])
        # Capture the text between the first and second "."; inputs without a
        # "." do not match the pattern and pass through untouched.
        component = sub("^[^.]*\\.([^.]*).*$", "\\1", members)
        # Ascending sort followed by rev() puts the most frequent component
        # first and keeps the original tie order.
        counts = sort(table(component))
        paste(rev(base::names(counts)), collapse="_")
    }, character(1), USE.NAMES = FALSE)
    return(make.unique(merged))
}

In [2]:
# Show which correspondence tables are available; the result is only
# displayed, not stored.
list.files('~/randomforest/correspondencedata/')

In [21]:
# Read every correspondence table in the directory and attach readable group
# names for both datasets. Each table is expected to provide the columns
# `cd`, `cdgroup`, `fg` and `fggroup`, which are the ones used below.
corrfiles = list.files("~/randomforest/correspondencedata", full.names=TRUE)
# An empty directory would otherwise produce a null table and make the group
# mapping further down fail in a confusing way.
if (length(corrfiles) == 0) {
    stop("no correspondence tables found in ~/randomforest/correspondencedata")
}
lst = lapply(corrfiles, function(f){
    tbl = read.table(f, sep="\t", header=TRUE)
    # Replace the raw group ids by merged type names, one per factor level.
    tbl$cdgrpnm = factor(tbl$cdgroup)
    levels(tbl$cdgrpnm) = get_merged_typenames(tbl$cd, tbl$cdgrpnm)
    tbl$fggrpnm = factor(tbl$fggroup)
    levels(tbl$fggrpnm) = get_merged_typenames(tbl$fg, tbl$fggrpnm)
    return(tbl)
})
# Stack the per-file tables into a single lookup table.
mappedgroups = data.table::rbindlist(lst)

# Map groups

In [22]:
# Print the FG13 object summary (feature/cell counts, assays, reductions).
fg13

An object of class Seurat 
44291 features across 89849 samples within 2 assays 
Active assay: SCT (21591 features)
 1 other assay present: RNA
 2 dimensional reductions calculated: pca, umap

In [24]:
# Give every FG13 cell the merged group name looked up through its curated
# name; cells without a match fall back to their tier-1 name.
fg13$groupname <- dplyr::coalesce(
    as.character(mappedgroups$fggrpnm[match(fg13$curatedname, mappedgroups$fg)]),
    as.character(fg13$tier1name)
)

# Same lookup and fallback for the CD14 cells.
cd14$groupname <- dplyr::coalesce(
    as.character(mappedgroups$cdgrpnm[match(cd14$curatedname, mappedgroups$cd)]),
    as.character(cd14$tier1name)
)

# Markers for cell groups

In [None]:
# Use the merged group name as the identity class of every FG13 cell.
Idents(fg13) <- fg13$groupname
# Markers for each identity on the RNA assay: Wilcoxon test, positive markers
# only, with the per-identity cell cap set to 10000.
fgmarks <- FindAllMarkers(
    object = fg13,
    assay = "RNA",
    test.use = "wilcox",
    only.pos = TRUE,
    max.cells.per.ident = 10000
)

Calculating cluster T/NK/ILC

Calculating cluster Endth/Ven_Endth/Cap_Endth/Art_Endth

Calculating cluster B

Calculating cluster Fibro

Calculating cluster Fibro.1

Calculating cluster Hs.IGA_IGL_Plsma

Calculating cluster T_Tclls

Calculating cluster B/GC

Calculating cluster Glial

Calculating cluster Hs.IGG_Plsma_Bcll

Calculating cluster T_T/NK/ILC_T/NK

Calculating cluster Hs.IGA_IGK_Plsma

Calculating cluster Hs.Mstcl



In [None]:
# Use the merged group name as the identity class of every CD14 cell.
Idents(cd14) <- cd14$groupname
# Markers for each identity on the RNA assay: Wilcoxon test, positive markers
# only, with the per-identity cell cap set to 10000.
cdmarks <- FindAllMarkers(
    object = cd14,
    assay = "RNA",
    test.use = "wilcox",
    only.pos = TRUE,
    max.cells.per.ident = 10000
)

In [None]:
# Save the cell-group marker tables as tab-separated files without row names.
write.table(
    cdmarks,
    file = file.path(OUTDIR, "CD14_cellgroup_markers.tsv"),
    sep = "\t",
    row.names = FALSE
)
write.table(
    fgmarks,
    file = file.path(OUTDIR, "FG13_cellgroup_markers.tsv"),
    sep = "\t",
    row.names = FALSE
)

# Markers for cell subsets

In [None]:
# Use the curated subset name as the identity class of every FG13 cell.
Idents(fg13) <- fg13$curatedname
# Markers for each identity on the RNA assay: Wilcoxon test, positive markers
# only, with the per-identity cell cap set to 10000.
fgmarks <- FindAllMarkers(
    object = fg13,
    assay = "RNA",
    test.use = "wilcox",
    only.pos = TRUE,
    max.cells.per.ident = 10000
)

In [None]:
# Use the curated subset name as the identity class of every CD14 cell.
Idents(cd14) <- cd14$curatedname
# Markers for each identity on the RNA assay: Wilcoxon test, positive markers
# only, with the per-identity cell cap set to 10000.
cdmarks <- FindAllMarkers(
    object = cd14,
    assay = "RNA",
    test.use = "wilcox",
    only.pos = TRUE,
    max.cells.per.ident = 10000
)

In [None]:
# Save the cell-subset marker tables as tab-separated files without row names.
write.table(
    cdmarks,
    file = file.path(OUTDIR, "CD14_cellsubsets_markers.tsv"),
    sep = "\t",
    row.names = FALSE
)
write.table(
    fgmarks,
    file = file.path(OUTDIR, "FG13_cellsubsets_markers.tsv"),
    sep = "\t",
    row.names = FALSE
)