# Standard Seurat Processing for Mol Bio sequencing

## Installing and importing commonly used libraries

In [1]:
# Install the notebook's dependencies only when they are missing, so that
# re-running this cell does not reinstall packages that are already present.
# BiocManager is handled first because it is what installs 'multtest'.
if (!requireNamespace('BiocManager', quietly = TRUE)) {
    install.packages('BiocManager')
}
if (!requireNamespace('multtest', quietly = TRUE)) {
    BiocManager::install('multtest')
}
for (pkg in c('spatstat', 'Seurat')) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
        install.packages(pkg)
    }
}

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

'getOption("repos")' replaces Bioconductor standard repositories, see
'help("repositories", package = "BiocManager")' for details.
Replacement repositories:
    CRAN: https://cloud.r-project.org

Bioconductor version 3.14 (BiocManager 1.30.23), R 4.1.3 (2022-03-10)

“package(s) not installed when version(s) same as or greater than current; use
  `force = TRUE` to re-install: 'multtest'”
Old packages: 'acepack', 'ade4', 'ape', 'bibtex', 'bigrquery', 'bit', 'blob',
  'brew', 'brio', 'broom', 'brotli', 'Cairo', 'callr', 'checkmate', 'circlize',
  'classInt', 'clue', 'coda', 'config', 'copula', 'covr', 'credentials',
  'crul', 'cyclocomp', 'DBI', 'DBItest', 'dbplyr', 'dendextend',
  'densityClust', 'DEoptimR', 'desc', 'deSolve', 'devtools', 'dichromat',
  'diptest', 'downlit', 'drake', 'DT', 'dtplyr', 'e1071', 'ellipse', 'eulerr',
  'expm', 'fastcluster', 'fastICA', 'fastmatch', 'fda', 'ff', 'fields',
  'fie

In [4]:
library(dplyr)
library(Seurat)
library(patchwork)
library(H5weaver)
library(hise)
library(tidyverse)
library(SeuratObject)
library(ggrepel)
library(SeuratDisk)


Loading required package: SeuratObject

Loading required package: sp


Attaching package: ‘SeuratObject’


The following objects are masked from ‘package:base’:

    intersect, t


“running command 'timedatectl' had status 1”
── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.5.1     [32m✔[39m [34mpurrr  [39m 1.0.2
[32m✔[39m [34mtibble [39m 3.2.1     [32m✔[39m [34mdplyr  [39m 1.1.4
[32m✔[39m [34mtidyr  [39m 1.3.1     [32m✔[39m [34mstringr[39m 1.5.1
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



## Creating Seurat objects from Cell Ranger h5 outputs

### Reading h5 files into memory

In [6]:
# Recursively collect the full paths of every filtered feature-barcode matrix
# (.h5) found under the experiment directory.
h5s <- list.files(
    path = '/home/jupyter/CS15_WHBL/CWB_Paper/01_Final_Data/05_Data/Exp00895_w1',
    # `pattern` is a regular expression: the dot is escaped so that it only
    # matches a literal '.' before the 'h5' extension.
    pattern = 'filtered_feature_bc_matrix\\.h5$',
    full.names = TRUE,
    recursive = TRUE
)

# Fail early with a clear message instead of letting an empty file list
# propagate into the object-creation and merge steps.
if (length(h5s) == 0) {
    stop(
        'No filtered_feature_bc_matrix.h5 files were found; check the input path.',
        call. = FALSE
    )
}


### Creating Seurat Objects

In [7]:
# Build the Seurat project name "<experiment>_<sample>" from one h5 path.
#   h5_path: full path to an h5 file; it is expected to look like
#            .../<experiment>/outs/per_sample_outs/<sample>/count/<file>.h5
# Returns a single string. Stops if the path has no '/per_sample_outs/'
# component, because the sample name would otherwise silently become NA.
sample_project_name <- function(h5_path) {
    # Sample name: the text between '/per_sample_outs/' and '/count/'.
    after_marker <- strsplit(h5_path, '/per_sample_outs/', fixed = TRUE)[[1]][2]
    if (is.na(after_marker)) {
        stop(
            "Cannot derive a sample name, no '/per_sample_outs/' in path: ",
            h5_path,
            call. = FALSE
        )
    }
    sample_name <- strsplit(after_marker, '/count/', fixed = TRUE)[[1]][1]

    # Experiment name: the last path component before '/outs/'.
    experiment <- basename(strsplit(h5_path, '/outs/', fixed = TRUE)[[1]][1])

    paste(experiment, sample_name, sep = '_')
}

# One Seurat object per h5 file; the project name (which becomes orig.ident)
# encodes the experiment and the sample the matrix came from.
fully <- lapply(h5s, function(h5_path) {
    counts <- Read10X_h5(h5_path) # reading the matrix
    CreateSeuratObject(counts, project = sample_project_name(h5_path))
})


Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA
Unnamed: 0_level_1,<fct>,<dbl>,<int>
AAACAAGCATAACCGAAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,2913,1592
AAACCAATCACGTTCAAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,6994,3513
AAACCAATCACTGTCAAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,12229,3982
AAACCAATCCACAATTAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,6612,2734
AAACCAATCCTAAATCAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,7654,2976
AAACCAATCGAACCGAAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,6523,2762


In [8]:
# Fold the list of per-sample Seurat objects into one object by merging them
# pairwise, left to right.
fully <- Reduce(function(merged, next_so) merge(merged, next_so), fully)


In [9]:
# QC: store the per-cell percentage computed over features whose names start
# with "MT-" in the `percent.mt` metadata column ...
fully[["percent.mt"]] <- PercentageFeatureSet(object = fully, pattern = "^MT-")

# ... and keep only the cells where that percentage is below 5.
fully <- subset(x = fully, subset = percent.mt < 5)


### Normalizing, running PCA and UMAP, and clustering

In [17]:
# Join the layers of the merged object before normalization.
fully <- JoinLayers(object = fully)

In [18]:
# Standard processing, one step per statement, each with the same settings as
# before (package defaults unless an argument is given).
fully <- NormalizeData(fully)
fully <- FindVariableFeatures(fully)
fully <- ScaleData(fully)
fully <- RunPCA(fully)

# UMAP embedding, neighbour graph and clustering all use the first 20 PCs.
fully <- RunUMAP(fully, dims = 1:20)
fully <- FindNeighbors(fully, dims = 1:20)
fully <- FindClusters(fully, resolution = 0.5)


Normalizing layer: counts

Finding variable features for layer counts

Centering and scaling data matrix

PC_ 1 
Positive:  CD69, ICOS, SLA, MYC, PGGHG, GPR171, STAT5A, PKIA, NDFIP2, PGAP1 
	   MAL, TSHZ2, GFI1, WDR74, LAMP3, IRF4, SACS, CTPS1, BZW2, ITGA6 
	   MT2A, NUCB2, IFIT1, CD40LG, TFRC, RRP9, USP18, CISH, ODC1, DEAF1 
Negative:  ANPEP, SPI1, LYZ, CD300E, LILRB2, SEMA6B, KYNU, IFI30, NCF2, FCAR 
	   HCK, C5AR1, CST3, TLR4, ENG, S100A9, MPEG1, EMILIN2, SLC37A2, TLR2 
	   MS4A7, PLXNB2, CYBB, SERPINA1, PID1, TNFAIP2, ZNF385A, SLC7A11, VCAN, SRC 
PC_ 2 
Positive:  AOAH, SPN, CST7, NKG7, SLA2, CD300A, SYNE1, C1orf21, OASL, GNLY 
	   PRF1, KLRD1, PRR5L, HIST1H1C, FGFBP2, AUTS2, TRGC2, S1PR5, APOL3, GZMH 
	   CTSW, MYO1F, CCL5, ADGRG1, ZBP1, TRGC1, MYBL1, NCR1, TNFSF10, GK5 
Negative:  BCL2A1, FOSL1, KDM6B, SLC43A3, NME1, SLC7A1, PIM3, IRF8, EPOP, MYC 
	   TFRC, MFSD2A, LRRC8B, SLC7A5, IRF4, NFKBID, FABP5, SLAMF1, RRP9, SLC1A5 
	   BZW2, INSIG1, ODC1, TRAF4, SLC29A1, SLC16A1, PUS7, NA

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 127091
Number of edges: 4008535

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9479
Number of communities: 31
Elapsed time: 100 seconds


## Plotting

In [21]:
# `tech` = characters 21 to 44 of orig.ident. For an identifier such as
# "Exp00895_w1_BL05012_CD3_CD28" this is the trailing "CD3_CD28" part.
sample_ids <- fully$orig.ident
fully$tech <- substr(sample_ids, start = 21, stop = 44)


Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,percent.mt,RNA_snn_res.0.5,seurat_clusters,tech
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<dbl>,<fct>,<fct>,<chr>
AAACAAGCATAACCGAAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,2913,1592,0.0,21,21,CD3_CD28
AAACCAATCACGTTCAAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,6994,3513,0.48613097,18,18,CD3_CD28
AAACCAATCACTGTCAAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,12229,3982,1.30018808,14,14,CD3_CD28
AAACCAATCCACAATTAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,6612,2734,0.0,12,12,CD3_CD28
AAACCAATCCTAAATCAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,7654,2976,0.06532532,12,12,CD3_CD28
AAACCAATCGAACCGAAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,6523,2762,0.13797333,9,9,CD3_CD28


In [24]:
# `donor` = characters 13 to 19 of orig.ident. For an identifier such as
# "Exp00895_w1_BL05012_CD3_CD28" this is the 7-character donor ID "BL05012".
sample_ids <- fully$orig.ident
fully$donor <- substr(sample_ids, start = 13, stop = 19)


Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,percent.mt,RNA_snn_res.0.5,seurat_clusters,tech,donor
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<dbl>,<fct>,<fct>,<chr>,<chr>
AAACAAGCATAACCGAAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,2913,1592,0.0,21,21,CD3_CD28,BL05012
AAACCAATCACGTTCAAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,6994,3513,0.48613097,18,18,CD3_CD28,BL05012
AAACCAATCACTGTCAAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,12229,3982,1.30018808,14,14,CD3_CD28,BL05012
AAACCAATCCACAATTAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,6612,2734,0.0,12,12,CD3_CD28,BL05012
AAACCAATCCTAAATCAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,7654,2976,0.06532532,12,12,CD3_CD28,BL05012
AAACCAATCGAACCGAAACGGGAA-1,Exp00895_w1_BL05012_CD3_CD28,6523,2762,0.13797333,9,9,CD3_CD28,BL05012


## Splitting the full SO into FACS/Bead pairs and reclustering:

## Cell type label transfer

In [33]:
# Load the reference object used for label transfer from its h5Seurat file.
ref_file <- '/home/jupyter/pbmc_multimodal.h5seurat'
ref <- LoadH5Seurat(file = ref_file)


Registered S3 method overwritten by 'SeuratDisk':
  method            from  
  as.sparse.H5Group Seurat

Validating h5Seurat file

Initializing ADT with data

Adding counts for ADT

Adding variable feature information for ADT

Adding miscellaneous information for ADT

Initializing SCT with data

Adding counts for SCT

Adding variable feature information for SCT

Adding miscellaneous information for SCT

Adding reduction apca

Adding cell embeddings for apca

Adding feature loadings for apca

Adding miscellaneous information for apca

Adding reduction aumap

Adding cell embeddings for aumap

Adding miscellaneous information for aumap

Adding reduction pca

Adding cell embeddings for pca

Adding feature loadings for pca

Adding miscellaneous information for pca

Adding reduction spca

Adding cell embeddings for spca

Adding feature loadings for spca

Adding miscellaneous information for spca

Adding reduction umap

Adding cell embeddings for umap

Adding miscellaneous information for uma

In [35]:
# Find anchors between the reference (`ref`) and the query (`fully`), using
# SCT normalization, the reference's "spca" reduction and dimensions 1 to 50.
anchors <- FindTransferAnchors(
    query = fully,
    reference = ref,
    reference.reduction = "spca",
    normalization.method = "SCT",
    dims = seq_len(50)
)


Normalizing query using reference SCT model

Projecting cell embeddings

Finding neighborhoods

Finding anchors

	Found 5404 anchors



In [36]:
# What to transfer from the reference: the three cell type annotation levels
# and the ADT assay (stored on the query under the name `predicted_ADT`).
transfer_targets <- list(
    celltype.l1 = "celltype.l1",
    celltype.l2 = "celltype.l2",
    celltype.l3 = "celltype.l3",
    predicted_ADT = "ADT"
)

# Transfer onto the query object through the anchors found above.
fully <- TransferData(
    anchorset = anchors,
    reference = ref,
    query = fully,
    refdata = transfer_targets
)

Finding integration vectors

Finding integration vector weights

Predicting cell labels

“Layer counts isn't present in the assay object; returning NULL”
Predicting cell labels

“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“Layer counts isn't present in the assay object; returning NULL”
Predicting cell labels

“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“Layer counts isn't present in the assay object; returning NULL”
Transfering 228 features onto reference data

“Layer counts isn't present in the assay object; returning NULL”


In [37]:
# Join the object's layers again after the label transfer.
fully <- JoinLayers(object = fully)

In [48]:
# Combined "<donor>_<tech>" label for every cell.
fully$donor_tech <- paste(fully$donor, fully$tech, sep = "_")

In [None]:
# Split the full object into a list of objects, one per value of the `donor`
# metadata column.
split <- SplitObject(object = fully, split.by = "donor")


In [None]:
# Display the per-donor list created by SplitObject() to inspect its contents.
split

In [None]:
# Pull each donor's object out of the per-donor list `split`. The earlier
# code read from `split_cl`, which is never defined in this notebook; the list
# produced by SplitObject(fully, split.by = 'donor') is `split`.
# `[[` is used for exact name matching (`$` on a list allows partial matches).
BL05012_cl <- split[["BL05012"]]
BL05111_cl <- split[["BL05111"]]
BL05731_cl <- split[["BL05731"]]
PB02183_cl <- split[["PB02183"]]


In [None]:
# Split every donor object further, into one object per predicted level-1
# cell type; each `*_cl` variable becomes a list after this step.
celltype_column <- "predicted.celltype.l1"
BL05012_cl <- SplitObject(object = BL05012_cl, split.by = celltype_column)
BL05111_cl <- SplitObject(object = BL05111_cl, split.by = celltype_column)
BL05731_cl <- SplitObject(object = BL05731_cl, split.by = celltype_column)
PB02183_cl <- SplitObject(object = PB02183_cl, split.by = celltype_column)


In [50]:
# Save the processed object to disk. The earlier code saved `fully_cl`, which
# is never defined in this notebook, so the cell failed on a fresh run.
# NOTE(review): `fully` (clustered + label-transferred) is assumed to be the
# intended object - confirm before relying on the saved file.
saveRDS(
    fully,
    file = '/home/jupyter/CS15_WHBL/CWB_Paper/01_Final_Data/05_Data/Fig_5_stim_cl.rds'
)

In [None]:
# Record the R version and the attached/loaded package versions used for this
# analysis, for reproducibility.
sessionInfo()