In [None]:
# Author: Janssen Kotah
# snRNAseq analysis for WT/SHIP1 KO mice as part of Matera et al. project
# Code adapted from SoupX template by Thomas Rust, adapted to merge scrublet analysis outputs

In [1]:
library(dplyr)
library(patchwork)
library(ggplot2)
library(SoupX)
library(Seurat)
library(stringr)
library(DropletUtils)
library(data.table)
sessionInfo()



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Attaching SeuratObject

Loading required package: SingleCellExperiment

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: ‘matrixStats’


The following object is masked from ‘package:dplyr’:

    count



Attaching package: ‘MatrixGenerics’


The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads

R version 4.2.3 (2023-03-15)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.5 LTS

Matrix products: default
BLAS/LAPACK: /data/bcn/p310674/anaconda3/envs/R_env/lib/libopenblasp-r0.3.20.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] data.table_1.14.8           DropletUtils_1.16.0        
 [3] SingleCellExperiment_1.18.1 SummarizedExperiment_1.28.0
 [5] Biobase_2.58.0              GenomicRanges_1.50.2       
 [7] GenomeInfoDb_1.34.9         IRanges_2.32.0             
 [9] S4Vectors_0.36.0            BiocGeneri

In [None]:
#make a list of seurat objects

In [3]:
soupX_objects = list.files("./001_SoupX_corrected_counts/", pattern = '.rds')
soupX_objects

### looped way to load in soupX objects

In [21]:
total.list = list()
for (each in soupX_objects){
    sample = str_sub(each, 1, 3)
    listName_samp = paste0("srat",sample)
        
    print(paste0("Loading sample ", sample))
    
    soupX_load = readRDS(paste0("001_SoupX_corrected_counts/", each))
    seu_obj = CreateSeuratObject(soupX_load, min.cells = 3, min.features = 200, project = "SHIP1")
    seu_obj$sample = sample
    
    total.list[[sample]] = seu_obj

}

total.list

[1] "Loading sample JK1"
[1] "Loading sample JK2"
[1] "Loading sample JK3"
[1] "Loading sample JK4"
[1] "Loading sample JK5"
[1] "Loading sample JK6"
[1] "Loading sample JK7"
[1] "Loading sample JK8"


$JK1
An object of class Seurat 
21963 features across 8087 samples within 1 assay 
Active assay: RNA (21963 features, 0 variable features)

$JK2
An object of class Seurat 
22333 features across 8227 samples within 1 assay 
Active assay: RNA (22333 features, 0 variable features)

$JK3
An object of class Seurat 
22289 features across 10399 samples within 1 assay 
Active assay: RNA (22289 features, 0 variable features)

$JK4
An object of class Seurat 
21371 features across 5840 samples within 1 assay 
Active assay: RNA (21371 features, 0 variable features)

$JK5
An object of class Seurat 
22171 features across 7129 samples within 1 assay 
Active assay: RNA (22171 features, 0 variable features)

$JK6
An object of class Seurat 
21902 features across 5817 samples within 1 assay 
Active assay: RNA (21902 features, 0 variable features)

$JK7
An object of class Seurat 
19433 features across 884 samples within 1 assay 
Active assay: RNA (19433 features, 0 variable features)

$JK8
An object of cl

In [22]:
## ADD METADATA FROM FILE INTO SEURAT OBJECTS
isolation_metadata = readxl::read_excel("20240301_Annotation_for_Metadata_Updated_Resequenced.xlsx")
isolation_metadata = isolation_metadata %>% filter(SampleNumber %in% names(total.list)) 
colnames(isolation_metadata) = gsub("%", "pct", colnames(isolation_metadata))

# Add the metadata to each individual sample in the list
for(i in 1:length(total.list)){
    metadata_total <- total.list[[i]]@meta.data[,c("orig.ident", "sample")]
    metadata_total$cell_id <- row.names(metadata_total)
    metadata <-  isolation_metadata %>% filter(SampleNumber == names(total.list)[i]) %>%
        right_join(x = ., y = metadata_total, by = c("SampleNumber"="sample"))
    row.names(metadata) <- metadata$cell_id
      
  total.list[[i]] <- AddMetaData(object = total.list[[i]], metadata = metadata)
  
  saveRDS(total.list[[i]]@meta.data,
            file = paste0("002B Seurat SoupX Metadata ", names(total.list[i]),".csv"))
}

saveRDS(object = total.list, file = "002B_listed_samples_after_SoupX_unprocessed_with_metadata.rds")


“Setting row names on a tibble is deprecated.”
“Setting row names on a tibble is deprecated.”
“Setting row names on a tibble is deprecated.”
“Setting row names on a tibble is deprecated.”
“Setting row names on a tibble is deprecated.”
“Setting row names on a tibble is deprecated.”
“Setting row names on a tibble is deprecated.”
“Setting row names on a tibble is deprecated.”


In [23]:
total.list[[1]]@meta.data

Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,sample,SampleNumber,SampleID,UltracentrifugeRound,Genotype,Sex,Age_PND,⋯,NEUN_vol_to_add,Total_Vol,Buffer_add,QC1,QC2,CellRanger_cellNum,CellRanger_Avg_Counts,CellRanger_Median_Counts,CellRanger_Saturation,cell_id
Unnamed: 0_level_1,<fct>,<dbl>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
AAACCCAAGAGGCGGA-1,SHIP1,3576.056,1832,JK1,JK1,AM_40070,1,WT,M,15,⋯,13.565,45,0,11.4,34.2,8087,19738,2127,33.35,AAACCCAAGAGGCGGA-1
AAACCCACAAGCGCTC-1,SHIP1,5442.952,2447,JK1,JK1,AM_40070,1,WT,M,15,⋯,13.565,45,0,11.4,34.2,8087,19738,2127,33.35,AAACCCACAAGCGCTC-1
AAACCCACATTGTACG-1,SHIP1,5884.759,2468,JK1,JK1,AM_40070,1,WT,M,15,⋯,13.565,45,0,11.4,34.2,8087,19738,2127,33.35,AAACCCACATTGTACG-1
AAACGAAAGACGCATG-1,SHIP1,3379.408,1745,JK1,JK1,AM_40070,1,WT,M,15,⋯,13.565,45,0,11.4,34.2,8087,19738,2127,33.35,AAACGAAAGACGCATG-1
AAACGAAAGATGGTAT-1,SHIP1,7225.768,2971,JK1,JK1,AM_40070,1,WT,M,15,⋯,13.565,45,0,11.4,34.2,8087,19738,2127,33.35,AAACGAAAGATGGTAT-1
AAACGAAAGGTCATTC-1,SHIP1,1381.231,902,JK1,JK1,AM_40070,1,WT,M,15,⋯,13.565,45,0,11.4,34.2,8087,19738,2127,33.35,AAACGAAAGGTCATTC-1
AAACGAAAGTCATACC-1,SHIP1,5369.318,2257,JK1,JK1,AM_40070,1,WT,M,15,⋯,13.565,45,0,11.4,34.2,8087,19738,2127,33.35,AAACGAAAGTCATACC-1
AAACGAACACAAGTGG-1,SHIP1,2693.573,1445,JK1,JK1,AM_40070,1,WT,M,15,⋯,13.565,45,0,11.4,34.2,8087,19738,2127,33.35,AAACGAACACAAGTGG-1
AAACGAACAGCCTATA-1,SHIP1,4542.071,2083,JK1,JK1,AM_40070,1,WT,M,15,⋯,13.565,45,0,11.4,34.2,8087,19738,2127,33.35,AAACGAACAGCCTATA-1
AAACGAACATTCAGCA-1,SHIP1,3285.164,1671,JK1,JK1,AM_40070,1,WT,M,15,⋯,13.565,45,0,11.4,34.2,8087,19738,2127,33.35,AAACGAACATTCAGCA-1


### Add scrublet

In [42]:
total.list2 = list()
for (each in names(total.list)){
    print(paste0("Analyzing sample ", each))
    
    #scrublet directory location
    scrublet.dir = paste0("./002A_Scrublet/", each, "_scrublet_results.tsv")
    
    #load scrublet, make barcodes as rownames
    scrublet_df = read.delim(scrublet.dir)
    rownames(scrublet_df) = scrublet_df$X
    scrublet_df = scrublet_df[-1]
    
    #merge with relevant seurat obj
    total.list2[[each]] <- AddMetaData(object = total.list[[each]], metadata = scrublet_df)
}

total.list2

[1] "Analyzing sample JK1"
[1] "Analyzing sample JK2"
[1] "Analyzing sample JK3"
[1] "Analyzing sample JK4"
[1] "Analyzing sample JK5"
[1] "Analyzing sample JK6"
[1] "Analyzing sample JK7"
[1] "Analyzing sample JK8"


$JK1
An object of class Seurat 
21963 features across 8087 samples within 1 assay 
Active assay: RNA (21963 features, 0 variable features)

$JK2
An object of class Seurat 
22333 features across 8227 samples within 1 assay 
Active assay: RNA (22333 features, 0 variable features)

$JK3
An object of class Seurat 
22289 features across 10399 samples within 1 assay 
Active assay: RNA (22289 features, 0 variable features)

$JK4
An object of class Seurat 
21371 features across 5840 samples within 1 assay 
Active assay: RNA (21371 features, 0 variable features)

$JK5
An object of class Seurat 
22171 features across 7129 samples within 1 assay 
Active assay: RNA (22171 features, 0 variable features)

$JK6
An object of class Seurat 
21902 features across 5817 samples within 1 assay 
Active assay: RNA (21902 features, 0 variable features)

$JK7
An object of class Seurat 
19433 features across 884 samples within 1 assay 
Active assay: RNA (19433 features, 0 variable features)

$JK8
An object of cl

In [51]:
total.list2[[8]]@meta.data

Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,sample,SampleNumber,SampleID,UltracentrifugeRound,Genotype,Sex,Age_PND,⋯,Buffer_add,QC1,QC2,CellRanger_cellNum,CellRanger_Avg_Counts,CellRanger_Median_Counts,CellRanger_Saturation,cell_id,doublet_scores,predicted_doublets
Unnamed: 0_level_1,<fct>,<dbl>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<chr>
AAACCCAAGCGTGTTT-1,SHIP1,1958.6806,1224,JK8,JK8,AM_40071,2,WT,F,15,⋯,21.9,5.52,19.86,4936,27853,2200,32.3,AAACCCAAGCGTGTTT-1,0.09510870,False
AAACCCAAGGAGCAAA-1,SHIP1,3001.8391,1600,JK8,JK8,AM_40071,2,WT,F,15,⋯,21.9,5.52,19.86,4936,27853,2200,32.3,AAACCCAAGGAGCAAA-1,0.04815864,False
AAACCCAAGTAGATCA-1,SHIP1,9604.9875,3161,JK8,JK8,AM_40071,2,WT,F,15,⋯,21.9,5.52,19.86,4936,27853,2200,32.3,AAACCCAAGTAGATCA-1,0.11520737,False
AAACCCAGTTCAAACC-1,SHIP1,7074.0146,2847,JK8,JK8,AM_40071,2,WT,F,15,⋯,21.9,5.52,19.86,4936,27853,2200,32.3,AAACCCAGTTCAAACC-1,0.10256410,False
AAACCCATCACTACGA-1,SHIP1,3097.6909,1498,JK8,JK8,AM_40071,2,WT,F,15,⋯,21.9,5.52,19.86,4936,27853,2200,32.3,AAACCCATCACTACGA-1,0.02315485,False
AAACCCATCAGGAACG-1,SHIP1,5041.4935,2070,JK8,JK8,AM_40071,2,WT,F,15,⋯,21.9,5.52,19.86,4936,27853,2200,32.3,AAACCCATCAGGAACG-1,0.01581028,False
AAACCCATCCGGGACT-1,SHIP1,6803.9309,2538,JK8,JK8,AM_40071,2,WT,F,15,⋯,21.9,5.52,19.86,4936,27853,2200,32.3,AAACCCATCCGGGACT-1,0.01581028,False
AAACGAACAAGTGACG-1,SHIP1,7885.0060,2835,JK8,JK8,AM_40071,2,WT,F,15,⋯,21.9,5.52,19.86,4936,27853,2200,32.3,AAACGAACAAGTGACG-1,0.22727273,False
AAACGAACAGACCTAT-1,SHIP1,10163.4212,3016,JK8,JK8,AM_40071,2,WT,F,15,⋯,21.9,5.52,19.86,4936,27853,2200,32.3,AAACGAACAGACCTAT-1,0.02739726,False
AAACGAATCACATTGG-1,SHIP1,3280.9479,1442,JK8,JK8,AM_40071,2,WT,F,15,⋯,21.9,5.52,19.86,4936,27853,2200,32.3,AAACGAATCACATTGG-1,0.16116505,False


In [52]:
saveRDS(object = total.list2, file = "002B_listed_samples_after_SoupX_unprocessed_with_metadata.rds")
