In [1]:
library(ArchR)
library(parallel)
library(BSgenome.Mmusculus.UCSC.mm10)
library(ggpubr)
library(dplyr)

# Select the mm10 genome for all subsequent ArchR calls in this session.
addArchRGenome(genome = "mm10")

# Fix the random seed used by the rest of the notebook.
set.seed(seed = 147)

Loading required package: ggplot2

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: ‘MatrixGenerics’


The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    rowOrderStats

# D30 Dataset

In [2]:
# Load the saved D30 ArchR project from its output directory.
d30_project_dir <- "../intermediate_outputs3/projects/ArchR_D30_min_NA_max_600_scpeakset"
proj <- loadArchRProject(path = d30_project_dir)

Successfully loaded ArchRProject!


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _ 

In [None]:
# Tabulate the number of cells in each sample of the loaded project.
data.frame(proj@cellColData) %>%
  group_by(Sample) %>%
  summarise(n = n())

Sample,n
<chr>,<int>
D30_Ctrl,5403
D30_PIMQ,5325


In [None]:
# Write the project's UMAP embedding table to disk for SEACells initialization.
umap_coords <- proj@embeddings$UMAP$df

# Make sure the target folder exists (no warning if it is already there).
d30_seacells_dir <- "../intermediate_outputs3/projects/ArchR_D30_min_NA_max_600_scpeakset/seacells/"
dir.create(path = d30_seacells_dir, showWarnings = FALSE, recursive = TRUE)

write.table(
  x = umap_coords,
  file = "../intermediate_outputs3/projects/ArchR_D30_min_NA_max_600_scpeakset/seacells/D30_umap_df.txt"
)


In [None]:
# Annotate peaks with CIS-BP motifs, then export the motif-by-peak match
# matrix in MatrixMarket format and persist the updated project.
proj <- addMotifAnnotations(ArchRProj = proj, motifSet = "cisbp", name = "Motif")

# The annotation records the path of an RDS file holding the match object.
motif_matches_rds <- proj@peakAnnotation$Motif$Matches
motif_matches <- readRDS(motif_matches_rds)

matrix_dir <- "../intermediate_outputs3/matrices/"
dir.create(path = matrix_dir, showWarnings = FALSE, recursive = TRUE)
writeMM(
  assay(motif_matches),
  "../intermediate_outputs3/matrices/D30_scpeakSet_MotifMatrix.mtx"
)

proj <- saveArchRProject(ArchRProj = proj)

ArchR logging to : ArchRLogs/ArchR-addMotifAnnotations-dfa36f77665b-Date-2025-01-05_Time-11-43-44.681972.log
If there is an issue, please report to github with logFile!

2025-01-05 11:43:47.894986 : Gettting Motif Set, Species : Mus musculus, 0.001 mins elapsed.

Using version 2 motifs!

2025-01-05 11:43:49.370205 : Finding Motif Positions with motifmatchr!, 0.026 mins elapsed.

2025-01-05 11:45:37.723377 : Creating Motif Overlap Matrix, 1.832 mins elapsed.

2025-01-05 11:45:39.879883 : Finished Getting Motif Info!, 1.868 mins elapsed.

ArchR logging successful to : ArchRLogs/ArchR-addMotifAnnotations-dfa36f77665b-Date-2025-01-05_Time-11-43-44.681972.log



NULL

Saving ArchRProject...

Loading ArchRProject...

Successfully loaded ArchRProject!


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          

In [None]:
# Export data for SEACells metacell construction
#
# All files are written beneath
#   <project output dir>/seacells/SEACells_export
# through explicit paths, so the session's working directory is never changed
# and later cells that use relative paths keep working.

# Set up export directories (recursive; silent when they already exist)
export_dir <- file.path(getOutputDirectory(proj), "seacells", "SEACells_export")
peak_counts_dir <- file.path(export_dir, "peak_counts")
dir.create(peak_counts_dir, showWarnings = FALSE, recursive = TRUE)

# Export LSI embeddings for SEACells kernel construction
write.csv(
  getReducedDims(proj, reducedDims = "IterativeLSI"),
  file.path(export_dir, "svd.csv"),
  quote = FALSE
)
write.csv(
  getCellColData(proj),
  file.path(export_dir, "cell_metadata.csv"),
  quote = FALSE
)

# Export gene activity scores
gene_score_mat <- getMatrixFromProject(proj, useMatrix = "GeneScoreMatrix")
gene_scores <- assays(gene_score_mat)[["GeneScoreMatrix"]]
# Dense coercion before write.csv(); this can be memory-hungry for large
# projects.
gene_scores <- as.matrix(gene_scores)
rownames(gene_scores) <- rowData(gene_score_mat)$name
write.csv(gene_scores, file.path(export_dir, "gene_scores.csv"), quote = FALSE)

# Export peak accessibility counts
peak_set <- getPeakSet(proj)
peak_mat <- getMatrixFromProject(proj, "PeakMatrix")

# Reorder peaks by chromosome, with chromosomes taken in sort() order of the
# peak set's seqlevels.
# NOTE(review): presumably this is meant to make peaks.csv line up with the
# row order of the PeakMatrix written below -- TODO confirm; the check further
# down only guards the number of peaks, not their order.
chr_order <- sort(seqlevels(peak_set))
peaks_by_chr <- list()
for (chr in chr_order) {
  peaks_by_chr[[chr]] <- peak_set[seqnames(peak_set) == chr]
}
peaks_reordered <- Reduce("c", peaks_by_chr)

# Export peak count matrix and feature metadata
peak_counts <- assays(peak_mat)[["PeakMatrix"]]

# Refuse to write a feature table whose length disagrees with the matrix.
stopifnot(length(peaks_reordered) == nrow(peak_counts))

writeMM(peak_counts, file.path(peak_counts_dir, "counts.mtx"))
write.csv(
  colnames(peak_mat),
  file.path(peak_counts_dir, "cells.csv"),
  quote = FALSE
)
names(peaks_reordered) <- sprintf("Peak%d", seq_along(peaks_reordered))
write.csv(
  as.data.frame(peaks_reordered),
  file.path(peak_counts_dir, "peaks.csv"),
  quote = FALSE
)

“'seacells' already exists”
ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-dfa32a374f57-Date-2025-01-05_Time-11-57-01.067424.log
If there is an issue, please report to github with logFile!

2025-01-05 11:57:12.268919 : Organizing colData, 0.187 mins elapsed.

2025-01-05 11:57:12.297902 : Organizing rowData, 0.187 mins elapsed.

2025-01-05 11:57:12.300601 : Organizing rowRanges, 0.187 mins elapsed.

2025-01-05 11:57:12.304919 : Organizing Assays (1 of 1), 0.187 mins elapsed.

2025-01-05 11:57:12.426687 : Constructing SummarizedExperiment, 0.189 mins elapsed.

2025-01-05 11:57:13.08588 : Finished Matrix Creation, 0.2 mins elapsed.

“sparse->dense coercion: allocating vector of size 1.9 GiB”
ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-dfa341950a9b-Date-2025-01-05_Time-12-00-31.640148.log
If there is an issue, please report to github with logFile!

2025-01-05 12:00:47.693868 : Organizing colData, 0.268 mins elapsed.

2025-01-05 12:00:47.72267 : Organizing rowData, 0.26

NULL

# Y1 Dataset

In [None]:
# Move back to the scripts directory before processing the Y1 dataset.
# NOTE(review): this is a machine-specific absolute path; the Y1 cells load
# their project through "../"-relative paths, presumably relative to this
# directory -- confirm before running elsewhere.
scripts_dir <- "/data/peer/sotougl/Fuchs/inflammatory_memory/scATAC/Scripts"
setwd(scripts_dir)

# Display the directory now in effect.
getwd()


In [11]:
# Load the saved Y1 ArchR project from its output directory.
y1_project_dir <- "../intermediate_outputs3/projects/ArchR_Y1_min_NA_max_600_scpeakset"
proj <- loadArchRProject(path = y1_project_dir)

Successfully loaded ArchRProject!


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _ 

In [None]:
# Write the project's UMAP embedding table to disk for SEACells initialization.
umap_coords <- proj@embeddings$UMAP$df

# Make sure the target folder exists (no warning if it is already there).
y1_seacells_dir <- "../intermediate_outputs3/projects/ArchR_Y1_min_NA_max_600_scpeakset/seacells/"
dir.create(path = y1_seacells_dir, showWarnings = FALSE, recursive = TRUE)

write.table(
  x = umap_coords,
  file = "../intermediate_outputs3/projects/ArchR_Y1_min_NA_max_600_scpeakset/seacells/Y1_umap_df.txt"
)


In [None]:
# Tabulate the number of cells in each sample of the loaded project.
data.frame(proj@cellColData) %>%
  group_by(Sample) %>%
  summarise(n = n())

Sample,n
<chr>,<int>
Y1_Ctrl,2182
Y1_PIMQ,2294


In [None]:
# Add motif annotations using CIS-BP database and export motif-peak matching matrix
proj <- addMotifAnnotations(ArchRProj = proj, motifSet = "cisbp", name = "Motif")

# The annotation records the path of an RDS file holding the match object.
motif_matches <- readRDS(proj@peakAnnotation$Motif$Matches)

# Ensure the output directory exists so this cell can run on its own;
# writeMM() would otherwise fail on a missing folder.
dir.create("../intermediate_outputs3/matrices/", showWarnings = FALSE, recursive = TRUE)
writeMM(assay(motif_matches), "../intermediate_outputs3/matrices/Y1_scpeakSet_MotifMatrix.mtx")

# Persist the project with the new motif annotation attached.
proj <- saveArchRProject(ArchRProj = proj)

ArchR logging to : ArchRLogs/ArchR-addMotifAnnotations-dfa3407b2d9-Date-2025-01-05_Time-12-03-22.331064.log
If there is an issue, please report to github with logFile!

2025-01-05 12:03:22.417368 : Gettting Motif Set, Species : Mus musculus, 0.001 mins elapsed.

Using version 2 motifs!

2025-01-05 12:03:24.516201 : Finding Motif Positions with motifmatchr!, 0.036 mins elapsed.

2025-01-05 12:05:48.418801 : Creating Motif Overlap Matrix, 2.435 mins elapsed.

2025-01-05 12:05:50.558787 : Finished Getting Motif Info!, 2.47 mins elapsed.

ArchR logging successful to : ArchRLogs/ArchR-addMotifAnnotations-dfa3407b2d9-Date-2025-01-05_Time-12-03-22.331064.log



NULL

Saving ArchRProject...

Loading ArchRProject...

Successfully loaded ArchRProject!


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          

In [None]:
# Export data for SEACells metacell construction
#
# All files are written beneath
#   <project output dir>/seacells/SEACells_export
# through explicit paths, so the session's working directory is never changed
# and later cells that use relative paths keep working.

# Set up export directories (recursive; silent when they already exist)
export_dir <- file.path(getOutputDirectory(proj), "seacells", "SEACells_export")
peak_counts_dir <- file.path(export_dir, "peak_counts")
dir.create(peak_counts_dir, showWarnings = FALSE, recursive = TRUE)

# Export LSI embeddings for SEACells kernel construction
write.csv(
  getReducedDims(proj, reducedDims = "IterativeLSI"),
  file.path(export_dir, "svd.csv"),
  quote = FALSE
)
write.csv(
  getCellColData(proj),
  file.path(export_dir, "cell_metadata.csv"),
  quote = FALSE
)

# Export gene activity scores
gene_score_mat <- getMatrixFromProject(proj, useMatrix = "GeneScoreMatrix")
gene_scores <- assays(gene_score_mat)[["GeneScoreMatrix"]]
# Dense coercion before write.csv(); this can be memory-hungry for large
# projects.
gene_scores <- as.matrix(gene_scores)
rownames(gene_scores) <- rowData(gene_score_mat)$name
write.csv(gene_scores, file.path(export_dir, "gene_scores.csv"), quote = FALSE)

# Export peak accessibility counts
peak_set <- getPeakSet(proj)
peak_mat <- getMatrixFromProject(proj, "PeakMatrix")

# Reorder peaks by chromosome, with chromosomes taken in sort() order of the
# peak set's seqlevels.
# NOTE(review): presumably this is meant to make peaks.csv line up with the
# row order of the PeakMatrix written below -- TODO confirm; the check further
# down only guards the number of peaks, not their order.
chr_order <- sort(seqlevels(peak_set))
peaks_by_chr <- list()
for (chr in chr_order) {
  peaks_by_chr[[chr]] <- peak_set[seqnames(peak_set) == chr]
}
peaks_reordered <- Reduce("c", peaks_by_chr)

# Export peak count matrix and feature metadata
peak_counts <- assays(peak_mat)[["PeakMatrix"]]

# Refuse to write a feature table whose length disagrees with the matrix.
stopifnot(length(peaks_reordered) == nrow(peak_counts))

writeMM(peak_counts, file.path(peak_counts_dir, "counts.mtx"))
write.csv(
  colnames(peak_mat),
  file.path(peak_counts_dir, "cells.csv"),
  quote = FALSE
)
names(peaks_reordered) <- sprintf("Peak%d", seq_along(peaks_reordered))
write.csv(
  as.data.frame(peaks_reordered),
  file.path(peak_counts_dir, "peaks.csv"),
  quote = FALSE
)

“'seacells' already exists”
ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-dfa32063d9d7-Date-2025-01-05_Time-12-05-57.601032.log
If there is an issue, please report to github with logFile!

2025-01-05 12:06:16.630113 : Organizing colData, 0.317 mins elapsed.

2025-01-05 12:06:16.658039 : Organizing rowData, 0.318 mins elapsed.

2025-01-05 12:06:16.660766 : Organizing rowRanges, 0.318 mins elapsed.

2025-01-05 12:06:16.665023 : Organizing Assays (1 of 1), 0.318 mins elapsed.

2025-01-05 12:06:16.78604 : Constructing SummarizedExperiment, 0.32 mins elapsed.

2025-01-05 12:06:17.497175 : Finished Matrix Creation, 0.332 mins elapsed.

ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-dfa3519afa54-Date-2025-01-05_Time-12-07-46.503544.log
If there is an issue, please report to github with logFile!

2025-01-05 12:08:11.146295 : Organizing colData, 0.411 mins elapsed.

2025-01-05 12:08:11.174596 : Organizing rowData, 0.411 mins elapsed.

2025-01-05 12:08:11.178828 : Organizing r

NULL