In [None]:
library(Seurat)
library(dplyr)
library(Matrix)
library(Signac)
library(ggplot2)
library(ggrepel)
library(GenomicRanges)
library(AnnotationHub)
library(patchwork)
library(ensembldb)
library(biovizBase)
library(TFBSTools)
library(JASPAR2020)
library(Biostrings)
library(ggseqlogo)
library(universalmotif)
library(BSgenome.Hsapiens.UCSC.hg38)

In [None]:
# Saved workspace file; the next cell expects it to provide `COVID_filter6`.
rda_path <- "/data/imindol02/COVID_multiome/COVID_Objects/COVID_filter6.rda"

# Restore every object stored in the file into the current environment.
load(file = rda_path)

In [None]:
# Work on the loaded object under the shorter name `pbmc` and show its summary.
pbmc <- COVID_filter6
print(x = pbmc)

In [None]:
# Rebuild the "peaks" assay as a ChromatinAssay that points at the fragment
# file on disk, then make it the default assay.
fragments_file <- "/data/imindol02/COVID_multiome/COVID_ATAC/atac_fragments.tsv.gz"

# Count matrix taken from the existing "peaks" assay.
counts_data <- pbmc[["peaks"]]$counts

# `sep` describes peak names of the form chr:start-end.
# NOTE(review): min.cells / min.features filter features and cells again; if
# any cell is dropped here the assay may no longer match the object's cells
# when it is assigned back below -- confirm this is intended.
new_chrom_assay <- CreateChromatinAssay(
  counts = counts_data,        # counts extracted above
  fragments = fragments_file,  # fragment file path set above
  sep = c(":", "-"),
  min.features = 200,
  min.cells = 10
)

pbmc[["peaks"]] <- new_chrom_assay
class(pbmc[["peaks"]])
DefaultAssay(pbmc) <- "peaks"

# Print the path stored in the first Fragment object as a sanity check.
frag_objects <- Fragments(pbmc)
print(slot(frag_objects[[1]], "path"))

In [None]:
# Connect to AnnotationHub and list the human EnsDb records it offers.
ah <- AnnotationHub()

query(x = ah, pattern = c("EnsDb", "Homo sapiens"))

In [None]:
# Raise R's download timeout to 3600 seconds before fetching the EnsDb record.
options(timeout = 3600)

# NOTE(review): "AH53211" is a fixed AnnotationHub record id; confirm which
# Ensembl release it corresponds to before relying on the name `ensdb_latest`.
ensdb_latest <- ah[["AH53211"]]

# Extract gene annotations as GRanges, switch to UCSC-style sequence names,
# and tag the genome build as hg38.
annotations <- GetGRangesFromEnsDb(ensdb = ensdb_latest)
seqlevelsStyle(annotations) <- "UCSC"
genome(annotations) <- "hg38"

# Attach the annotations to the object and show the resulting "peaks" assay.
Annotation(pbmc) <- annotations
print(pbmc[["peaks"]])

In [None]:
# Desired display order of the cell-type identities.
my_levels <- c(
  "B", "Plasmablast",
  "CD4_T", "Treg", "MAIT", "CD8_T",
  "NKT", "NK",
  "CD14_Monocyte", "CD16_Monocyte",
  "cDC1", "cDC2", "pDC"
)

# Re-level the active identities in place; any identity that is not listed in
# `my_levels` becomes NA.
slot(pbmc, "active.ident") <- factor(
  x = slot(pbmc, "active.ident"),
  levels = my_levels
)

In [None]:
# B / T subclustering labels, loaded from CSV files.
#
# Both label sets are attached here: later cells set identities from
# `B_subcluster` (B_subset) and from `T_subcluster` (T_subset), so loading only
# one of the two files leaves the other analysis without its labels.
subcluster_dir <- "/data/imindol02/COVID_multiome/COVID_DataFrames/COVID_subset"

# Metadata column name -> CSV file providing that column.
subcluster_files <- c(
  B_subcluster = "updated_metadata_B.csv",
  T_subcluster = "updated_metadata_T.csv"
)

for (subcluster_col in names(subcluster_files)) {
  # The first CSV column supplies the row names used to align the labels with
  # the cells of `pbmc`.
  full_metadata <- read.csv(
    file.path(subcluster_dir, subcluster_files[[subcluster_col]]),
    row.names = 1
  )

  if (!subcluster_col %in% colnames(full_metadata)) {
    stop(
      "Column '", subcluster_col, "' not found in ",
      subcluster_files[[subcluster_col]],
      call. = FALSE
    )
  }

  pbmc <- AddMetaData(
    object = pbmc,
    metadata = full_metadata[subcluster_col],
    col.name = subcluster_col
  )
}

In [None]:
# Attach the abstract cell-type labels: the CSV column `cell_type_abstract`
# is stored on the object under the name `celltype_abstract`.
full_metadata <- read.csv(
  file = "/data/imindol02/COVID_multiome/COVID_DataFrames/COVID_subset/updated_metadata_celltype_abs.csv",
  row.names = 1
)

pbmc <- AddMetaData(
  pbmc,
  metadata = full_metadata["cell_type_abstract"],
  col.name = "celltype_abstract"
)

In [None]:
# Fetch motif matrices from JASPAR2020.
# `species` is a taxonomy id: 9606 = human, 10090 = mouse.
pfm <- getMatrixSet(
  x = JASPAR2020,
  opts = list(
    species = 9606,
    all_versions = FALSE
  )
)

In [None]:
# Add motif information for the JASPAR matrices to the "peaks" assay, using
# the hg38 genome sequence.
pbmc <- AddMotifs(
  pbmc,
  assay = "peaks",
  pfm = pfm,
  genome = BSgenome.Hsapiens.UCSC.hg38
)

In [None]:
# Compute per-peak sequence statistics for the "peaks" assay against hg38.
pbmc <- RegionStats(
  pbmc,
  assay = "peaks",
  genome = BSgenome.Hsapiens.UCSC.hg38
)

In [None]:
# Keep only cells annotated as B or Plasmablast in `big_anno`.
B_subset <- subset(pbmc, subset = big_anno %in% c("B", "Plasmablast"))

# Use the B subcluster labels as the active identity.
Idents(B_subset) <- "B_subcluster"

# Display order of the B subclusters; labels not listed here become NA.
my_levels <- c(
  "Naive B cell group 1",
  "Naive B cell group 2",
  "Activated Naive B cell",
  "Memory B cell",
  "Transitional B cell",
  "atypical Memory B cell",
  "atypical B cell",
  "Plasmablast"
)
slot(B_subset, "active.ident") <- factor(
  x = slot(B_subset, "active.ident"),
  levels = my_levels
)

In [None]:
# Keep only cells annotated as CD4_T, CD8_T, MAIT or Treg in `big_anno`.
T_subset <- subset(
  pbmc,
  subset = big_anno %in% c("CD4_T", "CD8_T", "MAIT", "Treg")
)

# Use the T subcluster labels as the active identity.
Idents(T_subset) <- "T_subcluster"

# Display order of the T subclusters; labels not listed here become NA.
my_levels <- c(
  "Naive T", "Naive CD4", "Naive CD8",
  "CM CD4", "EM CD4",
  "Treg", "CTL", "MAIT"
)
slot(T_subset, "active.ident") <- factor(
  x = slot(T_subset, "active.ident"),
  levels = my_levels
)

In [None]:
# Differentially accessible regions (DARs) that are more accessible in the
# "atypical B cell" subcluster than in the remaining cells of `B_subset`,
# followed by motif enrichment in those regions.
# To analyse 'atypical Memory B cell' instead, change `ident.1` and rename the
# atBC_* objects to atMBC_*.
atBC_DAR <- FindMarkers(
  object = B_subset,
  ident.1 = "atypical B cell",
  group.by = "B_subcluster",
  only.pos = TRUE,
  test.use = "LR"
)

# Peaks passing the nominal (unadjusted) p-value threshold. `which()` drops
# rows whose p-value is NA; plain logical indexing would turn them into
# all-NA rows with bogus "NA" row names.
atBC_DAR_peak <- rownames(atBC_DAR[which(atBC_DAR$p_val < 0.05), ])

if (length(atBC_DAR_peak) == 0) {
  stop(
    "No 'atypical B cell' peaks with p_val < 0.05; ",
    "motif enrichment cannot be computed.",
    call. = FALSE
  )
}

atBC_motif <- FindMotifs(
  object = B_subset,
  features = atBC_DAR_peak
)

atBC_motif_df <- atBC_motif

# Order motifs by p-value and add -log10(p) for the y axis.
plot_data <- atBC_motif_df %>%
  arrange(pvalue) %>%
  mutate(log_p = -log10(pvalue))

# The 30 motifs with the smallest p-values are labelled on the plot.
label_data <- plot_data %>%
  head(n = 30)

# Scatter plot of fold enrichment against significance.
p <- ggplot(plot_data, aes(x = fold.enrichment, y = log_p)) +
  geom_point(aes(color = log_p), size = 1.5, alpha = 1.0) +
  scale_color_gradient(low = "grey", high = "#E41A1C") +
  geom_text_repel(
    data = label_data,
    aes(label = motif.name),
    box.padding = 1.0,
    point.padding = 1.0,
    max.overlaps = Inf,
    segment.color = "grey80",
    segment.size = 0.3,
    size = 3.5,
    fontface = "plain",
    color = "black"
  ) +
  theme_classic() +
  labs(
    title = "Enriched Motifs",
    subtitle = "Labels: top 30 significant motifs",
    x = "Fold Enrichment",
    y = "-log10(P-value)",
    color = "Significance"
  ) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

# An assignment alone does not draw anything, so render the plot explicitly.
print(p)

# Draw the sequence logo of each labelled motif. Each plot is printed inside
# the loop; otherwise it would be overwritten by the next iteration unseen.
for (motif_id in label_data$motif) {
  motif_logo <- MotifPlot(
    object = B_subset,
    motifs = motif_id,
    assay = "peaks"
  )
  print(motif_logo)
}

In [None]:
# Differentially accessible regions (DARs) that are more accessible in the
# "MAIT" subcluster than in the remaining cells of `T_subset`, followed by
# motif enrichment in those regions.
MAIT_DAR <- FindMarkers(
  object = T_subset,
  ident.1 = "MAIT",
  group.by = "T_subcluster",
  only.pos = TRUE,
  test.use = "LR"
)

# Peaks passing the nominal (unadjusted) p-value threshold. `which()` drops
# rows whose p-value is NA; plain logical indexing would turn them into
# all-NA rows with bogus "NA" row names.
MAIT_DAR_peak <- rownames(MAIT_DAR[which(MAIT_DAR$p_val < 0.05), ])

if (length(MAIT_DAR_peak) == 0) {
  stop(
    "No 'MAIT' peaks with p_val < 0.05; ",
    "motif enrichment cannot be computed.",
    call. = FALSE
  )
}

MAIT_motif <- FindMotifs(
  object = T_subset,
  features = MAIT_DAR_peak
)

MAIT_motif_df <- MAIT_motif

# Order motifs by p-value and add -log10(p) for the y axis.
plot_data <- MAIT_motif_df %>%
  arrange(pvalue) %>%
  mutate(log_p = -log10(pvalue))

# The 30 motifs with the smallest p-values are labelled on the plot.
label_data <- plot_data %>%
  head(n = 30)

# Scatter plot of fold enrichment against significance.
p <- ggplot(plot_data, aes(x = fold.enrichment, y = log_p)) +
  geom_point(aes(color = log_p), size = 1.5, alpha = 1.0) +
  scale_color_gradient(low = "grey", high = "#E41A1C") +
  geom_text_repel(
    data = label_data,
    aes(label = motif.name),
    box.padding = 1.0,
    point.padding = 1.0,
    max.overlaps = Inf,
    segment.color = "grey80",
    segment.size = 0.3,
    size = 3.5,
    fontface = "plain",
    color = "black"
  ) +
  theme_classic() +
  labs(
    title = "Enriched Motifs",
    subtitle = "Labels: top 30 significant motifs",
    x = "Fold Enrichment",
    y = "-log10(P-value)",
    color = "Significance"
  ) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

# An assignment alone does not draw anything, so render the plot explicitly.
print(p)

# Draw the sequence logo of each labelled motif. Each plot is printed inside
# the loop; otherwise it would be overwritten by the next iteration unseen.
for (motif_id in label_data$motif) {
  motif_logo <- MotifPlot(
    object = T_subset,
    motifs = motif_id,
    assay = "peaks"
  )
  print(motif_logo)
}

In [None]:
# Coverage plot of the TGFB1 locus, one track per B subcluster.

# Figure size for the notebook output. CoveragePlot() has no figure-size
# arguments: `height` would partially match its `heights` parameter (relative
# track heights) and `width` is not one of its parameters, so the size is set
# through the notebook's plot options instead.
options(repr.plot.width = 25, repr.plot.height = 15)

# Region to highlight on the tracks.
roi <- StringToGRanges("chr19-41330323-41353922")

CoveragePlot(
  object = B_subset,
  region = "TGFB1",
  region.highlight = roi,
  group.by = "B_subcluster",
  extend.upstream = 1500,
  extend.downstream = 500
)

In [None]:
# Link peaks to the expression of the target gene(s), then test which motifs
# are enriched in the linked peaks.
target_genes <- c("TGFB1")

B_subset <- LinkPeaks(
  object = B_subset,
  peak.assay = "peaks",
  expression.assay = "RNA", # or "SCT"
  genes.use = target_genes,
  distance = 500000,
  pvalue_cutoff = 0.05,
  score_cutoff = 0.05
)

links_df <- as.data.frame(Links(B_subset))

# Peaks linked to the target gene(s) with a score above 0.05.
# `dplyr::filter` is namespace-qualified because ensembldb, attached after
# dplyr, also exports a `filter` generic that would otherwise be picked up.
# The filter uses `target_genes` (the variable defined above).
linked_peaks <- links_df %>%
  dplyr::filter(gene %in% target_genes, score > 0.05) %>%
  pull(peak)

if (length(linked_peaks) == 0) {
  stop(
    "No peaks linked to ", paste(target_genes, collapse = ", "),
    " with score > 0.05; motif enrichment cannot be computed.",
    call. = FALSE
  )
}

enriched_motifs <- FindMotifs(
  object = B_subset,
  features = linked_peaks
)

In [None]:
# Names of the motifs found in the TGFB1-linked peaks.
# `observed` is taken from the FindMotifs() result; keeping every motif with
# at least one occurrence (rather than exactly one) also retains motifs that
# occur in several linked peaks.
# `dplyr::filter` is namespace-qualified because ensembldb, attached after
# dplyr, also exports a `filter` generic.
TGFB1_linked_motifs <- enriched_motifs %>%
  dplyr::filter(observed >= 1) %>%
  pull(motif.name)
length(TGFB1_linked_motifs)

In [None]:
# Names of the motifs enriched in the atypical B cell DARs at a nominal
# (unadjusted) p-value below 0.05.
# `dplyr::filter` is namespace-qualified because ensembldb, attached after
# dplyr, also exports a `filter` generic.
atBC_linked_motifs <- atBC_motif %>%
  dplyr::filter(pvalue < 0.05) %>%
  pull(motif.name)
length(atBC_linked_motifs)

In [None]:
# Motif names present in both lists: the TGFB1-linked peaks and the atypical
# B cell DARs.
MOTIF_RESULT <- intersect(
  x = TGFB1_linked_motifs,
  y = atBC_linked_motifs
)
length(x = MOTIF_RESULT)
print(x = MOTIF_RESULT)

In [None]:
# Coverage plot of the TGFB1 locus with peak-gene links, using a copy of the
# object whose gene annotation contains only the target gene(s).
total_annotation <- Annotation(B_subset)
target_genes <- c("TGFB1")
subset_annotation <- total_annotation[
  total_annotation$gene_name %in% target_genes
]

# Work on a copy so the full annotation of `B_subset` stays intact.
B_subset_forPlot <- B_subset
Annotation(B_subset_forPlot) <- subset_annotation

p <- CoveragePlot(
  object = B_subset_forPlot,
  region = "TGFB1",
  features = "TGFB1",
  expression.assay = "RNA",
  links = TRUE,
  extend.upstream = 20000,
  extend.downstream = 300000
)

# An assignment alone does not draw anything, so render the plot explicitly.
print(p)

In [None]:
# Zoomed coverage plot of a short chr19 window with TGFB1 expression alongside;
# the annotation, peak and link tracks are switched off.

# Figure size for the notebook output. CoveragePlot() has no figure-size
# arguments: `height` would partially match its `heights` parameter (relative
# track heights) and `width` is not one of its parameters, so the size is set
# through the notebook's plot options instead.
options(repr.plot.width = 25, repr.plot.height = 15)

# Region to highlight inside the plotted window.
roi <- StringToGRanges("chr19-41570170-41570225")

p <- CoveragePlot(
  object = B_subset_forPlot,
  region = "chr19-41569966-41570457",
  region.highlight = roi,
  features = "TGFB1", # spelled out; `feature` relied on partial matching
  expression.assay = "RNA",
  annotation = FALSE,
  peaks = FALSE,
  links = FALSE
)

# An assignment alone does not draw anything, so render the plot explicitly.
print(p)