# Set-up
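Collect the SC.beta and SC.alpha bigWigs and peak calls produced by different tools for the H1_control and A2_control timecourses, so they can be compared in a genome browser session. (The cells below currently copy the SC.beta files only, for one timecourse per run.)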

In [1]:
import glob
import os
import subprocess
import sys

import pandas as pd
import pyranges as pr
import tqdm.auto as tqdm

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Viz params: seaborn "white" theme with the "Set2" colour palette
sns.set_theme(style="white")
sns.set_palette("Set2")

# Silence warnings.
# NOTE: filterwarnings("ignore") is called without a category or module filter,
# so it suppresses every Python warning in this session, not only pandas ones.
import warnings
warnings.filterwarnings("ignore")
# Turn off pandas' chained-assignment warning
pd.options.mode.chained_assignment = None  # default='warn'

The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed')).History will not be written to the database.


In [3]:
# Switch the working directory to the peak_analysis folder; every relative path
# used after this point is resolved against it.
# NOTE(review): presumably this is what makes the local `pyranges_utils` module
# importable in the next cell — confirm.
os.chdir("/cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/bin/data_analysis/peak_analysis")

In [4]:
# Project-local helper applied to every tool's peak calls further down.
# NOTE(review): its filtering rules are defined outside this notebook.
from pyranges_utils import filter_peaks

# Path to blacklist
# NOTE(review): the file is named temp.bed inside the hg38 blacklist directory —
# confirm it is the intended blacklist.
blacklist_path = "/cellar/users/aklie/data/ref/blacklists/hg38/temp.bed"
# Load the blacklist intervals with pyranges; passed to filter_peaks in the peak cells
blacklist = pr.read_bed(blacklist_path)
# Show the loaded intervals as the cell output
blacklist

Unnamed: 0,Chromosome,Start,End
0,chr1,627846,636161
1,chr1,5849030,5851628
2,chr1,8908553,8911071
3,chr1,9573523,9576054
4,chr1,32042766,32045260
...,...,...,...
905,chrY,11289740,11335335
906,chrY,11491996,11593907
907,chrY,11669957,11672103
908,chrY,11720471,11750529


In [43]:
# base_indir: root holding one sub-directory per timecourse, each with per-tool outputs
# base_outdir: root under which this notebook writes the browser-session files
base_indir="/cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/annotation/timecourse"
base_outdir = "/cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/analysis/2024_01_12/timecourse"
# Available timecourses; the notebook processes a single one per run
timecourses = ["H1_control", "A2_control"]
# Pick the timecourse by index (1 -> "A2_control"); change the index and re-run
# the cells below to process the other one
timecourse = timecourses[1]
# Echo the selection as the cell output
timecourse

'A2_control'

In [44]:
# Both output folders live under <base_outdir>/browser_session
browser_session_dir = os.path.join(base_outdir, "browser_session")
bw_outdir = os.path.join(browser_session_dir, "bigWigs")
peaks_outdir = os.path.join(browser_session_dir, "peaks")
# Create each folder (and any missing parents); existing folders are left as they are
for outdir in (bw_outdir, peaks_outdir):
    os.makedirs(outdir, exist_ok=True)

# Get ArchR bigWigs and peak calls

In [45]:
# Make sure the ArchR output directory exists before any file is read from it
archr_dir = os.path.join(base_indir, timecourse, "archr")

# Fail here, naming the offending path, instead of only printing a message and
# letting a later cell trip over the missing directory
if not os.path.isdir(archr_dir):
    raise FileNotFoundError(f"ArchR directory {archr_dir} does not exist!")

In [46]:
# BigWig file
# List what ArchR exported, then pick the SC.beta track
archr_bw_dir = os.path.join(archr_dir, "GroupBigWigs", "rna_annotation")
print(os.listdir(archr_bw_dir))
file_to_mv = os.path.join(archr_bw_dir, "SC.beta-TileSize-100-normMethod-ReadsInTSS-ArchR.bw")
print(file_to_mv)
# Copy file to outdir
bw_dest = f"{bw_outdir}/{timecourse}_ArchR_SC.beta.bw"
cmd = f"cp {file_to_mv} {bw_dest}"  # kept only for logging the equivalent command
print(cmd)
# Run cp with an argument list so no shell parses the paths; check=True raises
# CalledProcessError on a non-zero exit instead of letting the notebook run past a
# failed copy. The return code (0 on success) remains the cell output.
subprocess.run(["cp", file_to_mv, bw_dest], check=True).returncode

['SC.delta-TileSize-100-normMethod-ReadsInTSS-ArchR.bw', 'SC.alpha-TileSize-100-normMethod-ReadsInTSS-ArchR.bw', 'other-TileSize-100-normMethod-ReadsInTSS-ArchR.bw', 'SC.EC-TileSize-100-normMethod-ReadsInTSS-ArchR.bw', 'SC.beta-TileSize-100-normMethod-ReadsInTSS-ArchR.bw']
/cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/annotation/timecourse/A2_control/archr/GroupBigWigs/rna_annotation/SC.beta-TileSize-100-normMethod-ReadsInTSS-ArchR.bw
cp /cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/annotation/timecourse/A2_control/archr/GroupBigWigs/rna_annotation/SC.beta-TileSize-100-normMethod-ReadsInTSS-ArchR.bw /cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/analysis/2024_01_12/timecourse/browser_session/bigWigs/A2_control_ArchR_SC.beta.bw


0

In [47]:
# Peak file
# Show the available ArchR split-peak files, then select the SC.beta one
archr_peaks_dir = os.path.join(archr_dir, "PeakCalls", "SplitPeaks")
print(os.listdir(archr_peaks_dir))
file_to_mv = os.path.join(archr_peaks_dir, "SC.beta.bed")

# Read the peaks with pyranges and run them through filter_peaks with the blacklist
peaks = filter_peaks(pr.read_bed(file_to_mv), blacklist=blacklist)

# Write the filtered peaks into the browser-session peaks folder
archr_peaks_out = os.path.join(peaks_outdir, f"{timecourse}_ArchR_SC.beta.bed")
peaks.to_bed(archr_peaks_out)

['SC.alpha.bed', 'other.bed', 'SC.delta.bed', 'consensus_peaks.bed', 'SC.beta.bed', 'SC.EC.filt.bed', 'consensus_peaks.filt.bed', 'SC.delta.filt.bed', 'SC.EC.bed', 'SC.beta.filt.bed', 'other.filt.bed', 'SC.alpha.filt.bed']


# Get SnapATAC2 v2.3 peak calls and bigWigs

In [48]:
# Make sure the SnapATAC2 output directory exists before any file is read from it
snapatac2_dir = os.path.join(base_indir, timecourse, "snapatac2")

# Fail here, naming the offending path, instead of only printing a message and
# letting a later cell trip over the missing directory
if not os.path.isdir(snapatac2_dir):
    raise FileNotFoundError(f"SnapATAC2 directory {snapatac2_dir} does not exist!")

In [49]:
# BigWig file
# List what SnapATAC2 exported, then pick the SC.beta track
snapatac2_bw_dir = os.path.join(snapatac2_dir, "bigWigs")
print(os.listdir(snapatac2_bw_dir))
file_to_mv = os.path.join(snapatac2_bw_dir, "SC.beta.bw")
print(file_to_mv)
# Copy file to outdir
bw_dest = f"{bw_outdir}/{timecourse}_SnapATAC2_v2.3_SC.beta.bw"
cmd = f"cp {file_to_mv} {bw_dest}"  # kept only for logging the equivalent command
print(cmd)
# Run cp with an argument list so no shell parses the paths; check=True raises
# CalledProcessError on a non-zero exit instead of letting the notebook run past a
# failed copy. The return code (0 on success) remains the cell output.
subprocess.run(["cp", file_to_mv, bw_dest], check=True).returncode

['SC.EC.bed.gz', 'SC.alpha.bed.gz', 'SC.delta.bw', 'other.bw', 'SC.beta.bed.gz', 'SC.EC.bw', 'SC.delta.bed.gz', 'other.bed.gz', 'SC.beta.bw', 'SC.alpha.bw']
/cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/annotation/timecourse/A2_control/snapatac2/bigWigs/SC.beta.bw
cp /cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/annotation/timecourse/A2_control/snapatac2/bigWigs/SC.beta.bw /cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/analysis/2024_01_12/timecourse/browser_session/bigWigs/A2_control_SnapATAC2_v2.3_SC.beta.bw


0

In [50]:
# Peak file
# Show the available SnapATAC2 peak-call files, then select the SC.beta one
snapatac2_peaks_dir = os.path.join(snapatac2_dir, "peak_calls")
print(os.listdir(snapatac2_peaks_dir))
file_to_mv = os.path.join(snapatac2_peaks_dir, "SC.beta.NarrowPeak.gz")

# Read the peaks with pyranges and run them through filter_peaks with the blacklist
peaks = filter_peaks(pr.read_bed(file_to_mv), blacklist=blacklist)

# Write the filtered peaks into the browser-session peaks folder
snapatac2_peaks_out = os.path.join(peaks_outdir, f"{timecourse}_SnapATAC2_v2.3_SC.beta.bed")
peaks.to_bed(snapatac2_peaks_out)

['SC.delta.NarrowPeak.gz', 'other_insertion.bed.gz', 'pseudobulk_and_peakcalling.log', 'SC.beta.NarrowPeak.gz', 'SC.beta_insertion.bed.gz', 'SC.EC.NarrowPeak.gz', 'SC.EC_insertion.bed.gz', 'SC.alpha.NarrowPeak.gz', 'other.NarrowPeak.gz', 'SC.delta_insertion.bed.gz', 'SC.alpha_insertion.bed.gz']


# pycisTopic

In [51]:
# Make sure the pycisTopic output directory exists before any file is read from it
pycistopic_dir = os.path.join(base_indir, timecourse, "pycistopic")

# Fail here, naming the offending path, instead of only printing a message and
# letting a later cell trip over the missing directory
if not os.path.isdir(pycistopic_dir):
    raise FileNotFoundError(f"PyCisTopic directory {pycistopic_dir} does not exist!")

In [52]:
# BigWig file
# List the pycisTopic pseudobulk outputs, then pick the SC_beta track
pycistopic_bw_dir = os.path.join(pycistopic_dir, "pseudobulk")
print(os.listdir(pycistopic_bw_dir))
file_to_mv = os.path.join(pycistopic_bw_dir, "SC_beta.bw")
print(file_to_mv)
# Copy file to outdir
bw_dest = f"{bw_outdir}/{timecourse}_pycisTopic_SC.beta.bw"
cmd = f"cp {file_to_mv} {bw_dest}"  # kept only for logging the equivalent command
print(cmd)
# Run cp with an argument list so no shell parses the paths; check=True raises
# CalledProcessError on a non-zero exit instead of letting the notebook run past a
# failed copy. The return code (0 on success) remains the cell output.
subprocess.run(["cp", file_to_mv, bw_dest], check=True).returncode

['SC_delta.bw', 'SC_beta.bw', 'SC_EC.bw', 'other.bed.gz', 'SC_EC.bed.gz', 'SC_delta.bed.gz', 'SC_beta.bed.gz', 'other.bw', 'SC_alpha.bed.gz', 'SC_alpha.bw']
/cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/annotation/timecourse/A2_control/pycistopic/pseudobulk/SC_beta.bw
cp /cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/annotation/timecourse/A2_control/pycistopic/pseudobulk/SC_beta.bw /cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/analysis/2024_01_12/timecourse/browser_session/bigWigs/A2_control_pycisTopic_SC.beta.bw


0

In [53]:
# Peak file
# Show the available pycisTopic peak-call files, then select the SC_beta one
pycistopic_peaks_dir = os.path.join(pycistopic_dir, "peak_calls")
print(os.listdir(pycistopic_peaks_dir))
file_to_mv = os.path.join(pycistopic_peaks_dir, "SC_beta_peaks.narrowPeak")

# Read the peaks with pyranges and run them through filter_peaks with the blacklist
peaks = filter_peaks(pr.read_bed(file_to_mv), blacklist=blacklist)

# Write the filtered peaks into the browser-session peaks folder
pycistopic_peaks_out = os.path.join(peaks_outdir, f"{timecourse}_pycisTopic_SC.beta.bed")
peaks.to_bed(pycistopic_peaks_out)

['SC_beta_peaks.narrowPeak', 'SC_delta_summits.bed', 'SC_EC_summits.bed', 'consensus_regions.bed', 'SC_alpha_peaks.xls', 'SC_delta_peaks.narrowPeak', 'SC_delta_peaks.filt.narrowPeak', 'SC_EC_peaks.xls', 'narrow_peaks_dict.pkl', 'other_summits.bed', 'SC_beta_peaks.filt.narrowPeak', 'SC_beta_summits.bed', 'other_peaks.narrowPeak', 'SC_EC_peaks.narrowPeak', 'SC_beta_peaks.xls', 'SC_alpha_peaks.filt.narrowPeak', 'SC_delta_peaks.xls', 'SC_EC_peaks.filt.narrowPeak', 'other_peaks.filt.narrowPeak', 'SC_alpha_summits.bed', 'SC_alpha_peaks.narrowPeak', 'other_peaks.xls']


# CellCommander

In [54]:
# Make sure the CellCommander output directory exists before any file is read from it
cellcommander_dir = os.path.join(base_indir, timecourse, "cellcommander")

# Fail here, naming the offending path, instead of only printing a message and
# letting a later cell trip over the missing directory
if not os.path.isdir(cellcommander_dir):
    raise FileNotFoundError(f"CellCommander directory {cellcommander_dir} does not exist!")

In [55]:
# BigWig file
# List the CellCommander ATAC bigWigs, then pick the SC.beta track
cellcommander_bw_dir = os.path.join(cellcommander_dir, "atac", "bigWigs")
print(os.listdir(cellcommander_bw_dir))
file_to_mv = os.path.join(cellcommander_bw_dir, "SC.beta_deeptools.bw")
print(file_to_mv)
# Copy file to outdir
bw_dest = f"{bw_outdir}/{timecourse}_CellCommander_SC.beta_deeptools.bw"
cmd = f"cp {file_to_mv} {bw_dest}"  # kept only for logging the equivalent command
print(cmd)
# Run cp with an argument list so no shell parses the paths; check=True raises
# CalledProcessError on a non-zero exit instead of letting the notebook run past a
# failed copy. The return code (0 on success) remains the cell output.
subprocess.run(["cp", file_to_mv, bw_dest], check=True).returncode

['SC.beta_deeptools.bw', 'SC.delta_deeptools.bw', 'other_deeptools.bw', 'SC.alpha_deeptools.bw', 'SC.EC_deeptools.bw']
/cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/annotation/timecourse/A2_control/cellcommander/atac/bigWigs/SC.beta_deeptools.bw
cp /cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/annotation/timecourse/A2_control/cellcommander/atac/bigWigs/SC.beta_deeptools.bw /cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/analysis/2024_01_12/timecourse/browser_session/bigWigs/A2_control_CellCommander_SC.beta_deeptools.bw


0

In [56]:
# Peak file
# Show the available CellCommander peak-call files, then select the SC.beta one
cellcommander_peaks_dir = os.path.join(cellcommander_dir, "atac", "peak_calls")
print(os.listdir(cellcommander_peaks_dir))
file_to_mv = os.path.join(cellcommander_peaks_dir, "SC.beta_peaks.narrowPeak")

# Read the peaks with pyranges and run them through filter_peaks with the blacklist
peaks = filter_peaks(pr.read_bed(file_to_mv), blacklist=blacklist)

# Write the filtered peaks into the browser-session peaks folder
cellcommander_peaks_out = os.path.join(peaks_outdir, f"{timecourse}_CellCommander_SC.beta.bed")
peaks.to_bed(cellcommander_peaks_out)

['other_summits.bed', 'other_peaks.narrowPeak', 'SC.alpha_summits.bed', 'SC.delta_control_lambda.bdg', 'SC.alpha_peaks.xls', 'SC.delta_peaks.narrowPeak', 'SC.delta_peaks.xls', 'other_control_lambda.bdg', 'other_peaks.xls', 'SC.delta_summits.bed', 'SC.EC_control_lambda.bdg', 'SC.beta_treat_pileup.bdg', 'SC.alpha_peaks.narrowPeak', 'SC.EC_treat_pileup.bdg', 'SC.beta_control_lambda.bdg', 'SC.beta_peaks.xls', 'SC.EC_peaks.narrowPeak', 'SC.beta_peaks.narrowPeak', 'SC.EC_peaks.xls', 'SC.beta_summits.bed', 'SC.alpha_control_lambda.bdg', 'SC.delta_treat_pileup.bdg', 'SC.EC_summits.bed', 'other_treat_pileup.bdg', 'SC.alpha_treat_pileup.bdg']


# ENCODE ATAC pipeline

In [57]:
# Make sure the ENCODE ATAC pipeline output directory exists before any file is read
# from it (it sits under the CellCommander ATAC outputs)
encode_atac_pipeline_dir = os.path.join(base_indir, timecourse, "cellcommander", "atac", "ENCODE_peak_calls")

# Fail here, naming the offending path, instead of only printing a message and
# letting a later cell trip over the missing directory
if not os.path.isdir(encode_atac_pipeline_dir):
    raise FileNotFoundError(f"Encode ATAC pipeline directory {encode_atac_pipeline_dir} does not exist!")


In [58]:
# BigWig file
# List the ENCODE pipeline outputs, then pick the SC.beta fold-change signal track
print(os.listdir(encode_atac_pipeline_dir))
file_to_mv = os.path.join(encode_atac_pipeline_dir, "SC.beta.pooled.fc.signal.bigwig")
print(file_to_mv)
# Copy file to outdir
bw_dest = f"{bw_outdir}/{timecourse}_ENCODE_atac_pipeline_SC.beta.pooled.fc.signal.bigwig"
cmd = f"cp {file_to_mv} {bw_dest}"  # kept only for logging the equivalent command
print(cmd)
# Run cp with an argument list so no shell parses the paths; check=True raises
# CalledProcessError on a non-zero exit instead of letting the notebook run past a
# failed copy. The return code (0 on success) remains the cell output.
subprocess.run(["cp", file_to_mv, bw_dest], check=True).returncode

['SC.delta.pooled.pval0.05.300K.narrowPeak.gz', 'other.pooled.pval0.05.300K.bfilt.narrowPeak.hammock.gz', 'SC.EC.pooled.pval0.05.300K.narrowPeak.gz', 'SC.beta.pooled.pval0.05.300K.bfilt.narrowPeak.gz', 'SC.EC.pooled.pval0.05.300K.bfilt.narrowPeak.starch', 'SC.beta.pooled.pval0.05.300K.bfilt.narrowPeak.starch', 'SC.beta.pooled.fc.signal.bigwig', 'SC.EC.pooled.pval0.05.300K.bfilt.narrowPeak.hammock.gz.tbi', 'SC.delta.pooled.pval0.05.300K.bfilt.narrowPeak.hammock.gz', 'SC.alpha.pooled.pval0.05.300K.narrowPeak', 'SC.alpha.pooled.fc.signal.bigwig', 'other.pooled.pval0.05.300K.bfilt.narrowPeak.hammock.gz.tbi', 'other.pooled.pval0.05.300K.narrowPeak.gz', 'other.pooled.pval0.05.300K.bfilt.frip.qc', 'SC.alpha.pooled.pval0.05.300K.bfilt.narrowPeak.gz', 'SC.beta.pooled.pval0.05.300K.bfilt.narrowPeak.hammock.gz.tbi', 'other.pooled.pval0.05.300K.annotated.bed', 'SC.delta.pooled.pval0.05.300K.bfilt.narrowPeak.gz', 'SC.alpha.pooled.pval0.05.300K.annotated.bed', 'SC.delta.pooled.fc.signal.bigwig', 'SC

0

In [59]:
# Peak file
# Show the ENCODE pipeline outputs, then select the pooled SC.beta narrowPeak file
print(os.listdir(encode_atac_pipeline_dir))
file_to_mv = os.path.join(encode_atac_pipeline_dir, "SC.beta.pooled.pval0.05.300K.narrowPeak")

# Read the peaks with pyranges and run them through filter_peaks with the blacklist
peaks = filter_peaks(pr.read_bed(file_to_mv), blacklist=blacklist)

# Write the filtered peaks into the browser-session peaks folder
encode_peaks_out = os.path.join(peaks_outdir, f"{timecourse}_ENCODE_atac_pipeline_SC.beta.bed")
peaks.to_bed(encode_peaks_out)

['SC.delta.pooled.pval0.05.300K.narrowPeak.gz', 'other.pooled.pval0.05.300K.bfilt.narrowPeak.hammock.gz', 'SC.EC.pooled.pval0.05.300K.narrowPeak.gz', 'SC.beta.pooled.pval0.05.300K.bfilt.narrowPeak.gz', 'SC.EC.pooled.pval0.05.300K.bfilt.narrowPeak.starch', 'SC.beta.pooled.pval0.05.300K.bfilt.narrowPeak.starch', 'SC.beta.pooled.fc.signal.bigwig', 'SC.EC.pooled.pval0.05.300K.bfilt.narrowPeak.hammock.gz.tbi', 'SC.delta.pooled.pval0.05.300K.bfilt.narrowPeak.hammock.gz', 'SC.alpha.pooled.pval0.05.300K.narrowPeak', 'SC.alpha.pooled.fc.signal.bigwig', 'other.pooled.pval0.05.300K.bfilt.narrowPeak.hammock.gz.tbi', 'other.pooled.pval0.05.300K.narrowPeak.gz', 'other.pooled.pval0.05.300K.bfilt.frip.qc', 'SC.alpha.pooled.pval0.05.300K.bfilt.narrowPeak.gz', 'SC.beta.pooled.pval0.05.300K.bfilt.narrowPeak.hammock.gz.tbi', 'other.pooled.pval0.05.300K.annotated.bed', 'SC.delta.pooled.pval0.05.300K.bfilt.narrowPeak.gz', 'SC.alpha.pooled.pval0.05.300K.annotated.bed', 'SC.delta.pooled.fc.signal.bigwig', 'SC

# DONE!

---