In [5]:
import os
import sys
import glob
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
# Use seaborn's "whitegrid" theme for any figures drawn in this notebook.
sns.set_theme(style="whitegrid")

# Widen the pandas display limits so large tables and long cell values
# (e.g. file paths) are shown in full instead of being truncated.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)
pd.set_option('max_colwidth', 400)

# Project-local settings module; every relative path used below is resolved
# against config.LOOP_CATALOG_DIR once the working directory is switched.
import config
os.chdir(config.LOOP_CATALOG_DIR)

In [6]:
# Directory (relative to the current working directory) that receives the
# tables written by the cells below.
outdir = 'results/tables/peaks/'
# exist_ok=True makes re-running this cell a no-op when the directory exists.
os.makedirs(outdir, exist_ok=True)

In [7]:
def get_chipseq_std_sample_name(x):
    """Return the file name of path ``x`` cut off at the first '.macs2_peaks'.

    Only the final path component is considered. When the marker does not
    occur, the whole file name is returned unchanged.
    """
    file_name = os.path.basename(x)
    sample_name, _, _ = file_name.partition('.macs2_peaks')
    return sample_name

def get_hicpro_matches(x, lookup=None):
    """Return the HiC-Pro sample name(s) associated with ChIP-seq sample ``x``.

    Parameters
    ----------
    x : str
        Index label to look up (a ``chipseq_std_sample_name`` value).
    lookup : pandas.DataFrame, optional
        Table indexed by ChIP-seq sample name that has a
        ``hicpro_std_sample_name`` column. When omitted, the notebook-level
        ``mapper`` frame is used (resolved at call time), which keeps
        ``Series.apply(get_hicpro_matches)`` working unchanged. Passing the
        table explicitly avoids depending on whichever cell last rebound
        ``mapper``.

    Returns
    -------
    str
        The single matching name, or every matching name joined with ', '
        when ``x`` occurs more than once in the index. Non-string values
        (e.g. a missing entry) are converted with ``str`` rather than
        failing with an AttributeError.

    Raises
    ------
    KeyError
        If ``x`` is not present in the index of the lookup table.
    """
    table = mapper if lookup is None else lookup

    # One .loc call instead of chained indexing: a unique label yields a
    # scalar, a duplicated label yields a Series of all matching values.
    res = table.loc[x, 'hicpro_std_sample_name']

    if isinstance(res, pd.Series):
        return ', '.join(map(str, res))
    return str(res)

## hg38 Data

In [8]:
# Load the hg38 ChIP-seq peak table and drop rows whose num_peaks_chipseq is -1
# (presumably a sentinel for samples without a peak call -- TODO confirm).
# .copy() makes the filtered frame independent, so the column added further
# down is never written into a slice of the frame returned by read_table.
fn = "results/tables/final.all_batches/hg38.unmerged_peaks.chipseq.03_08_24.tsv"
hg38_fc_peaks = pd.read_table(fn)
hg38_fc_peaks = hg38_fc_peaks.loc[hg38_fc_peaks.num_peaks_chipseq != -1].copy()

# Load the headerless samplesheet and index it by the ChIP-seq sample name
# derived from each chipseq_path. `mapper` must stay a notebook-level name
# because get_hicpro_matches reads it.
fn = 'results/samplesheets/post-hicpro/2024.2.15.10.52.peaks_files_chipseq.all_batches.samplesheet.without_header.tsv'
mapper = pd.read_table(fn, names=['hicpro_std_sample_name', 'vp_path', 'chipseq_path'])
mapper['chipseq_std_sample_name'] = mapper['chipseq_path'].apply(get_chipseq_std_sample_name)
mapper = mapper.set_index('chipseq_std_sample_name')

# Attach, per ChIP-seq sample, the comma-separated HiC-Pro sample name(s)
# that share its peak file.
hg38_fc_peaks['associated_hicpro_std_names'] = hg38_fc_peaks.chipseq_std_sample_name.apply(get_hicpro_matches)

# Write the table as TSV and set its mode to 0o664
# (owner/group read-write, others read-only).
outfn = os.path.join(outdir, 'hg38.chipseq_peaks.supp_table.tsv')
hg38_fc_peaks.to_csv(outfn, sep='\t', index=False)
os.chmod(outfn, 0o664)

## mm10 Data

In [9]:
# Load the mm10 ChIP-seq peak table and drop rows whose num_peaks_chipseq is -1
# (presumably a sentinel for samples without a peak call -- TODO confirm).
# .copy() makes the filtered frame independent, so the column added further
# down is never written into a slice of the frame returned by read_table.
fn = "results/tables/final.all_batches/mm10.unmerged_peaks.chipseq.03_08_24.tsv"
mm10_fc_peaks = pd.read_table(fn)
mm10_fc_peaks = mm10_fc_peaks.loc[mm10_fc_peaks.num_peaks_chipseq != -1].copy()

# Load the headerless samplesheet and index it by the ChIP-seq sample name
# derived from each chipseq_path. It is reloaded here so this cell does not
# depend on another cell having built `mapper`, which must stay a
# notebook-level name because get_hicpro_matches reads it.
fn = 'results/samplesheets/post-hicpro/2024.2.15.10.52.peaks_files_chipseq.all_batches.samplesheet.without_header.tsv'
mapper = pd.read_table(fn, names=['hicpro_std_sample_name', 'vp_path', 'chipseq_path'])
mapper['chipseq_std_sample_name'] = mapper['chipseq_path'].apply(get_chipseq_std_sample_name)
mapper = mapper.set_index('chipseq_std_sample_name')

# Attach, per ChIP-seq sample, the comma-separated HiC-Pro sample name(s)
# that share its peak file.
mm10_fc_peaks['associated_hicpro_std_names'] = mm10_fc_peaks.chipseq_std_sample_name.apply(get_hicpro_matches)

# Write the table as TSV and set its mode to 0o664
# (owner/group read-write, others read-only).
outfn = os.path.join(outdir, 'mm10.chipseq_peaks.supp_table.tsv')
mm10_fc_peaks.to_csv(outfn, sep='\t', index=False)
os.chmod(outfn, 0o664)