# Intersect CS with Loops

We saw how Chiou et al. (2021) used co-accessibility in snATAC-seq data to link SNPs to genes. Here we try a very similar approach, but use HiChIP loops instead of co-accessibility.

In [2]:
import os 
import numpy as np
import pandas as pd
import glob
import subprocess as sp
import json
import warnings
import pybedtools as pbt
import cooler
import seaborn as sns
from matplotlib import pyplot as plt
warnings.filterwarnings('ignore')

In [3]:
# `sgls` is a project-local helper module from the chromolooper package; it is used throughout this notebook.
from chromolooper import sgls
import importlib
# Re-import sgls so that edits made to the module on disk are picked up without restarting the kernel.
importlib.reload(sgls)

<module 'chromolooper.sgls' from '/mnt/bioadhoc-temp/Groups/vd-ay/jreyna/projects/chromolooper/chromolooper/sgls.py'>

In [4]:
# change the working directory
# NOTE(review): absolute, user-specific path; relative paths used later in the notebook resolve against it.
os.chdir('/mnt/BioHome/jreyna/jreyna-temp/projects/dchallenge')
# allow up to 1000 columns when displaying wide DataFrames
pd.set_option('display.max_columns', 1000)

In [5]:
# Directory (relative to the current working directory) used for this notebook's files;
# created if it does not exist yet, left untouched if it does.
outdir = 'results/main/chiou_2021/analysis/intersect_cs_with_loops/'
os.makedirs(outdir, exist_ok=True)

In [114]:
# Record the version of the bedtools binary installed under /mnt/BioApps (shell escape, not Python).
!/mnt/BioApps/bedtools/bin/bedtools --version

bedtools v2.16.2-zip-466a9f0


In [42]:
# Tell pybedtools where the bedtools executables live (location comes from sgls.BEDTOOLS_DIR).
# NOTE(review): not visible here whether this is the same install as /mnt/BioApps/bedtools/bin — confirm.
pbt.set_bedtools_path(sgls.BEDTOOLS_DIR)
# Keep pybedtools' intermediate files in a personal tmp directory.
pbt.set_tempdir('/mnt/BioHome/jreyna/tmp/')

## Loading the Extra Jurkat Loops

In [7]:
# Pattern for the FitHiChIP interaction files; the only wildcard is the per-sample
# directory (`HiChIP_Samples_*`).
jurkat_loop_glob = ('/mnt/BioAdHoc/Groups/vd-vijay/sourya/Projects/2020_IQTL_HiChIP/Data/'
                    'Jurkat_FitHiChIP_Loops/HiChIP_Samples_*/Out_FitHiChIP/'
                    'FitHiChIP_Peak2ALL_b5000_L10000_U3000000/P2PBckgr_1/Coverage_Bias/FitHiC_BiasCorr/'
                    'FitHiChIP.interactions_FitHiC_Q0.01.bed')

# glob.glob returns matches in arbitrary order; sort so the combined table has a
# reproducible row order from run to run.
jurkat_loop_files = sorted(glob.glob(jurkat_loop_glob))

# Fail early with a clear message instead of handing an empty list to the reader.
if not jurkat_loop_files:
    raise FileNotFoundError('No Jurkat loop files matched: {}'.format(jurkat_loop_glob))

# Read every file (first row is the header) into a single DataFrame, then rename the
# first six columns to the first six BEDPE column names.
# NOTE(review): helper semantics are inferred from their names and arguments — confirm in sgls.
jurkat_loops = sgls.read_multiple_tables_to_df(jurkat_loop_files, header=0)
jurkat_loops = sgls.rename_col_with_ints(jurkat_loops, np.arange(0, 6), sgls.BEDPE_COLS[0:6])

# The sample name is the path component sitting where the wildcard is in the pattern
# (index 10 for the pattern above); deriving it keeps this correct if the root moves.
sample_path_level = next(level
                         for level, part in enumerate(jurkat_loop_glob.split('/'))
                         if '*' in part)
jurkat_loops['sample'] = jurkat_loops['file'].apply(lambda x: x.split('/')[sample_path_level])

# Prefix every column with `jurkatLs_` so they stay distinguishable after joining with the SGLs.
jurkat_loops.columns = sgls.add_prefix_to_names(jurkat_loops.columns.tolist(), 'jurkatLs_')

# Build a loop identifier column; column position 27 is passed as an extra field
# (presumably the sample column added above — TODO confirm).
jurkat_loops['jurkatLs_id'] = sgls.create_loop_id_col(jurkat_loops, extras=[27])

In [8]:
# Positions of the columns to keep: the first six plus column 28
# (presumably the BEDPE coordinates and the `jurkatLs_id` column — TODO confirm).
jurkat_loop_col_idxs = sgls.range_list(0, 6) + [28]

# Translate the positions into column labels.
jurkat_loop_cols = jurkat_loops.columns[jurkat_loop_col_idxs].tolist()

In [9]:
# Wrap the selected Jurkat loop columns in a pybedtools BedTool.
jurkat_loops_pbt = pbt.BedTool.from_dataframe(jurkat_loops.loc[:, jurkat_loop_cols])

## Loading the SGLs

In [10]:
# Tab-separated SGL table, read from this notebook's output directory.
sgl_jurkat_fn = os.path.join(outdir, 'strongest_atac_jurkat_sgls.tsv')
strongest_atac_sgls_jurkat = pd.read_csv(sgl_jurkat_fn, sep='\t')

In [11]:
# Report how many SGL rows were loaded.
n_sgls = len(strongest_atac_sgls_jurkat)
print('There are {} SGLs.'.format(n_sgls))

There are 98 SGLs.


In [12]:
# Columns of the SGL table used to describe each SNP, in the order they are
# written out: a chromosome column, start, end, then three extra fields.
sgl_snp_cols = [
    'loop_chrA',        # chromosome column (named after the loop's A anchor)
    'snp_start',
    'snp_end',
    'snp_rsid',
    'snp_ppa',
    'snp_signal_name',
]

In [13]:
# create a snp bed file in compressed and indexed mode
snps_df = strongest_atac_sgls_jurkat[sgl_snp_cols].drop_duplicates()
snp_fn = os.path.join(outdir, 'sgls.snps.bed')
sgls.df_to_bed_file(snps_df, snp_fn, sort=True, other_cols=[3,4,5])
snp_bed_gz, _ = sgls.bgzip(snp_fn)

In [14]:
# Print the URL that sgls.make_lji_url builds for the compressed SNP BED file.
print(sgls.make_lji_url(snp_bed_gz))

https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/chiou_2021/analysis/intersect_cs_with_loops/sgls.snps.bed.gz


In [105]:
# build pbt for the strongest SGLs
# `loop_`-prefixed names: the first six BEDPE columns plus the loop id.
strongest_loop_cols = sgls.add_prefix_to_names(sgls.BEDPE_COLS[0:6] + ['id'], 'loop_')
# BedTool holding all of the loop columns.
strongest_pbt = pbt.BedTool.from_dataframe(strongest_atac_sgls_jurkat[strongest_loop_cols])
# BedTool restricted to the first three loop columns (presumably the A anchor's chrom/start/end — TODO confirm).
strongest_pbt2 = pbt.BedTool.from_dataframe(strongest_atac_sgls_jurkat[strongest_loop_cols[0:3]])

In [16]:
# Pair the SGL loops with the Jurkat loops, using the loop column subsets of each table.
# NOTE(review): pairtopair_dataframe is opaque from here; presumably a bedtools pairtopair-style
# join that keeps SGL rows without a match — confirm in sgls.
strongest_jurkat_p2p = sgls.pairtopair_dataframe(strongest_atac_sgls_jurkat, jurkat_loops,
                                     strongest_loop_cols, jurkat_loop_cols)

In [28]:
# Keep only the rows whose `jurkatLs_p` value is present
# (presumably the SGLs that were matched to a Jurkat loop — TODO confirm).
has_jurkat_loop = strongest_jurkat_p2p['jurkatLs_p'].notna()
best_loops = strongest_jurkat_p2p.loc[has_jurkat_loop, :]

# Write the filtered table next to the other outputs and show its URL as the cell result.
sgl_with_jurkatLs_fn = os.path.join(outdir, 'strongest_atac_jurkat_sgls.with_jurkatLoops.tsv')
sgls.df_to_tsv_file(best_loops, sgl_with_jurkatLs_fn)
sgls.make_lji_url(sgl_with_jurkatLs_fn)

'https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/chiou_2021/analysis/intersect_cs_with_loops/strongest_atac_jurkat_sgls.with_jurkatLoops.tsv'

In [147]:
# Reload sgls again so that changes made to the module on disk take effect in this kernel.
importlib.reload(sgls)

<module 'chromolooper.sgls' from '/mnt/bioadhoc-temp/Groups/vd-ay/jreyna/projects/chromolooper/chromolooper/sgls.py'>

In [150]:
# NOTE(review): the result is bound to the throwaway name `a`; give it a descriptive name once its use is settled.
a = sgls.find_bedpe_like_columns(strongest_jurkat_p2p)