# Jurkat Loop Hub

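4-5 replicates of Jurkat HiChIP samples (the `HiChIP_Samples_*` directories used below) have been generated and processed through HiC-Pro. This notebook loads their FitHiChIP loop calls, extracts the best-scoring loop at each pair of loci, and writes a WashU hub with one long-range track per sample.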

In [1]:
import os 
import numpy as np
import pandas as pd
import glob
import subprocess as sp
import json
import warnings
import pybedtools as pbt
import cooler
import seaborn as sns
from matplotlib import pyplot as plt
warnings.filterwarnings('ignore')

In [2]:
# project helper module used throughout this notebook
from chromolooper import sgls
import importlib
# re-import sgls so that edits made to the module after the first import are
# picked up without restarting the kernel
importlib.reload(sgls)

<module 'chromolooper.sgls' from '/mnt/bioadhoc-temp/Groups/vd-ay/jreyna/projects/chromolooper/chromolooper/sgls.py'>

In [3]:
# change the working directory
# NOTE(review): absolute, user-specific path; the relative 'results/...' paths
# used later in this notebook are resolved against this directory.
os.chdir('/mnt/BioHome/jreyna/jreyna-temp/projects/dchallenge')
# allow up to 1000 columns when pandas displays a table
pd.set_option('display.max_columns', 1000)

In [4]:
# output directory for this notebook, relative to the working directory
outdir = 'results/main/jurkat_loops/'
# exist_ok=True keeps this cell re-runnable when the directory already exists
os.makedirs(outdir, exist_ok=True)

In [5]:
# point pybedtools at the bedtools location configured in chromolooper.sgls
pbt.set_bedtools_path(sgls.BEDTOOLS_DIR)

## Loading the Extra Jurkat Loops

In [6]:
# FitHiChIP significant-interaction files (Q0.01), one per HiChIP_Samples_* directory
jurkat_loop_glob = '/mnt/BioAdHoc/Groups/vd-vijay/sourya/Projects/2020_IQTL_HiChIP/Data/' + \
                    'Jurkat_FitHiChIP_Loops/HiChIP_Samples_*/Out_FitHiChIP/' + \
                    'FitHiChIP_Peak2ALL_b5000_L10000_U3000000/P2PBckgr_1/Coverage_Bias/FitHiC_BiasCorr/' + \
                    'FitHiChIP.interactions_FitHiC_Q0.01.bed'

# glob.glob returns matches in arbitrary order; sort so the row order of the
# combined table is the same on every run
jurkat_loop_files = sorted(glob.glob(jurkat_loop_glob))

# fail here with a clear message instead of inside the loading helper
if not jurkat_loop_files:
    raise FileNotFoundError('No Jurkat loop files matched: {}'.format(jurkat_loop_glob))

# read every file into a single table (the first line of each file is its header)
jurkat_loops = sgls.read_multiple_tables_to_df(jurkat_loop_files, header=0)

# give the first six columns the standard BEDPE coordinate names
jurkat_loops = sgls.rename_col_with_ints(jurkat_loops, np.arange(0, 6), sgls.BEDPE_COLS[0:6])

# sample name = path component 10 of the 'file' column, which is the
# HiChIP_Samples_* directory when 'file' holds the absolute path matched above
# (presumably added by read_multiple_tables_to_df; verify)
jurkat_loops['sample'] = jurkat_loops['file'].apply(lambda x: x.split('/')[10])

# prefix every column so the origin of each column stays identifiable
jurkat_loops.columns = sgls.add_prefix_to_names(jurkat_loops.columns.tolist(), 'jurkatLs_')

# NOTE(review): the meaning of extras=[27] is not visible here; presumably an
# extra column position folded into the loop id. Confirm against sgls.create_loop_id_col.
jurkat_loops['jurkatLs_id'] = sgls.create_loop_id_col(jurkat_loops, extras=[27])

## Extract the top-scoring (lowest p-value) loop at each pair of loci

In [179]:
# prefix that was added to the columns of jurkat_loops when the table was loaded
jurkatLs_prefix = 'jurkatLs_'

In [9]:
# prefixed names of the six BEDPE coordinate columns; together they identify a pair of loci
coord_id_cols = sgls.add_prefix_to_names(sgls.BEDPE_COLS[0:6], jurkatLs_prefix)
# presumably keeps, for each group of rows sharing these coordinates, the row
# with the smallest 'jurkatLs_p'; verify against sgls.get_grp_min
# NOTE(review): the table displayed further down still repeats identical
# coordinates with different p-values, so confirm this yields one row per pair of loci.
best_pval_loops = sgls.get_grp_min(jurkat_loops, 'jurkatLs_p', coord_id_cols)

In [183]:
# the six BEDPE coordinate columns plus the p-value column, all with the jurkatLs_ prefix
bedpe_cols = sgls.add_prefix_to_names(sgls.BEDPE_COLS[0:6] + ['p'], jurkatLs_prefix)

In [184]:
# display the selected column names
bedpe_cols

['jurkatLs_chrA',
 'jurkatLs_startA',
 'jurkatLs_endA',
 'jurkatLs_chrB',
 'jurkatLs_startB',
 'jurkatLs_endB',
 'jurkatLs_p']

In [186]:
# keep only the coordinate and p-value columns (all rows)
best_pval_loops_bedpe = best_pval_loops.loc[:, bedpe_cols]

In [187]:
# display the reduced table
best_pval_loops_bedpe

Unnamed: 0,jurkatLs_chrA,jurkatLs_startA,jurkatLs_endA,jurkatLs_chrB,jurkatLs_startB,jurkatLs_endB,jurkatLs_p
3364,chrY,58985000,58990000,chrY,58995000,59000000,1.779359e-06
4958,chrY,58985000,58990000,chrY,58995000,59000000,1.770517e-06
4892,chrY,58985000,58990000,chrY,58995000,59000000,1.770827e-06
3424,chrY,58985000,58990000,chrY,58995000,59000000,1.777067e-06
3811,chrY,58985000,58990000,chrY,58995000,59000000,1.779459e-06
...,...,...,...,...,...,...,...
428,chr10,72165000,72170000,chr10,73635000,73640000,1.192323e-08
426,chr10,72165000,72170000,chr10,73635000,73640000,1.192461e-08
283,chr10,72165000,72170000,chr10,73635000,73640000,1.187159e-08
335,chr10,72165000,72170000,chr10,73635000,73640000,1.186005e-08


## Make a hub of all Jurkat loops

In [175]:
# getting colors for the hubs
# the table must provide the 'color-name' and 'hex' columns indexed below
colors = pd.read_table('results/refs/dice_colors/colors.revised.t1d-project.V2.tsv')
# 'hex' value of the 'cd4t-cell' row; passed as the track color when the hub entries are built
cd4_tcell_color = colors.set_index('color-name').loc['cd4t-cell', 'hex']

In [176]:
# Gather the WashU-formatted long-range files (*_WashU.bed.gz), one per
# HiChIP_Samples_* directory. Adjacent string literals are concatenated by
# the parser, so the pattern below is a single string.
jurkat_loop_glob = (
    '/mnt/BioAdHoc/Groups/vd-vijay/sourya/Projects/2020_IQTL_HiChIP/Data/'
    'Jurkat_FitHiChIP_Loops/HiChIP_Samples_*/Out_FitHiChIP/'
    'FitHiChIP_Peak2ALL_b5000_L10000_U3000000/P2PBckgr_1/Coverage_Bias/FitHiC_BiasCorr/'
    'FitHiChIP.interactions_FitHiC_Q0.01_WashU.bed.gz'
)
jurkat_loop_files = glob.glob(jurkat_loop_glob)

In [193]:
# make each hub entry: one long-range track per Jurkat sample file
hub_entries = []
# glob order is arbitrary; sort so the track order in the hub is stable across runs
for fn in sorted(jurkat_loop_files):
    # label the track with its sample directory (HiChIP_Samples_*), which is
    # path component 10 of these absolute paths. `name` was previously never
    # assigned, so this cell raised NameError on a fresh kernel.
    name = fn.split('/')[10]
    url = sgls.make_lji_url(fn)
    entry = sgls.make_washu_longrange_dict(name, url, color=cd4_tcell_color)
    hub_entries.append(entry)

# write the washu hub
jurkat_json = os.path.join(outdir, 'jurkat.loops.config.txt')
sgls.create_washu_hub(hub_entries, jurkat_json)

# get the hub link
hub_link = sgls.make_lji_url(jurkat_json)
print(hub_link)

https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/jurkat_loops/jurkat.loops.config.txt


In [190]:
# ad-hoc check: absolute path of the gencode v30 genes-only annotation file
s = '/mnt/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/refs/gencode/v30/gencode.v30.annotation.grch37.genes_only.sorted.bed.gz'

In [192]:
# print the URL that sgls.make_lji_url derives from the file path held in `s`
print(sgls.make_lji_url(s))

https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/refs/gencode/v30/gencode.v30.annotation.grch37.genes_only.sorted.bed.gz
