# Loop Catalog Mcool to TXT Formats

The focus of this notebook is to convert hg19 mcool data into longrange format for WashU visualizations. It starts from the hg19 mcool data generated by `Loop-Catalog-hg38-to-hg19` and writes both BEDPE and longrange files for easier data manipulation and visualization.

In [15]:
import os 
import numpy as np
import pandas as pd
import glob
import subprocess as sp
import json
import cooler
import warnings
# Blanket filter: every warning raised after this point is suppressed.
warnings.filterwarnings(action='ignore')

# Work from the project root so the relative paths used in later cells resolve.
project_root = '/mnt/BioHome/jreyna/jreyna-temp/projects/dchallenge'
os.chdir(project_root)

# Let pandas display up to this many columns before truncating wide frames.
max_display_columns = 1000
pd.set_option('display.max_columns', max_display_columns)

In [2]:
# sgls is a project-local helper module (chromolooper package); the cells
# below call it for mcool dumping, bgzip compression and WashU hub creation.
from chromolooper import sgls
import importlib
# Re-import the module in place -- presumably so that edits made to sgls.py
# are picked up without restarting the kernel.
importlib.reload(sgls)

<module 'chromolooper.sgls' from '/mnt/bioadhoc-temp/Groups/vd-ay/jreyna/projects/chromolooper/chromolooper/sgls.py'>

In [3]:
# Input directory (searched for mcool files) and output directory for the
# WashU files; both are relative to the current working directory.
datadir, outdir = (
    'results/main/loop_catalog/processing/hg38_to_hg19/liftover/',
    'results/main/loop_catalog/processing/hg38_to_hg19/washu/',
)

# Create the output directory up front; a pre-existing one is fine.
os.makedirs(outdir, exist_ok=True)

## Convert from Mcool to longrange and BEDPE

In [4]:
# Resolution (bp) passed to the mcool dump and embedded in the output names.
res = 5000
mcool_glob = os.path.join(datadir, '*/*.hg19.mcool')

# Per the original note, a factor of 100 was included in the stored values
# previously; it is divided back out of column 6 below.
count_scale = 100

# sorted() keeps the processing order (and the printed log) reproducible,
# because glob returns matches in arbitrary order.
for fn in sorted(glob.glob(mcool_glob)):

    # The loop-calling method is the directory that directly contains the
    # mcool file; derive it from the path instead of a fixed split index so
    # it does not depend on how deep datadir is.
    loop_method = os.path.basename(os.path.dirname(fn))

    # Output locations: <outdir>/<loop_method>/<sample prefix>res_<res>.*
    # Only the trailing 'mcool' extension is stripped (the glob guarantees
    # the name ends with it), leaving the trailing '.' of the prefix.
    base_prefix = os.path.basename(fn)[:-len('mcool')]
    method_outdir = os.path.join(outdir, loop_method)
    os.makedirs(method_outdir, exist_ok=True)  # make dir if needed
    longrange_fn = os.path.join(method_outdir, base_prefix + 'res_{}.longrange.bed'.format(res))
    bedpe_fn = os.path.join(method_outdir, base_prefix + 'res_{}.bedpe'.format(res))

    # Run the sample only if its longrange file has not been created yet.
    if os.path.exists(longrange_fn):
        print('already created {}'.format(longrange_fn))
        continue
    print('creating {}'.format(longrange_fn))

    # Dump the mcool file at the requested resolution.
    # NOTE(review): assumes the helper returns a 7-column BEDPE-like frame
    # (two anchors in columns 0-5, value in column 6) -- confirm in sgls.
    dump_df = sgls.cooler_cli_mcool2bedpe(fn, res)
    dump_df.iloc[:, 6] = dump_df.iloc[:, 6] / count_scale
    # Add the WashU second-anchor column, built from columns 3-6.
    dump_df['right'] = sgls.create_washu_second_anchor_col(dump_df, 3, 4, 5, 6)

    # Create the bedpe file from the first seven columns.
    bedpe_df = dump_df.iloc[:, [0, 1, 2, 3, 4, 5, 6]]
    bedpe_df.to_csv(bedpe_fn, sep='\t', header=False, index=False)

    # Create the longrange file: first anchor plus the column at position 7
    # (the 'right' column when the dump has exactly seven columns).
    # It is written to a temporary name and moved into place afterwards, so
    # an interrupted run cannot leave a truncated file that the existence
    # check above would treat as finished.
    lr_df = dump_df.iloc[:, [0, 1, 2, 7]]
    tmp_longrange_fn = longrange_fn + '.tmp'
    lr_df.to_csv(tmp_longrange_fn, sep='\t', header=False, index=False)
    os.replace(tmp_longrange_fn, longrange_fn)

already created results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_cp/lgs102943.GSE116193.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed
already created results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_cp/lgs301283.GSE116193.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed
already created results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_cp/lgs000379.GSE116193.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed
already created results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_cp/lgs101645.GSE116193.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed
already created results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_cp/lgs102580.GSE116193.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed
already created results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_cp/lgs301315.GSE116193.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed
already created results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_hp/GM12878.GSE1014

##### Compress and index files 

In [5]:
# Gather every longrange BED one directory below outdir, then hand each one
# to sgls.bgzip with the original BED kept, indexing on and overwrite forced.
longrange_beds = glob.glob(os.path.join(outdir, '*/*.longrange.bed'))
for fn in longrange_beds:
    # The helper returns a pair, unpacked here as compressed-file / index-file
    # names -- TODO confirm against sgls.bgzip.
    cmp_fn, idx_fn = sgls.bgzip(
        fn,
        keep_bed=True,
        index=True,
        force=True,
        verbose=False,
    )

## Make hub for hiccups

In [17]:
# Collect one WashU longrange track entry per compressed hiccups file.
url_root = 'https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/hiccups/'
hub = []

# sorted() gives the hub a stable track order between runs; glob alone
# returns the files in arbitrary order.
for fn in sorted(glob.glob(os.path.join(outdir, 'hiccups/*.longrange.bed.gz'))):

    # get basename for url
    bn = os.path.basename(fn)

    # Create the whole URL. URLs always use '/', so join explicitly rather
    # than through os.path.join, which follows the OS path rules.
    full_url = url_root.rstrip('/') + '/' + bn
    print(full_url)

    # The first dot-separated field of the file name labels the track.
    cell_name = bn.split('.')[0]
    name = 'Loop Catalog HiCCuPs peaks for {}'.format(cell_name)

    # create json entries
    entry = sgls.make_washu_longrange_dict(name, full_url)
    hub.append(entry)

https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/hiccups/MyLa.GSE101498.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed.gz
https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/hiccups/Naive_Tcells.GSE101498.Homo_Sapiens.H3K27ac.b2.hg19.res_5000.longrange.bed.gz
https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/hiccups/Treg.GSE101498.Homo_Sapiens.H3K27ac.b3.hg19.res_5000.longrange.bed.gz
https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/hiccups/MyLa.GSE101498.Homo_Sapiens.H3K27ac.b2.hg19.res_5000.longrange.bed.gz
https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_cat

In [18]:
# Save the hub: hand the collected entries and the target path to
# sgls.create_washu_hub (presumably it writes them out as the hub config).
hub_relpath = 'hiccups/hub.config.txt'
hub_fn = os.path.join(outdir, hub_relpath)
sgls.create_washu_hub(hub, hub_fn)

In [19]:
# Turn the local hub config path into its URL with sgls.make_lji_url and
# print it.
hub_link = sgls.make_lji_url(hub_fn)
print(hub_link)

https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/hiccups/hub.config.txt


## Make hub for fit_cp

In [20]:
# Collect one WashU longrange track entry per compressed fit_cp file.
url_root = 'https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_cp/'
hub = []

# sorted() gives the hub a stable track order between runs; glob alone
# returns the files in arbitrary order.
for fn in sorted(glob.glob(os.path.join(outdir, 'fit_cp/*.longrange.bed.gz'))):

    # get basename for url
    bn = os.path.basename(fn)

    # Create the whole URL. URLs always use '/', so join explicitly rather
    # than through os.path.join, which follows the OS path rules.
    full_url = url_root.rstrip('/') + '/' + bn
    print(full_url)

    # The first dot-separated field of the file name labels the track.
    cell_name = bn.split('.')[0]
    name = 'Loop Catalog fit_cp peaks for {}'.format(cell_name)
    entry = sgls.make_washu_longrange_dict(name, full_url)

    # Track display settings: shown when the hub loads, drawn as red arcs.
    entry['showOnHubLoad'] = True
    entry['options'] = {'ensembleStyle': True, 'height': 200, 'color': 'red', 'displayMode': 'arc'}

    hub.append(entry)

https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_cp/lgs102943.GSE116193.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed.gz
https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_cp/lgs101645.GSE116193.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed.gz
https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_cp/lgs102580.GSE116193.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed.gz
https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_cp/lgs000379.GSE116193.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed.gz
https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/

In [21]:
# Save the hub: hand the collected entries and the target path to
# sgls.create_washu_hub (presumably it writes them out as the hub config).
hub_relpath = 'fit_cp/hub.config.txt'
hub_fn = os.path.join(outdir, hub_relpath)
sgls.create_washu_hub(hub, hub_fn)

In [22]:
# Turn the local hub config path into its URL with sgls.make_lji_url and
# print it.
hub_link = sgls.make_lji_url(hub_fn)
print(hub_link)

https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_cp/hub.config.txt


## Make hub for fit_hp

In [23]:
# Collect one WashU longrange track entry per compressed fit_hp file.
url_root = 'https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_hp/'
hub = []

# sorted() gives the hub a stable track order between runs; glob alone
# returns the files in arbitrary order.
for fn in sorted(glob.glob(os.path.join(outdir, 'fit_hp/*.longrange.bed.gz'))):

    # get basename for url
    bn = os.path.basename(fn)

    # Create the whole URL. URLs always use '/', so join explicitly rather
    # than through os.path.join, which follows the OS path rules.
    full_url = url_root.rstrip('/') + '/' + bn
    print(full_url)

    # The first dot-separated field of the file name labels the track.
    cell_name = bn.split('.')[0]
    name = 'Loop Catalog fit_hp peaks for {}'.format(cell_name)
    entry = sgls.make_washu_longrange_dict(name, full_url)

    # Track display settings: shown when the hub loads, drawn as red arcs.
    entry['showOnHubLoad'] = True
    entry['options'] = {'ensembleStyle': True, 'height': 200, 'color': 'red', 'displayMode': 'arc'}

    hub.append(entry)

https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_hp/MyLa.GSE151193.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed.gz
https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_hp/GM12878.GSE101498.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed.gz
https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_hp/Naive_Tcells.GSE101498.Homo_Sapiens.H3K27ac.b1.hg19.res_5000.longrange.bed.gz
https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_hp/GM12878.GSE101498.Homo_Sapiens.H3K27ac.b2.hg19.res_5000.longrange.bed.gz
https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_c

In [24]:
# Save the hub: hand the collected entries and the target path to
# sgls.create_washu_hub (presumably it writes them out as the hub config).
hub_relpath = 'fit_hp/hub.config.txt'
hub_fn = os.path.join(outdir, hub_relpath)
sgls.create_washu_hub(hub, hub_fn)

In [25]:
# Turn the local hub config path into its URL with sgls.make_lji_url and
# print it.
hub_link = sgls.make_lji_url(hub_fn)
print(hub_link)

https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/jreyna/projects/dchallenge/results/main/loop_catalog/processing/hg38_to_hg19/washu/fit_hp/hub.config.txt
