In [1]:
import datetime as dt
import glob
import json
import os
import sys

import numpy as np
import pandas as pd
os.chdir('<project-dir>')

In [2]:
# Standardized sample name(s) that the cells below report on and add to the
# WashU hub; enter the desired std_sample_name(s) into this list.
samples = ['Aortic-VIC.GSE154513.Homo_Sapiens.H3K27ac.b1']

In [3]:
# Samplesheet read by has_peaks(): each line is split on whitespace, with the
# sample name in field 0 and peak-file paths in fields 2, 3 and 4.
peaks_files = 'results/samplesheets/post-hicpro/peaks_files.samplesheet.without_header.tsv'

In [4]:
def has_peaks(samples, samplesheet=None):
    """Report which peak files exist on disk for the requested samples.

    The samplesheet is read line by line and split on whitespace. Field 0 is
    the sample name; fields 2, 3 and 4 are checked with ``os.path.exists`` and
    recorded as the HiChIP-Peaks, FitHiChIP and ChIP-seq peak flags
    respectively.

    Parameters
    ----------
    samples : collection of str
        Sample names to look up; rows for any other sample are ignored.
    samplesheet : str, optional
        Path of the samplesheet to read. Defaults to the notebook-level
        ``peaks_files`` path.

    Returns
    -------
    tuple of (dict, dict, dict)
        ``(has_chipseq_peaks, has_hp_peaks, has_fithichip_peaks)``, each
        mapping sample name -> bool. A requested sample that has no usable row
        in the samplesheet is absent from all three dicts.
    """
    sheet_path = peaks_files if samplesheet is None else samplesheet

    has_chipseq_peaks = {}
    has_hp_peaks = {}
    has_fithichip_peaks = {}

    with open(sheet_path) as fr:
        for line in fr:
            fields = line.split()
            # Blank lines (e.g. a trailing newline at the end of the sheet) or
            # truncated rows cannot be indexed; skip them instead of raising.
            if len(fields) < 5:
                continue
            name = fields[0]
            if name not in samples:
                continue
            has_hp_peaks[name] = os.path.exists(fields[2])
            has_fithichip_peaks[name] = os.path.exists(fields[3])
            has_chipseq_peaks[name] = os.path.exists(fields[4])

    return has_chipseq_peaks, has_hp_peaks, has_fithichip_peaks

In [5]:
def decide_hiccups(paths, sample_names=None):
    """Decide, per sample directory, whether HiCCUPS produced any loops.

    Each entry of ``paths`` is split on ``'/'`` and its fourth component
    (index 3) is taken as the sample name, which matches directories globbed
    as ``results/loops/<caller>/<sample>/``. For wanted samples the file
    ``merged_loops.bedpe`` inside that directory is inspected.

    Parameters
    ----------
    paths : iterable of str
        Sample directories, with or without a trailing slash.
    sample_names : collection of str, optional
        Sample names to report on. Defaults to the notebook-level ``samples``
        list.

    Returns
    -------
    dict
        Sample name -> bool, True when ``merged_loops.bedpe`` has more than
        two lines. Samples whose directory has no ``merged_loops.bedpe`` are
        absent from the dict.
    """
    wanted = samples if sample_names is None else sample_names

    has_feature = {}
    for sample_dir in paths:
        parts = sample_dir.strip().split('/')
        # Paths too shallow to carry a sample name at index 3 are ignored.
        if len(parts) < 4 or parts[3] not in wanted:
            continue
        name = parts[3]

        loops_path = os.path.join(sample_dir, 'merged_loops.bedpe')
        if not os.path.exists(loops_path):
            continue

        # Only "more than two lines" matters, so stop reading at the third.
        # NOTE(review): the threshold of 2 presumably skips header lines in
        # merged_loops.bedpe -- confirm against the HiCCUPS output format.
        n_lines = 0
        with open(loops_path, 'r') as loops_file:
            for _ in loops_file:
                n_lines += 1
                if n_lines > 2:
                    break
        has_feature[name] = n_lines > 2

    return has_feature

In [6]:
def decide_fithichip(paths, sample_names=None):
    """Decide, per sample and configuration, whether FitHiChIP produced loops.

    Each entry of ``paths`` is split on ``'/'`` and its fourth component
    (index 3) is parsed as ``<a>.<b>.<c>.<d>.<rep>_<suffix>.<ext>``: the last
    two dot-separated fields are dropped and ``<rep>`` (the part of field 4
    before the first underscore) is appended, giving the sample name.

    For every wanted sample, the Q0.01 interaction file of each loose
    (``L5``/``L10``/``L25``, ``P2PBckgr_0``) and strict
    (``S5``/``S10``/``S25``, ``P2PBckgr_1``) run is inspected.

    Parameters
    ----------
    paths : iterable of str
        FitHiChIP run directories, with or without a trailing slash.
    sample_names : collection of str, optional
        Sample names to report on. Defaults to the notebook-level ``samples``
        list.

    Returns
    -------
    dict
        ``'<sample>_<L|S><res>'`` -> bool, True when the interaction file has
        more than one line. Configurations whose file does not exist are
        absent from the dict.
    """
    wanted = samples if sample_names is None else sample_names

    has_feature = {}
    for run_dir in paths:
        parts = run_dir.strip().split('/')
        if len(parts) < 4:
            continue
        name_fields = parts[3].split('.')
        # A stray directory that does not follow the naming scheme must not
        # abort the whole scan with an IndexError.
        if len(name_fields) < 5:
            continue
        rep = name_fields[4].split('_')[0]
        sample_name = '.'.join(name_fields[:-2] + [rep])
        if sample_name not in wanted:
            continue

        # Loose runs first, then strict, matching the original key order.
        for mode, bckgr in (('L', '0'), ('S', '1')):
            for res in ('5', '10', '25'):
                rel_path = (
                    '{mode}{res}/FitHiChIP_Peak2ALL_b{res}000_L20000_U2000000/'
                    'P2PBckgr_{bckgr}/Coverage_Bias/FitHiC_BiasCorr/'
                    'FitHiChIP-{mode}{res}.interactions_FitHiC_Q0.01.bed'
                ).format(mode=mode, res=res, bckgr=bckgr)
                loops_path = os.path.join(run_dir, rel_path)
                if not os.path.exists(loops_path):
                    continue

                # Only "more than one line" matters, so stop at the second.
                # NOTE(review): presumably the first line is a header --
                # confirm against the FitHiChIP output format.
                n_lines = 0
                with open(loops_path, 'r') as loops_file:
                    for _ in loops_file:
                        n_lines += 1
                        if n_lines > 1:
                            break
                has_feature[sample_name + '_' + mode + res] = n_lines > 1

    return has_feature

In [7]:
def has_loops(samples):
    """Collect loop-availability flags for every loop caller and peak set.

    Globs the per-sample result directories under ``results/loops/`` and hands
    each list to ``decide_hiccups`` or ``decide_fithichip``.

    Note: the ``samples`` argument is not referenced in this function body;
    the helpers are called with the directory lists only.

    Returns
    -------
    tuple of five dicts
        ``(has_hiccups_chr1, has_hiccups_all, has_fithichip_cs,
        has_fithichip_hp, has_fithichip_f)``.
    """
    # FitHiChIP runs, one directory family per peak set used as input.
    chipseq_runs = glob.glob('results/loops/fithichip/*_chipseq.peaks/')
    hichip_peaks_runs = glob.glob('results/loops/fithichip/*_hichip-peaks.peaks/')
    fithichip_runs = glob.glob('results/loops/fithichip/*_fithichip.peaks/')

    # HiCCUPS runs: chr1-only and all-chromosome.
    chr1_runs = glob.glob('results/loops/hiccups_chr1/*/')
    genome_runs = glob.glob('results/loops/hiccups/*/')

    return (
        decide_hiccups(chr1_runs),
        decide_hiccups(genome_runs),
        decide_fithichip(chipseq_runs),
        decide_fithichip(hichip_peaks_runs),
        decide_fithichip(fithichip_runs),
    )

## Get Sample Info

In [8]:
# Look up which peak files and loop calls exist for the names listed in
# `samples`; the resulting dicts feed the report and hub cells below.
has_chipseq_peaks, has_hp_peaks, has_fithichip_peaks = has_peaks(samples)
has_hiccups_chr1, has_hiccups_all, has_fithichip_cs, has_fithichip_hp, has_fithichip_f = has_loops(samples)

In [9]:
# generate report for which data this sample has
# Each row: (label used in the printout, peak flags, FitHiChIP loop flags
# computed from that peak set).
report_sources = [
    ("ChIP-seq peaks", has_chipseq_peaks, has_fithichip_cs),
    ("HiChIP-Peaks peaks", has_hp_peaks, has_fithichip_hp),
    ("FitHiChIP peaks", has_fithichip_peaks, has_fithichip_f),
]
report_configs = ['L5', 'L10', 'L25', 'S5', 'S10', 'S25']

for sample in samples:

    print(sample, "has the following datatypes avaliable:")
    print()

    # Peak files; .get() yields None when the sample was never seen.
    for label, peak_flags, _ in report_sources:
        print(label + ":", peak_flags.get(sample))
    print()

    # FitHiChIP loops, one paragraph per peak set, one line per configuration.
    for label, _, loop_flags in report_sources:
        for config in report_configs:
            print("FitHiChIP loops ({}, {}):".format(label, config), loop_flags.get(sample + '_' + config))
        print()

    print("HiCCUPS chr1 loops:", has_hiccups_chr1.get(sample))
    print("HiCCUPS all chrs loops:", has_hiccups_all.get(sample))
    print()

Aortic-VIC.GSE154513.Homo_Sapiens.H3K27ac.b1 has the following datatypes avaliable:

ChIP-seq peaks: True
HiChIP-Peaks peaks: True
FitHiChIP peaks: True

FitHiChIP loops (ChIP-seq peaks, L5): True
FitHiChIP loops (ChIP-seq peaks, L10): True
FitHiChIP loops (ChIP-seq peaks, L25): True
FitHiChIP loops (ChIP-seq peaks, S5): True
FitHiChIP loops (ChIP-seq peaks, S10): True
FitHiChIP loops (ChIP-seq peaks, S25): True

FitHiChIP loops (HiChIP-Peaks peaks, L5): True
FitHiChIP loops (HiChIP-Peaks peaks, L10): True
FitHiChIP loops (HiChIP-Peaks peaks, L25): True
FitHiChIP loops (HiChIP-Peaks peaks, S5): True
FitHiChIP loops (HiChIP-Peaks peaks, S10): True
FitHiChIP loops (HiChIP-Peaks peaks, S25): True

FitHiChIP loops (FitHiChIP peaks, L5): True
FitHiChIP loops (FitHiChIP peaks, L10): True
FitHiChIP loops (FitHiChIP peaks, L25): True
FitHiChIP loops (FitHiChIP peaks, S5): True
FitHiChIP loops (FitHiChIP peaks, S10): True
FitHiChIP loops (FitHiChIP peaks, S25): True

HiCCUPS chr1 loops: True
Hi

## WashU datahub

In [12]:
# Build the WashU datahub as a list of track dicts and serialise it with the
# json module, so quoting/escaping and list separators are always valid.
hub = 'results/visualizations/washu/hub.config.json'
WASHU_BASE_URL = 'https://informaticsdata.liai.org/bioadhoc-temp/Groups/vd-ay/kfetter/hichip-db-loop-calling/results/visualizations/washu/'
WASHU_LOOP_CONFIGS = ['S5', 'L5', 'S10', 'L10', 'S25', 'L25']


def _washu_track(track_type, rel_path, name, color):
    """Return one datahub entry; 'longrange' tracks are displayed as arcs."""
    options = {'height': 300, 'color': color}
    if track_type == 'longrange':
        options['displayMode'] = 'arc'
    return {
        'type': track_type,
        'url': WASHU_BASE_URL + rel_path,
        'name': name,
        'showOnHubLoad': True,
        'options': options,
    }


# Each row: (peak flags, loop flags, peaks directory, peak tag,
#            loops directory, loop tag, track colour).
washu_sources = [
    (has_chipseq_peaks, has_fithichip_cs, 'chipseq_peaks', 'chipseq.peaks',
     'fithichip_loops_chipseq', 'chipseq.peaks', '#7393B3'),
    (has_hp_peaks, has_fithichip_hp, 'hichip-peaks_peaks', 'hichip-peaks.peaks',
     'fithichip_loops_hp', 'hichippeaks.peaks', '#A95C68'),
    (has_fithichip_peaks, has_fithichip_f, 'fithichip_peaks', 'fithichip.peaks',
     'fithichip_loops_fithichip', 'fithichip.peaks', '#2AAA8A'),
]

washu_tracks = []
for sample in samples:

    for peak_flags, loop_flags, peaks_dir, peak_tag, loops_dir, loop_tag, color in washu_sources:
        # Peak track first, then the FitHiChIP loops called from those peaks.
        if peak_flags.get(sample):
            washu_tracks.append(_washu_track(
                'bed',
                '{}/{}.{}.txt.gz'.format(peaks_dir, sample, peak_tag),
                '{}.{}'.format(sample, peak_tag),
                color,
            ))

        for config in WASHU_LOOP_CONFIGS:
            if loop_flags.get(sample + '_' + config):
                washu_tracks.append(_washu_track(
                    'longrange',
                    '{0}/{1}/{2}/{1}.fithichip.{2}.loops.{3}.txt.gz'.format(loops_dir, sample, config, loop_tag),
                    '{}.fithichip.loops.{}.{}'.format(sample, config, loop_tag),
                    color,
                ))

    # Prefer the all-chromosome HiCCUPS call; fall back to the chr1-only one.
    if has_hiccups_all.get(sample):
        washu_tracks.append(_washu_track(
            'longrange',
            'hiccups_loops_all/' + sample + '.hiccups-allchrs.loops.txt.gz',
            sample + '.hiccups.loops.allchrs',
            '#5D3FD3',
        ))
    elif has_hiccups_chr1.get(sample):
        washu_tracks.append(_washu_track(
            'longrange',
            'hiccups_loops_chr1/' + sample + '.hiccups-chr1.loops.txt.gz',
            sample + '.hiccups.loops.chr1',
            '#5D3FD3',
        ))

# Single write through a context manager: no re-reading of the file to strip a
# trailing comma, and no file handles left open.
with open(hub, 'w') as hub_file:
    json.dump(washu_tracks, hub_file, indent='\t')
    hub_file.write('\n')

In [16]:
# Browser link that loads the generated datahub at chr1:84000000-87000000 (hg38).
# NOTE(review): this hub URL is served from .../BioAdHoc/... while the track
# URLs written into hub.config.json point at .../bioadhoc-temp/... -- confirm
# that both locations are reachable.
washu_browser_url = 'https://epigenomegateway.wustl.edu/browser/?genome=hg38&position=chr1:84000000-87000000&hub=https://informaticsdata.liai.org/BioAdHoc/Groups/vd-ay/kfetter/hichip-db-loop-calling/results/visualizations/washu/hub.config.json'
print('URL:', washu_browser_url, sep='\n')

URL:
https://epigenomegateway.wustl.edu/browser/?genome=hg38&position=chr1:84000000-87000000&hub=https://informaticsdata.liai.org/BioAdHoc/Groups/vd-ay/kfetter/hichip-db-loop-calling/results/visualizations/washu/hub.config.json


# UCSC trackHub

In [40]:
# Paths of the three UCSC track-hub files.
hub = 'results/visualizations/ucsc/hubDirectory/hub.txt'
genomes = 'results/visualizations/ucsc/hubDirectory/genomes.txt'
trackdb = 'results/visualizations/ucsc/hubDirectory/hg38/trackDb.txt'

# hub.txt: one "key value" setting per line, no trailing newline.
hub_settings = [
    'hub hichip_hg38',
    'shortLabel hichip_hg38',
    'longLabel hichip_visualization_hg38',
    'genomesFile genomes.txt',
    'email kfetter@lji.org',
]
with open(hub, 'w') as hub_file:
    hub_file.write('\n'.join(hub_settings))

# genomes.txt: points the hg38 assembly at its trackDb file.
genome_settings = [
    'genome hg38',
    'trackDb hg38/trackDb.txt',
]
with open(genomes, 'w') as genomes_file:
    genomes_file.write('\n'.join(genome_settings))

# TODO: write the trackDb file at `trackdb`; nothing is written to it in this cell yet.
#with open(trackdb, 'w') as f:

    