In [1]:
import pandas as pd
import numpy as np
import os
import gcsfs
from google import auth

In [2]:
project_name = 'broad-getzlab-workflows'
credentials, _ = auth.default()
gcs = gcsfs.GCSFileSystem(project_name, token=credentials)

Used https://genome.ucsc.edu/cgi-bin/hgLiftOver to lift over from hg38 to hg19

In [3]:
dir_interval_lists = './dlbcl_known_enhancers/'
genome_reference = 'hg19'

In [4]:
enhancer_lists = [f for f in os.listdir(dir_interval_lists) if (genome_reference in f) and ('promoter' not in f) and ('bed12' not in f)]
promoter_lists = [f for f in os.listdir(dir_interval_lists) if (genome_reference in f) and ('promoter' in f) and ('bed12' not in f)]

In [5]:
enhancer_lists

['Payton_DLBCL_superenhancers.hg19.sort.bed',
 'LY1.hg19.bed',
 'Immunoglobulin_functional_elements.sort.hg19.bed',
 'Pan_Hammarstrom_kategis.hg19.sort.bed',
 'all.sample_gt15.score_gt1.hg19.bed',
 'Bradner_superenhancers.hg19.sort.bed',
 'all.sample_gt5.score_gt1.hg19.bed']

# Promoters

## Loading promoter tables

In [6]:
dict_promoters = {}
for f in promoter_lists:
    df = pd.read_csv(dir_interval_lists + f, sep = '\t', names=['chromosome', 'start', 'end', 'gene', 'drop'])
    df = df[df.columns[:-1]]
    dict_promoters[f] = df
    display(df)

Unnamed: 0,chromosome,start,end,gene
0,chr1,915901,916367,PERM1
1,chr1,917392,918058,PERM1
2,chr1,1690179,1690620,NADK
3,chr1,1709555,1710848,NADK
4,chr1,1821905,1823065,GNB1
...,...,...,...,...
3161,chrX,154250890,154251188,F8
3162,chrX,154299263,154300012,"BRCC3,MTCP1"
3163,chrX,154841930,154842809,SPRY3
3164,chrX,154996925,154997508,SPRY3


## Checking whether genomic locations are local to chromosomes

In [7]:
for c in sorted([int(ch[3:]) for ch in df.chromosome.unique() if ch[3:] not in ['X', 'Y']]):
    idx = 'chr'+str(c)
    print(idx, df.loc[df.chromosome==idx, 'start'].min(), df.loc[df.chromosome==idx, 'end'].max())

chr1 915901 248524780
chr2 1417958 242499125
chr3 1134442 197687216
chr4 52976 187645939
chr5 218165 180415934
chr6 391044 170863573
chr7 766136 158497912
chr8 1771788 145743470
chr9 214569 140445635
chr10 735220 135342980
chr11 535133 134094669
chr12 498371 133339056
chr13 19755907 114239632
chr14 20801125 107287760
chr15 22892423 102030108
chr16 402146 89883320
chr17 1303378 80798526
chr18 2570892 76740768
chr19 409070 58874427
chr20 524292 62796131
chr21 14981532 47573884
chr22 17073819 50946816


## Converting intervals to interval sets with [bed12 format](https://github.com/maxwellsh/DIGDriver/wiki/06:-Analyzing-new-interval-sets#defining-sets-of-intervals)

In [8]:
for key in dict_promoters:
    df = dict_promoters[key]
    il_name = key.split('.bed')[0].replace('.', '_')
    # data frame containing genes associated with each interval
    df_genes = df.gene.str.split(',', expand=True)
    # unique list of all genes in interval list
    genes = []
    for col in df_genes.columns:
        genes += df_genes[col][~df_genes[col].isna()].tolist()
    genes = np.unique(genes)
    # columns of bed12 file 
    cols_bed12 = ['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
    data = []
    for g in genes:
        dfi = df.loc[np.any(df_genes==g, axis=1)].copy().sort_values(['chromosome', 'start'], ignore_index=True)
        # chromosome number
        chrom = dfi.chromosome.unique()[0][3:]
        # start position of first interval in set (zero-indexed)
        start = dfi.start.min()
        # end position of final interval in set (zero-indexed)
        end = dfi.end.max()
        # unique name for this set of annotations
        name = '{}::dlbcl::{}::NA'.format(il_name.replace('_', ''), g)
        # any non-negative number (we usually use 0)
        score = 1000
        # +, -, or . if no strand
        strand = '.'
        # same as start (for Dig)
        thickStart = start
        # same as end (for Dig)
        thickEnd = end
        # ignored (we usually set to 0)
        itemRgb = 0
        # number of intervals in the set
        blockCount = len(dfi) 
        # # length of each interval in the set
        blockSizes = ','.join((dfi.end - dfi.start).astype(str).tolist()) + ','
        # # start position of each interval relative to start (so the first entry is always 0)
        blockStarts = ','.join((dfi.start - dfi.start.min()).astype(str).tolist()) + ','
        data.append([chrom, start, end, name, score, strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts])
    df_bed12 = pd.DataFrame(data=data, columns=cols_bed12).sort_values(['chrom', 'start'], ignore_index=True)
    df_bed12.to_csv(dir_interval_lists + il_name + '_bed12.bed', sep='\t', index=False, header=False, lineterminator='\n')
df_bed12.head(20)

Unnamed: 0,chrom,start,end,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts
0,1,915901,918058,hg19promoters::dlbcl::PERM1::NA,1000,.,915901,918058,0,2,466666,1491
1,1,1690179,1710848,hg19promoters::dlbcl::NADK::NA,1000,.,1690179,1710848,0,2,4411293,19376
2,1,1821905,1823065,hg19promoters::dlbcl::GNB1::NA,1000,.,1821905,1823065,0,1,1160,0
3,1,1935120,1935421,hg19promoters::dlbcl::CFAP74::NA,1000,.,1935120,1935421,0,1,301,0
4,1,1981486,2036709,hg19promoters::dlbcl::PRKCZ::NA,1000,.,1981486,2036709,0,3,466469637,2345654586
5,1,2158629,2160105,hg19promoters::dlbcl::SKI::NA,1000,.,2158629,2160105,0,1,1476,0
6,1,2398690,2406854,hg19promoters::dlbcl::PLCH2::NA,1000,.,2398690,2406854,0,2,300429,7735
7,1,2487075,2488155,hg19promoters::dlbcl::TNFRSF14::NA,1000,.,2487075,2488155,0,1,1080,0
8,1,2985601,2986987,hg19promoters::dlbcl::PRDM16::NA,1000,.,2985601,2986987,0,1,1386,0
9,1,3568079,3615958,hg19promoters::dlbcl::TP73::NA,1000,.,3568079,3615958,0,3,1167255563,3895447316


In [9]:
pd.read_csv('./interval_lists/gc19_pc.prom.bed', sep='\t')

Unnamed: 0,1,2482077,2492453,gc19_pc.prom::gencode::TNFRSF14::ENSG00000157873.13,1000,+,2482077.1,2492453.1,0,7,"104,200,117,98,262,68,228,","0,4800,5035,5629,8902,9346,10148,"
0,1,2985531,3316553,gc19_pc.prom::gencode::PRDM16::ENSG00000142611.12,1000,+,2985531,3316553,0,8,20010813518825018091168,0335174964175176175539327323327632330854
1,1,6257617,6272948,gc19_pc.prom::gencode::RPL22::ENSG00000116251.5,1000,-,6257617,6272948,0,13,88181102200200662005044465149,"0,219,1796,1996,2055,3285,5737,5937,11487,1183..."
2,1,6845183,7806225,gc19_pc.prom::gencode::CAMTA1::ENSG00000171735.14,1000,+,6845183,7806225,0,9,2001492331166110113211868,"0,458,3971,951084,951423,959590,959905,960583,..."
3,1,11175720,11322764,gc19_pc.prom::gencode::MTOR::ENSG00000198793.8,1000,-,11175720,11322764,0,8,20020066857264239131200,01589516630186241882319295146644146844
4,1,16170358,16207440,gc19_pc.prom::gencode::SPEN::ENSG00000065526.6,1000,+,16170358,16207440,0,10,234860200164256251200362218215,"0,828,3800,7479,8257,26581,29438,30610,32821,3..."
...,...,...,...,...,...,...,...,...,...,...,...,...
20033,Y,27768063,27768263,gc19_pc.prom::gencode::CDY1::ENSG00000172288.6,1000,+,27768063,27768263,0,1,200,0
20034,Y,28114689,28115089,gc19_pc.prom::gencode::AC007965.1::ENSG0000026...,1000,-,28114689,28115089,0,2,180200,0200
20035,Y,59100279,59100479,gc19_pc.prom::gencode::SPRY3::ENSGR0000168939.6,1000,+,59100279,59100479,0,1,200,0
20036,Y,59213761,59214246,gc19_pc.prom::gencode::VAMP7::ENSGR0000124333.10,1000,+,59213761,59214246,0,2,200123,0362


In [10]:
dict_enhancers = {}
for f in enhancer_lists:
    df = pd.read_csv(dir_interval_lists + f, sep = '\t', names=['chromosome', 'start', 'end', 'enhancer_hg38', 'drop'])
    df = df[df.columns[:-1]]
    if df.enhancer_hg38.astype(str).str[:3].unique()[0] == 'chr':
        dict_enhancers[f] = df
        display(df)

Unnamed: 0,chromosome,start,end,enhancer_hg38
0,chr1,1705210,1715089,chr1:1773772-1783650
1,chr1,1751224,1841253,chr1:1819786-1909814
2,chr1,2222043,2256622,chr1:2290605-2325183
3,chr1,2476985,2481874,chr1:2545547-2550435
4,chr1,3571879,3595579,chr1:3655316-3679015
...,...,...,...,...
1424,chr9,135245814,135261884,chr9:132370428-132386497
1425,chr9,135978382,136023022,chr9:133102996-133147635
1426,chr9,136809701,136845830,chr9:133944580-133980708
1427,chr9,139114103,139137520,chr9:136222258-136245674


Unnamed: 0,chromosome,start,end,enhancer_hg38
0,chr14,106032614,106032974,chr14:105566278-105566637
1,chr14,106041491,106041971,chr14:105575155-105575634
2,chr14,106048351,106048676,chr14:105582015-105582339
3,chr14,106050491,106054731,chr14:105584155-105588394
4,chr14,106054315,106056795,chr14:105587979-105590458
...,...,...,...,...
90,chr22,21025149,21025185,chr22:22026077-22881392
91,chr13,48996567,48996634,chr22:22026077-22881392
92,chr3,186029623,186029950,chr22:22026077-22881392
93,chr22,23235933,23265082,chr22:22893754-22922910


Unnamed: 0,chromosome,start,end,enhancer_hg38
0,chr1,150546616,150552004,chr1:150574141-150579528
1,chr1,203273727,203276488,chr1:203304600-203307360
2,chr1,226924119,226926747,chr1:226736419-226739046
3,chr10,63659531,63666480,chr10:61899773-61906721
4,chr11,128389025,128392119,chr11:128519131-128522224
5,chr12,8762070,8765407,chr12:8609475-8612811
6,chr12,92535690,92540320,chr12:92141915-92146544
7,chr12,113493744,113519862,chr12:113055940-113082057
8,chr12,122454746,122467180,chr12:122016841-122029274
9,chr13,75981170,75984325,chr13:75407035-75410189


Unnamed: 0,chromosome,start,end,enhancer_hg38
0,chr1,1342400,1342510,chr1:1407021-1407130
1,chr1,6258480,6259620,chr1:6198421-6199560
2,chr1,8021850,8022220,chr1:7961791-7962160
3,chr1,11072800,11073390,chr1:11012744-11013333
4,chr1,16839600,16841510,chr1:16513106-16515015
...,...,...,...,...
747,chr9,130834640,130834790,chr9:128072362-128072511
748,chr9,134553080,134553280,chr9:131677694-131677893
749,chrX,12974220,12975860,chrX:12956102-12957741
750,chrX,24073160,24073260,chrX:24055044-24055143


Unnamed: 0,chromosome,start,end,enhancer_hg38
0,chr1,1706412,1732343,chr1:1774973-1800904
1,chr1,1751668,1842316,chr1:1820229-1910877
2,chr1,4059831,4133780,chr1:3999771-4073720
3,chr1,6616886,6664806,chr1:6556826-6604746
4,chr1,7391931,7440079,chr1:7331871-7380018
...,...,...,...,...
2294,chrX,131613124,131626145,chrX:132479096-132492117
2295,chrX,135817164,135864668,chrX:136735005-136782509
2296,chrX,153229992,153241022,chrX:153964541-153975571
2297,chrX,153245353,153256834,chrX:153979902-153991383


Unnamed: 0,chromosome,start,end,enhancer_hg38
0,chr1,27610,29320,chr1:27611-29320
1,chr1,713220,713660,chr1:777841-778280
2,chr1,724060,727180,chr1:788681-791800
3,chr1,824880,825300,chr1:889501-889920
4,chr1,902180,902660,chr1:966801-967280
...,...,...,...,...
18749,chrY,58827340,58827540,chrY:56763331-56763530
18750,chrY,58855890,58856100,chrY:56734771-56734980
18751,chrY,58971880,58973420,chrY:56825734-56827273
18752,chrY,58974460,58974660,chrY:56828314-56828513


In [11]:
for key in dict_enhancers:
    df = dict_enhancers[key]
    il_name = key.split('.bed')[0].replace('.', '_')
    # data frame containing enhancers associated with each interval
    df_enhancers = df.enhancer_hg38.str.split(',', expand=True)
    # unique list of all enhancers in interval list
    enhancers = []
    for col in df_enhancers.columns:
        enhancers += df_enhancers[col][~df_enhancers[col].isna()].tolist()
    enhancers = np.unique(enhancers)
    # columns of bed12 file 
    cols_bed12 = ['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']
    data = []
    for e in enhancers:
        dfi = df.loc[np.any(df_enhancers==e, axis=1)].copy().sort_values(['chromosome', 'start'], ignore_index=True)
        # chromosome number
        chrom = dfi.chromosome.unique()[0][3:]
        # start position of first interval in set (zero-indexed)
        start = dfi.start.min()
        # end position of final interval in set (zero-indexed)
        end = dfi.end.max()
        # unique name for this set of annotations
        name = '{}::dlbcl::{}::NA'.format(il_name.replace('_', ''), e)
        # any non-negative number (we usually use 0)
        score = 1000
        # +, -, or . if no strand
        strand = '.'
        # same as start (for Dig)
        thickStart = start
        # same as end (for Dig)
        thickEnd = end
        # ignored (we usually set to 0)
        itemRgb = 0
        # number of intervals in the set
        blockCount = len(dfi)
        # # length of each interval in the set
        blockSizes = ','.join((dfi.end - dfi.start).astype(str).tolist()) + ','
        # # start position of each interval relative to start (so the first entry is always 0)
        blockStarts = ','.join((dfi.start - dfi.start.min()).astype(str).tolist()) + ','
        data.append([chrom, start, end, name, score, strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts])
        
    df_bed12 = pd.DataFrame(data=data, columns=cols_bed12).sort_values(['chrom', 'start'], ignore_index=True)
    df_bed12.to_csv(dir_interval_lists + il_name + '_bed12.bed', sep='\t', index=False, header=False, lineterminator='\n')
    display(df_bed12.head(20))

Unnamed: 0,chrom,start,end,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts
0,1,1705210,1715089,PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...,1000,.,1705210,1715089,0,1,9879,0
1,1,1751224,1841253,PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...,1000,.,1751224,1841253,0,1,90029,0
2,1,2222043,2256622,PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...,1000,.,2222043,2256622,0,1,34579,0
3,1,2476985,2481874,PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...,1000,.,2476985,2481874,0,1,4889,0
4,1,3571879,3595579,PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...,1000,.,3571879,3595579,0,1,23700,0
5,1,9120100,9143791,PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...,1000,.,9120100,9143791,0,1,23691,0
6,1,9460230,9489886,PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...,1000,.,9460230,9489886,0,1,29656,0
7,1,9711773,9793450,PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...,1000,.,9711773,9793450,0,1,81677,0
8,1,11862760,11876548,PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...,1000,.,11862760,11876548,0,1,13788,0
9,1,12099036,12112488,PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...,1000,.,12099036,12112488,0,1,13452,0


Unnamed: 0,chrom,start,end,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts
0,1,4591240,148924163,Immunoglobulinfunctionalelementssorthg19::dlbc...,1000,.,4591240,148924163,0,12,3175623551901025134275742,"112567135,144331824,144332172,144332888,342997..."
1,1,21207433,223477546,Immunoglobulinfunctionalelementssorthg19::dlbc...,1000,.,21207433,223477546,0,44,"39,74,116,36,51,37,29,62,47,36,63,72,80,73,148...","70725248,202270039,105798379,34442507,85245375..."
2,13,21025149,186029950,Immunoglobulinfunctionalelementssorthg19::dlbc...,1000,.,21025149,186029950,0,4,6736843098327,2797141801355325165004474
3,14,106032614,106032974,Immunoglobulinfunctionalelementssorthg19::dlbc...,1000,.,106032614,106032974,0,1,360,0
4,14,106041491,106041971,Immunoglobulinfunctionalelementssorthg19::dlbc...,1000,.,106041491,106041971,0,1,480,0
5,14,106048351,106048676,Immunoglobulinfunctionalelementssorthg19::dlbc...,1000,.,106048351,106048676,0,1,325,0
6,14,106050491,106054731,Immunoglobulinfunctionalelementssorthg19::dlbc...,1000,.,106050491,106054731,0,1,4240,0
7,14,106054315,106056795,Immunoglobulinfunctionalelementssorthg19::dlbc...,1000,.,106054315,106056795,0,1,2480,0
8,14,106064028,106068064,Immunoglobulinfunctionalelementssorthg19::dlbc...,1000,.,106064028,106068064,0,1,4036,0
9,14,106069177,106070455,Immunoglobulinfunctionalelementssorthg19::dlbc...,1000,.,106069177,106070455,0,1,1278,0


Unnamed: 0,chrom,start,end,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts
0,1,21207433,223477546,PanHammarstromkategishg19sort::dlbcl::chr14:10...,1000,.,21207433,223477546,0,14,397411636613555531324216001024253,"70725248,202270039,105798379,34442507,85308250..."
1,1,150546616,150552004,PanHammarstromkategishg19sort::dlbcl::chr1:150...,1000,.,150546616,150552004,0,1,5388,0
2,1,203273727,203276488,PanHammarstromkategishg19sort::dlbcl::chr1:203...,1000,.,203273727,203276488,0,1,2761,0
3,1,226924119,226926747,PanHammarstromkategishg19sort::dlbcl::chr1:226...,1000,.,226924119,226926747,0,1,2628,0
4,10,63659531,63666480,PanHammarstromkategishg19sort::dlbcl::chr10:61...,1000,.,63659531,63666480,0,1,6949,0
5,11,128389025,128392119,PanHammarstromkategishg19sort::dlbcl::chr11:12...,1000,.,128389025,128392119,0,1,3094,0
6,12,8762070,8765407,PanHammarstromkategishg19sort::dlbcl::chr12:86...,1000,.,8762070,8765407,0,1,3337,0
7,12,92535690,92540320,PanHammarstromkategishg19sort::dlbcl::chr12:92...,1000,.,92535690,92540320,0,1,4630,0
8,12,113493744,113519862,PanHammarstromkategishg19sort::dlbcl::chr12:11...,1000,.,113493744,113519862,0,1,26118,0
9,12,122454746,122467180,PanHammarstromkategishg19sort::dlbcl::chr12:12...,1000,.,122454746,122467180,0,1,12434,0


Unnamed: 0,chrom,start,end,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts
0,1,1342400,1342510,allsamplegt15scoregt1hg19::dlbcl::chr1:1407021...,1000,.,1342400,1342510,0,1,110,0
1,1,6258480,6259620,allsamplegt15scoregt1hg19::dlbcl::chr1:6198421...,1000,.,6258480,6259620,0,1,1140,0
2,1,8021850,8022220,allsamplegt15scoregt1hg19::dlbcl::chr1:7961791...,1000,.,8021850,8022220,0,1,370,0
3,1,11072800,11073390,allsamplegt15scoregt1hg19::dlbcl::chr1:1101274...,1000,.,11072800,11073390,0,1,590,0
4,1,16839600,16841510,allsamplegt15scoregt1hg19::dlbcl::chr1:1651310...,1000,.,16839600,16841510,0,1,1910,0
5,1,17221810,17223710,allsamplegt15scoregt1hg19::dlbcl::chr1:1689531...,1000,.,17221810,17223710,0,1,1900,0
6,1,19923600,19924410,allsamplegt15scoregt1hg19::dlbcl::chr1:1959710...,1000,.,19923600,19924410,0,1,810,0
7,1,22352140,22352450,allsamplegt15scoregt1hg19::dlbcl::chr1:2202564...,1000,.,22352140,22352450,0,1,310,0
8,1,22379240,22379960,allsamplegt15scoregt1hg19::dlbcl::chr1:2205274...,1000,.,22379240,22379960,0,1,720,0
9,1,24151340,24151840,allsamplegt15scoregt1hg19::dlbcl::chr1:2382485...,1000,.,24151340,24151840,0,1,500,0


Unnamed: 0,chrom,start,end,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts
0,1,1706412,1732343,Bradnersuperenhancershg19sort::dlbcl::chr1:177...,1000,.,1706412,1732343,0,1,25931,0
1,1,1751668,1842316,Bradnersuperenhancershg19sort::dlbcl::chr1:182...,1000,.,1751668,1842316,0,1,90648,0
2,1,4059831,4133780,Bradnersuperenhancershg19sort::dlbcl::chr1:399...,1000,.,4059831,4133780,0,1,73949,0
3,1,6616886,6664806,Bradnersuperenhancershg19sort::dlbcl::chr1:655...,1000,.,6616886,6664806,0,1,47920,0
4,1,7391931,7440079,Bradnersuperenhancershg19sort::dlbcl::chr1:733...,1000,.,7391931,7440079,0,1,48148,0
5,1,8454848,8501956,Bradnersuperenhancershg19sort::dlbcl::chr1:839...,1000,.,8454848,8501956,0,1,47108,0
6,1,8552366,8594467,Bradnersuperenhancershg19sort::dlbcl::chr1:849...,1000,.,8552366,8594467,0,1,42101,0
7,1,8933057,8944097,Bradnersuperenhancershg19sort::dlbcl::chr1:887...,1000,.,8933057,8944097,0,1,11040,0
8,1,9120065,9146380,Bradnersuperenhancershg19sort::dlbcl::chr1:906...,1000,.,9120065,9146380,0,1,26315,0
9,1,9685649,9719974,Bradnersuperenhancershg19sort::dlbcl::chr1:962...,1000,.,9685649,9719974,0,1,34325,0


Unnamed: 0,chrom,start,end,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts
0,1,27610,29320,allsamplegt5scoregt1hg19::dlbcl::chr1:27611-29...,1000,.,27610,29320,0,1,1710,0
1,1,713220,713660,allsamplegt5scoregt1hg19::dlbcl::chr1:777841-7...,1000,.,713220,713660,0,1,440,0
2,1,724060,727180,allsamplegt5scoregt1hg19::dlbcl::chr1:788681-7...,1000,.,724060,727180,0,1,3120,0
3,1,824880,825300,allsamplegt5scoregt1hg19::dlbcl::chr1:889501-8...,1000,.,824880,825300,0,1,420,0
4,1,902180,902660,allsamplegt5scoregt1hg19::dlbcl::chr1:966801-9...,1000,.,902180,902660,0,1,480,0
5,1,948960,949360,allsamplegt5scoregt1hg19::dlbcl::chr1:1013581-...,1000,.,948960,949360,0,1,400,0
6,1,974090,974710,allsamplegt5scoregt1hg19::dlbcl::chr1:1038711-...,1000,.,974090,974710,0,1,620,0
7,1,1003720,1006540,allsamplegt5scoregt1hg19::dlbcl::chr1:1068341-...,1000,.,1003720,1006540,0,1,2820,0
8,1,1050380,1051430,allsamplegt5scoregt1hg19::dlbcl::chr1:1115001-...,1000,.,1050380,1051430,0,1,1050,0
9,1,1079440,1080580,allsamplegt5scoregt1hg19::dlbcl::chr1:1144061-...,1000,.,1079440,1080580,0,1,1140,0


In [12]:
pd.read_csv('./interval_lists/enhancers.bed', sep='\t')

Unnamed: 0,1,2266288,2266598,enhancers::chr1:2264800-2266800::NA::NA,904,.,2266288.1,2266598.1,0,1.1,"310,","0,"
0,1,3010985,3011105,enhancers::chr1:3010200-3011600::NA::NA,905,.,3010985,3011105,0,1,120,0
1,1,11197757,11200572,enhancers::chr1:11197600-11200600::NA::NA,909,.,11197757,11200572,0,3,66818158,013782757
2,1,11237375,11237715,enhancers::chr1:11235600-11238000::NA::NA,900,.,11237375,11237715,0,1,340,0
3,1,11246685,11248729,enhancers::chr1:11244800-11250000::NA::NA,911,.,11246685,11248729,0,2,669151,01893
4,1,11257069,11257242,enhancers::chr1:11257000-11258000::NA::NA,900,.,11257069,11257242,0,1,173,0
...,...,...,...,...,...,...,...,...,...,...,...,...
30810,X,154417120,154417479,enhancers::chrX:154416000-154417800::NA::NA,924,.,154417120,154417479,0,1,359,0
30811,X,154459477,154459797,enhancers::chrX:154459200-154460200::NA::NA,905,.,154459477,154459797,0,1,320,0
30812,X,154543197,154543363,enhancers::chrX:154543000-154543600::NA::NA,935,.,154543197,154543363,0,1,166,0
30813,X,154564200,154564800,enhancers::chrX:154564200-154564800::NA::NA,908,.,154564200,154564800,0,2,75317,0283
