# Constructing a GarNetDB file

The goal here is to construct a file in which all of the gene and transcription factor names exist in the same namespace. We use mygene.info's API to map all the gene names to a common namespace. It isn't clear that they have the most "canonical" namespace, but at present they correlate best with GeneCards and seem to be more consistent than all other namespaces I know of.

In [2]:
%pylab inline
import sys
import os
import pickle
import sqlite3
import numpy as np
import pandas as pd

# Input files, given as relative paths (resolved against the working directory).
# The first two are passed to parse_known_genes_file in Part I; the third is
# passed to parse_motifs_file in Part II.
known_genes_file = '../data/ucsc_hg19_knownGenes.tsv'
kgXref_file = '../data/ucsc_hg19_kgXref.tsv'
motifs_file = '../example_data/motifmap.normalized.tsv'

Populating the interactive namespace from numpy and matplotlib


# Part I: Constructing a reference file

We use UCSC's [known genes](http://genome.ucsc.edu/cgi-bin/hgTables?hgsid=590772967_aCXvu74nAfyUAYeUksjLuUk1eBz3&clade=mammal&org=Human&db=hg19&hgta_group=genes&hgta_track=refGene&hgta_table=refGene&hgta_regionType=genome&position=chr21%3A33031597-33041570&hgta_outputType=primaryTable&hgta_outFileName=) and [Cross Reference (kgXref) file](http://genome.ucsc.edu/cgi-bin/hgTables?hgsid=590772967_aCXvu74nAfyUAYeUksjLuUk1eBz3&clade=mammal&org=Human&db=hg19&hgta_group=genes&hgta_track=refGene&hgta_table=kgXref&hgta_regionType=genome&position=chr21%3A33%2C031%2C597-33%2C041%2C570&hgta_outputType=primaryTable&hgta_outFileName=) as our foundation. 

In [3]:
def parse_known_genes_file(known_genes_file, kgXref_file=None):
    """
    Parse the UCSC known genes table into a pandas dataframe, optionally
    joined with the UCSC "Cross Reference" (kgXref) table.

    The known genes file format is the following:
    http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.sql

    Both inputs are read as tab-delimited text without a header row; column
    names are assigned here.

    Arguments:
        known_genes_file (string or FILE): UCSC knownGene table
        kgXref_file (string or FILE, optional): UCSC kgXref table. When it is
            omitted (or otherwise falsy) the known genes table is returned on
            its own, with the UCSC identifier in the "geneName" column.

    Returns:
        dataframe: known genes dataframe. Without kgXref the columns are
            geneName, chrom, geneStrand, geneStart, geneEnd, cdsStart, cdsEnd,
            exonCount, exonStarts, exonEnds, proteinID, alignID. With kgXref
            the UCSC identifier moves to "ucID", the kgXref columns are
            appended, and "geneName" holds the kgXref gene symbol.
    """

    known_genes_fieldnames = ["name","chrom","strand","txStart","txEnd","cdsStart","cdsEnd","exonCount","exonStarts","exonEnds","proteinID","alignID"]

    known_genes_dataframe = pd.read_csv(known_genes_file, delimiter='\t', names=known_genes_fieldnames)

    # index=str is kept from the original implementation: it turns the row
    # labels into strings ('0', '1', ...) in addition to renaming the columns.
    known_genes_dataframe = known_genes_dataframe.rename(
        index=str,
        columns={"txStart":"geneStart", "txEnd":"geneEnd", "name":"geneName", "strand":"geneStrand"},
    )

    # No cross-reference table supplied: nothing to join.
    if not kgXref_file:
        return known_genes_dataframe

    kgXref_fieldnames = ["kgID","mRNA","spID","spDisplayID","geneSymbol","refseq","protAcc","description"]
    kgXref_dataframe = pd.read_csv(kgXref_file, delimiter='\t', names=kgXref_fieldnames)

    # Left join so that every known-genes row survives even when kgXref has
    # no entry for its identifier.
    merged_dataframe = known_genes_dataframe.merge(kgXref_dataframe, left_on='geneName', right_on='kgID', how='left')

    # The UCSC identifier becomes "ucID"; the kgXref symbol takes over "geneName".
    return merged_dataframe.rename(index=str, columns={"geneName":"ucID", "geneSymbol":"geneName"})


In [6]:
reference = parse_known_genes_file(known_genes_file, kgXref_file)
reference.head()

Unnamed: 0,ucID,chrom,geneStrand,geneStart,geneEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,proteinID,alignID,kgID,mRNA,spID,spDisplayID,geneName,refseq,protAcc,description
0,uc001aaa.3,chr1,+,11873,14409,11873,11873,3,118731261213220,122271272114409,,uc001aaa.3,uc001aaa.3,NR_046018,,,DDX11L1,NR_046018,,Homo sapiens DEAD/H (Asp-Glu-Ala-Asp/His) box ...
1,uc010nxr.1,chr1,+,11873,14409,11873,11873,3,118731264513220,122271269714409,,uc010nxr.1,uc010nxr.1,AM992878,,,DDX11L1,,,Homo sapiens DEAD/H (Asp-Glu-Ala-Asp/His) box ...
2,uc010nxq.1,chr1,+,11873,14409,12189,13639,3,118731259413402,122271272114409,B7ZGX9,uc010nxq.1,uc010nxq.1,AM992880,B7ZGX9,B7ZGX9_HUMAN,DDX11L1,,,Homo sapiens DEAD/H (Asp-Glu-Ala-Asp/His) box ...
3,uc009vis.3,chr1,-,14361,16765,14361,14361,4,14361149691579516606,14829150381594216765,,uc009vis.3,uc009vis.3,BC047449,,,WASH7P,,,Homo sapiens WAS protein family homolog 7 pseu...
4,uc009vjc.1,chr1,-,16857,17751,16857,16857,2,1685717232,1705517751,,uc009vjc.1,uc009vjc.1,AK291582,,,WASH7P,,,Homo sapiens WAS protein family homolog 7 pseu...


In [8]:
reference = reference[['geneName', 'ucID', 'chrom', 'geneStrand', 'geneStart', 'geneEnd']]
reference.head()

Unnamed: 0,geneName,ucID,chrom,geneStrand,geneStart,geneEnd
0,DDX11L1,uc001aaa.3,chr1,+,11873,14409
1,DDX11L1,uc010nxr.1,chr1,+,11873,14409
2,DDX11L1,uc010nxq.1,chr1,+,11873,14409
3,WASH7P,uc009vis.3,chr1,-,14361,16765
4,WASH7P,uc009vjc.1,chr1,-,16857,17751


In [11]:
reference[reference.geneName.isnull()]

Unnamed: 0,geneName,ucID,chrom,geneStrand,geneStart,geneEnd


### ...Amazing!

In [16]:
import mygene
mg = mygene.MyGeneInfo()
# Query mygene.info once per distinct gene name, matching each name against
# symbol, name and alias, restricted to human, and requesting the HGNC id and
# symbol of each hit. With returnall=True the result is a dict-like object that
# the next cell indexes by 'out', 'dup' and 'missing'.
df = mg.querymany(np.unique(reference.geneName.values).tolist(), scopes=['symbol', 'name', 'alias'], fields=["HGNC", "symbol"], species="human", as_dataframe=True, returnall=True)
df

querying 1-1000...done.
querying 1001-2000...done.
querying 2001-3000...done.
querying 3001-4000...done.
querying 4001-5000...done.
querying 5001-6000...done.
querying 6001-7000...done.
querying 7001-8000...done.
querying 8001-9000...done.
querying 9001-10000...done.
querying 10001-11000...done.
querying 11001-12000...done.
querying 12001-13000...done.
querying 13001-14000...done.
querying 14001-15000...done.
querying 15001-16000...done.
querying 16001-17000...done.
querying 17001-18000...done.
querying 18001-19000...done.
querying 19001-20000...done.
querying 20001-21000...done.
querying 21001-22000...done.
querying 22001-23000...done.
querying 23001-24000...done.
querying 24001-25000...done.
querying 25001-26000...done.
querying 26001-27000...done.
querying 27001-28000...done.
querying 28001-28517...done.
Finished.
4228 input query terms found dup hits:
	[('5S_rRNA', 10), ('5_8S_rRNA', 6), ('7SK', 10), ('A1BG', 2), ('A2M', 5), ('A2ML1', 5), ('AADACL2', 
5242 input query terms found n

{'dup': [('5S_rRNA', 10),
  ('5_8S_rRNA', 6),
  ('7SK', 10),
  ('A1BG', 2),
  ('A2M', 5),
  ('A2ML1', 5),
  ('AADACL2', 4),
  ('AARSD1', 2),
  ('AATK', 2),
  ('ABCA9', 2),
  ('ABCC5', 2),
  ('ABCC6P1', 2),
  ('ABHD11', 3),
  ('ABHD14A', 2),
  ('ABHD15', 3),
  ('ABHD17A', 3),
  ('ABO', 2),
  ('ABP1', 2),
  ('ABRA', 2),
  ('ACAD11', 3),
  ('ACAP2', 3),
  ('ACAT1', 2),
  ('ACAT2', 2),
  ('ACBD3', 2),
  ('ACBD7', 2),
  ('ACOXL', 2),
  ('ACP1', 2),
  ('ACTA2', 3),
  ('ACTB', 2),
  ('ACTG1P4', 2),
  ('ACTN1', 2),
  ('ACTR3', 7),
  ('ACTR3B', 10),
  ('ACTR3BP2', 2),
  ('ACTR3BP5', 2),
  ('ACTR6', 3),
  ('ACVR2B', 2),
  ('ACY1', 2),
  ('AD', 10),
  ('ADAM18', 2),
  ('ADAM1A', 2),
  ('ADAM5', 2),
  ('ADAMTS19', 2),
  ('ADAMTS7', 8),
  ('ADAMTS9', 4),
  ('ADAMTS9-AS2', 2),
  ('ADAMTSL4', 2),
  ('ADARB2', 2),
  ('ADC', 2),
  ('ADCY3', 2),
  ('ADD3', 3),
  ('ADH4', 2),
  ('ADIPOQ', 9),
  ('ADIRF', 3),
  ('ADNP', 3),
  ('ADORA2A', 4),
  ('ADPGK', 2),
  ('ADRA1A', 2),
  ('AFAP1', 2),
  ('AFF1', 2),


In [17]:
# Split the querymany result into its parts: terms with several hits, terms
# with no hit, and the table of hits itself.
dup, missing = df['dup'], df['missing']

# Keep only hits that carry both an HGNC id and a symbol.
df = df['out'][["HGNC", "symbol"]].dropna()

# A query term may have matched several records; keep the first row per term,
# then restore the query term as an unnamed index.
first_hit_per_query = df.reset_index().drop_duplicates(subset='query', keep='first')
df = first_hit_per_query.set_index('query').rename_axis(None)
df

Unnamed: 0,HGNC,symbol
6M1-18,8176,OR11A1
7M1-2,8246,OR2F1
A1BG,5,A1BG
A1BG-AS1,37133,A1BG-AS1
A1CF,24086,A1CF
A2M,7,A2M
A2M-AS1,27057,A2M-AS1
A2ML1,23336,A2ML1
A2MP1,8,A2MP1
A3GALT2,30005,A3GALT2


In [20]:
len(df[df.index != df.symbol])

1025

In [19]:
missing

['AB000466',
 'AB007962',
 'AB059369',
 'AB062081',
 'AB062083',
 'AB073649',
 'AB074160',
 'AB074162',
 'AB074166',
 'AB074188',
 'AB075489',
 'AB075492',
 'AB209061',
 'AB209185',
 'AB209315',
 'AB209621',
 'AB231702',
 'AB231703',
 'AB231705',
 'AB231710',
 'AB231711',
 'AB231721',
 'AB231722',
 'AB231723',
 'AB231724',
 'AB231729',
 'AB231731',
 'AB231739',
 'AB231741',
 'AB231742',
 'AB231761',
 'AB231779',
 'AB231784',
 'AB240015',
 'AB372727',
 'AB429224',
 'AB488780',
 'AB586698',
 '1',
 'ADV21S1A1N',
 'AF007147',
 'AF020763',
 'AF035281',
 'AF047486',
 'AF055024',
 'AF063596',
 'AF070569',
 'AF070581',
 'AF072097',
 'AF075036',
 'AF075112',
 'AF079515',
 'AF085962',
 'AF085995',
 'AF086102',
 'AF086125',
 'AF086126',
 'AF086132',
 'AF086154',
 'AF086165',
 'AF086184',
 'AF086203',
 'AF086219',
 'AF086258',
 'AF086285',
 'AF086288',
 'AF086294',
 'AF086303',
 'AF086346',
 'AF086351',
 'AF086476',
 'AF088041',
 'AF090102',
 'AF090939',
 'AF116693',
 'AF119915',
 'AF131837',
 'AF

In [21]:
# Attach the normalized HGNC id and symbol to each reference row by matching
# geneName against the index of df. The left join keeps every reference row;
# names without a match get NaN in the new columns.
reference = reference.merge(df, how='left', left_on='geneName', right_index=True)
reference.head()

Unnamed: 0,geneName,ucID,chrom,geneStrand,geneStart,geneEnd,HGNC,symbol
0,DDX11L1,uc001aaa.3,chr1,+,11873,14409,37102,DDX11L1
1,DDX11L1,uc010nxr.1,chr1,+,11873,14409,37102,DDX11L1
2,DDX11L1,uc010nxq.1,chr1,+,11873,14409,37102,DDX11L1
3,WASH7P,uc009vis.3,chr1,-,14361,16765,38034,WASH7P
4,WASH7P,uc009vjc.1,chr1,-,16857,17751,38034,WASH7P


In [22]:
# Where no normalized symbol was found, fall back to the original geneName.
reference.symbol = reference.symbol.fillna(reference.geneName)

In [24]:
# Discard the original geneName column and let the normalized symbol column
# take over its name.
reference = reference.drop('geneName', axis=1).rename(columns={'symbol': 'geneName'})
reference.head()

Unnamed: 0,ucID,chrom,geneStrand,geneStart,geneEnd,HGNC,geneName
0,uc001aaa.3,chr1,+,11873,14409,37102,DDX11L1
1,uc010nxr.1,chr1,+,11873,14409,37102,DDX11L1
2,uc010nxq.1,chr1,+,11873,14409,37102,DDX11L1
3,uc009vis.3,chr1,-,14361,16765,38034,WASH7P
4,uc009vjc.1,chr1,-,16857,17751,38034,WASH7P


In [25]:
# Keep only the name and coordinates. With ucID and HGNC gone, rows that share
# a name and identical coordinates become exact duplicates (counted and
# removed in the following cells).
reference = reference[['geneName', 'chrom', 'geneStrand', 'geneStart', 'geneEnd']]
reference.head()

Unnamed: 0,geneName,chrom,geneStrand,geneStart,geneEnd
0,DDX11L1,chr1,+,11873,14409
1,DDX11L1,chr1,+,11873,14409
2,DDX11L1,chr1,+,11873,14409
3,WASH7P,chr1,-,14361,16765
4,WASH7P,chr1,-,16857,17751


In [26]:
len(reference)

82960

In [27]:
len(reference.drop_duplicates())

60840

In [28]:
# Rebind rather than drop in place: `reference` was produced by a column
# selection, and mutating such a selection in place is what makes pandas emit
# the "A value is trying to be set on a copy of a slice" warning.
reference = reference.drop_duplicates()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return func(*args, **kwargs)


In [29]:
reference.to_csv('../example_data/reference.normalized.tsv', sep='\t', index=False, header=True)

# Part II: Constructing a motifs file

Important! I've already normalized the MotifMap file, so I won't do it here. If you're running this again, make sure to normalize first, or use the normalized file I created!


In [2]:
def parse_motifs_file(motifs_file):
    """
    Load the MotifMap listing of Transcription Factor Binding Motifs as a dataframe

    The file is read as tab-delimited text without a header row; the column
    names are assigned here and the name/position/strand/FDR columns are then
    given motif-prefixed names.

    Arguments:
        motifs_file (string or FILE): file procured from MotifMap with full list of TF binding sites in the genome

    Returns:
        dataframe: motif dataframe with columns motifName, motifStrand,
            motifStart, motifEnd and motifScore (taken from FDR) alongside
            the remaining MotifMap columns
    """

    motif_fieldnames = [
        "ZScore", "FDR_lower", "name", "orientation", "chrom", "LOD", "strand",
        "start", "realhits", "cid", "FDR", "NLOD", "BBLS", "stop", "medianhits",
        "accession", "FDR_upper", "BLS", "stdevhits",
    ]

    renamed_columns = {
        "name": "motifName",
        "strand": "motifStrand",
        "start": "motifStart",
        "stop": "motifEnd",
        "FDR": "motifScore",
    }

    raw_motifs = pd.read_csv(motifs_file, delimiter='\t', names=motif_fieldnames)

    # index=str also converts the row labels to strings, as well as renaming columns.
    return raw_motifs.rename(index=str, columns=renamed_columns)


In [4]:
motifs = parse_motifs_file(motifs_file)
motifs.head()

Unnamed: 0,ZScore,FDR_lower,motifName,orientation,chrom,LOD,motifStrand,motifStart,realhits,cid,motifScore,NLOD,BBLS,motifEnd,medianhits,accession,FDR_upper,BLS,stdevhits
0,6.43448,0.0,RFX1,1,chr17,29.355633,+,58239126,1,1,0.0,0.992106,1.76339,58239144,0.0,LM1_RFX1,0.0,3.349127,0.0
1,6.430004,0.0,RFX1,-1,chr17,29.320824,-,58239126,2,2,0.0,0.991646,1.763377,58239144,0.0,LM1_RFX1,0.0,3.349127,0.0
2,6.368495,0.0,RFX1,-1,chr5,28.842535,-,63404689,3,3,0.0,0.985325,0.635808,63404707,0.0,LM1_RFX1,0.0,2.725784,0.0
3,6.364415,0.0,RFX1,1,chr5,28.810813,+,63404689,4,4,0.0,0.984906,0.635796,63404707,0.0,LM1_RFX1,0.0,2.725784,0.0
4,6.353193,0.0,RFX1,1,chr7,28.723547,+,128800229,2,5,0.0,0.983752,1.815382,128800247,0.0,LM1_RFX1,0.0,4.027027,0.0


In [5]:
motifs = motifs[['motifName', 'chrom', 'motifStrand', 'motifStart', 'motifEnd', 'motifScore']]
motifs.head()

Unnamed: 0,motifName,chrom,motifStrand,motifStart,motifEnd,motifScore
0,RFX1,chr17,+,58239126,58239144,0.0
1,RFX1,chr17,-,58239126,58239144,0.0
2,RFX1,chr5,-,63404689,63404707,0.0
3,RFX1,chr5,+,63404689,63404707,0.0
4,RFX1,chr7,+,128800229,128800247,0.0


In [6]:
len(motifs)

17309990

In [7]:
len(motifs.drop_duplicates())

17301513

In [8]:
motifs = motifs.drop_duplicates()

In [32]:
motifs.to_csv('../example_data/motifmap.normalized.cleaned.tsv', sep='\t', index=False, header=True)

# Part III: Merging genes and motifs

In [36]:
reference = pd.read_csv('../example_data/reference.normalized.tsv', sep='\t')
motifs = pd.read_csv('../example_data/motifmap.normalized.cleaned.tsv', sep='\t')

In [37]:
reference.head()

Unnamed: 0,geneName,chrom,geneStrand,geneStart,geneEnd
0,DDX11L1,chr1,+,11873,14409
1,WASH7P,chr1,-,14361,16765
2,WASH7P,chr1,-,16857,17751
3,WASH7P,chr1,-,15795,18061
4,WASH7P,chr1,-,14361,19759


In [38]:
motifs.head()

Unnamed: 0,motifName,chrom,motifStrand,motifStart,motifEnd,motifScore
0,RFX1,chr17,+,58239126,58239144,0.0
1,RFX1,chr17,-,58239126,58239144,0.0
2,RFX1,chr5,-,63404689,63404707,0.0
3,RFX1,chr5,+,63404689,63404707,0.0
4,RFX1,chr7,+,128800229,128800247,0.0


In [17]:
import os
import sys

# Make the GarNet sources importable through a relative path, like the data
# paths used in this notebook, instead of one user's home directory.
# NOTE(review): assumes src/ sits next to example_data/ (i.e. at ../src
# relative to the notebook's working directory) — confirm.
sys.path.append(os.path.abspath(os.path.join('..', 'src')))

# The cells below rely on this wildcard import for group_by_chromosome,
# IntervalTree_from_reference, IntervalTree_from_motifs,
# intersection_of_dict_of_intervaltree and parse_peaks_file.
from garnet import *

In [39]:
reference = group_by_chromosome(reference)
motifs = group_by_chromosome(motifs)

In [41]:
# Settings handed to IntervalTree_from_reference.
# NOTE(review): the exact meaning of each key is defined in garnet, not here.
options = {
    'upstream_window': 10000,
    'downstream_window': 10000,
    'tss': False,
}

# Replace each per-chromosome table with the interval tree built from it.
reference = {
    chromosome: IntervalTree_from_reference(chromosome_genes, options)
    for chromosome, chromosome_genes in reference.items()
}
motifs = {
    chromosome: IntervalTree_from_motifs(motifs_on_chromosome)
    for chromosome, motifs_on_chromosome in motifs.items()
}

In [42]:
motifs_with_associated_genes = intersection_of_dict_of_intervaltree(motifs, reference)

10:33:07 - GarNet: INFO - Computing intersection operation of IntervalTrees for each chromosome...


In [43]:
# Each intersection result is a (motif, gene) pair of mappings; merge every
# pair into one flat record. If both mappings share a key, the gene's value
# wins because it is unpacked last.
motifs_and_genes = [{**motif, **gene} for motif, gene in motifs_with_associated_genes]
motifs_and_genes = pd.DataFrame.from_records(motifs_and_genes)
motifs_and_genes.head()

Unnamed: 0,chrom,geneEnd,geneName,geneStart,geneStrand,motifEnd,motifName,motifScore,motifStart,motifStrand
0,chrY,5610264,PCDH11Y,4924130,+,5409966,HMG IY,0.216509,5409959,+
1,chrY,15592550,UTY,15360258,-,15522705,IRF8,0.379428,15522698,-
2,chrY,15592550,UTY,15434981,-,15522705,IRF8,0.379428,15522698,-
3,chrY,15592550,UTY,15435434,-,15522705,IRF8,0.379428,15522698,-
4,chrY,15592550,UTY,15409388,-,15522705,IRF8,0.379428,15522698,-


In [44]:
motifs_and_genes = motifs_and_genes[['chrom','motifName','motifStrand','motifStart','motifEnd','motifScore','geneName','geneStart','geneEnd','geneStrand']]
motifs_and_genes

Unnamed: 0,chrom,motifName,motifStrand,motifStart,motifEnd,motifScore,geneName,geneEnd,geneStart,geneStrand
0,chrY,HMG IY,+,5409959,5409966,0.216509,PCDH11Y,5610264,4924130,+
1,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15592550,15360258,-
2,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15592550,15434981,-
3,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15592550,15435434,-
4,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15592550,15409388,-
5,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15522993,15434913,-
6,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15592550,15447442,-
7,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15592550,15434913,-
8,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15592550,15434947,-
9,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15582109,15434913,-


In [45]:
# Signed offset of the motif start from the gene start: negative when the
# motif starts before geneStart, positive when it starts after.
# NOTE(review): geneStrand is not taken into account here; confirm whether
# minus-strand genes should be measured from geneEnd instead.
motifs_and_genes['motif_to_gene_distance'] = motifs_and_genes['motifStart'] - motifs_and_genes['geneStart']
motifs_and_genes.head()

Unnamed: 0,chrom,motifName,motifStrand,motifStart,motifEnd,motifScore,geneName,geneEnd,geneStart,geneStrand,motif_to_gene_distance
0,chrY,HMG IY,+,5409959,5409966,0.216509,PCDH11Y,5610264,4924130,+,485829
1,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15592550,15360258,-,162440
2,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15592550,15434981,-,87717
3,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15592550,15435434,-,87264
4,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15592550,15409388,-,113310


In [11]:
motifs_and_genes.to_csv('../example_data/intersection.tsv', sep='\t', header=True, index=False)

# Part IV: Constructing a database

In [3]:
motifs_and_genes = pd.read_csv('../example_data/intersection.tsv', sep='\t')
motifs_and_genes.head()

Unnamed: 0,chrom,motifName,motifStrand,motifStart,motifEnd,motifScore,geneName,geneStart,geneEnd,geneStrand,motif_to_gene_distance
0,chrY,HMG IY,+,5409959,5409966,0.216509,PCDH11Y,4924130,5610264,+,485829
1,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15360258,15592550,-,162440
2,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15434981,15592550,-,87717
3,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15435434,15592550,-,87264
4,chrY,IRF8,-,15522698,15522705,0.379428,UTY,15409388,15592550,-,113310


In [8]:
# Store the intersection table in a SQLite database file.
# if_exists="replace" discards any existing 'garnetdb' table first, so this
# cell can be re-run. to_sql is left at its default of also writing the
# DataFrame index, which is why an 'index' column appears in query results
# from this table.
GarNetDB = sqlite3.connect('../example_data/garnetDB.sql')
motifs_and_genes.to_sql('garnetdb', GarNetDB, if_exists="replace")

In [7]:
# Composite index over the three columns that the overlap query in Part V
# joins and filters on.
GarNetDB.execute("CREATE INDEX chr_start_stop on garnetdb(chrom, motifStart, motifEnd);")

# Persist the changes and release the database file.
GarNetDB.commit()
GarNetDB.close()

# Part V: Test if it works

In [9]:
GarNetDB = sqlite3.connect('../example_data/garnetDB.sql')

In [20]:
# Relative path, consistent with the other example_data paths in this
# notebook, instead of an absolute path into one user's home directory.
peaks = parse_peaks_file('../example_data/A549_FOXA1_broadPeak.bed')
peaks.head()

Unnamed: 0,chrom,peakStart,peakEnd,peakName,peakScore,peakStrand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts
0,chr1,4709051,4709736,peak1,114,.,188.76,-1,-1,,,
1,chr1,5507616,5507816,peak2,65,.,107.7,-1,-1,,,
2,chr1,7363480,7363851,peak3,224,.,370.93,-1,-1,,,
3,chr1,7777806,7778092,peak4,79,.,130.74,-1,-1,,,
4,chr1,8064603,8065661,peak5,164,.,272.4,-1,-1,,,


In [21]:
peaks = peaks[['peakName', 'chrom', 'peakStart', 'peakEnd', 'peakScore']]
peaks.head()

Unnamed: 0,peakName,chrom,peakStart,peakEnd,peakScore
0,peak1,chr1,4709051,4709736,114
1,peak2,chr1,5507616,5507816,65
2,peak3,chr1,7363480,7363851,224
3,peak4,chr1,7777806,7778092,79
4,peak5,chr1,8064603,8065661,164


In [22]:
# Load the example peaks into the same database so they can be joined against
# garnetdb in SQL. NOTE(review): this adds a 'peaks' table to the garnetDB
# file itself — confirm that is acceptable for the file that gets distributed.
peaks.to_sql('peaks', GarNetDB, if_exists="replace")

In [24]:
# Fetch every garnetdb row whose motif interval overlaps a peak on the same
# chromosome. Two closed intervals overlap exactly when each starts no later
# than the other ends, which is what the two inequalities express. A garnetdb
# row is returned once for each peak it overlaps.
overlaps = pd.read_sql_query("""
    SELECT garnetdb.*
    FROM garnetdb
    JOIN peaks ON garnetdb.chrom      == peaks.chrom
              AND garnetdb.motifStart <= peaks.peakEnd
              AND garnetdb.motifEnd   >= peaks.peakStart;
    """, GarNetDB)

In [25]:
overlaps

Unnamed: 0,index,chrom,motifName,motifStrand,motifStart,motifEnd,motifScore,geneName,geneStart,geneEnd,geneStrand,motif_to_gene_distance
0,7815,chr19,ETS2,-,8420716,8420723,0.077690,ANGPTL4,8429010,8439257,+,-8294
1,7816,chr19,ETS2,-,8420716,8420723,0.077690,ANGPTL4,8429039,8439257,+,-8323
2,8274,chr19,MAFB,+,8420676,8420682,0.529717,ANGPTL4,8429010,8439257,+,-8334
3,8275,chr19,MAFB,+,8420676,8420682,0.529717,ANGPTL4,8429039,8439257,+,-8363
4,13821,chr19,USF2,+,45657939,45657945,0.644043,TRAPPC6A,45666185,45681501,-,-8246
5,13822,chr19,USF2,+,45657939,45657945,0.644043,PPP1R37,45596430,45650543,+,61509
6,13823,chr19,USF2,+,45657939,45657945,0.644043,NKPD1,45653007,45663408,-,4932
7,13824,chr19,USF2,+,45657939,45657945,0.644043,NKPD1,45655160,45657110,-,2779
8,13825,chr19,USF2,-,45657939,45657945,0.644043,TRAPPC6A,45666185,45681501,-,-8246
9,13826,chr19,USF2,-,45657939,45657945,0.644043,PPP1R37,45596430,45650543,+,61509
