Skip to content

Commit

Permalink
Added sort option
Browse files Browse the repository at this point in the history
  • Loading branch information
asntech committed Feb 5, 2017
1 parent 29b7938 commit e8c11c8
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 29 deletions.
2 changes: 1 addition & 1 deletion intervene/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.33'
__version__ = '0.34'
43 changes: 29 additions & 14 deletions intervene/intervene
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,11 @@ def main():

#venn module argument parser
venn_parser = subparsers.add_parser('venn', usage='intervene venn [options]',
description='Create Venn diagram upto 6-way after intersection of genomic regions in <BED/GTF/GFF/VCF> or list sets.',
description='Create Venn diagram upto 6-way after intersection of genomic regions in (BED/GTF/GFF) format or list sets.',
help='Venn diagram of intersection of genomic regions or list sets (upto 6-way).')

venn_parser.add_argument('-i', dest='input', nargs="*",
help='Input genomic regions in <BED/GTF/GFF/VCF> format or list files. '
help='Input genomic regions in (BED/GTF/GFF) format or list files. '
'For files in a directory use *.<extension>. e.g. *.bed')

venn_parser.add_argument('--type', dest='type', default='genomic', choices=('genomic','list'),
Expand Down Expand Up @@ -224,12 +224,16 @@ def main():

#upset module argument parser
upset_parser = subparsers.add_parser('upset', usage='intervene upset [options]',
description='Create UpSet diagram after intersection of genomic regions in <BED/GTF/GFF/VCF> or list sets.',
description='Create UpSet diagram after intersection of genomic regions in (BED/GTF/GFF) or list sets.',
help='UpSet diagram of intersection of genomic regions or list sets.')

upset_parser.add_argument('-i', dest='input', nargs="*",
help='Input genomic regions in <BED/GTF/GFF/VCF> format or list files. '
help='Input genomic regions in (BED/GTF/GFF) format or list files. '
'For files in a directory use *.<extension>. e.g. *.bed')
'''
upset_parser.add_argument('-c', dest='compare', nargs=1,
help='Input genomic region file (BED/GTF/GFF) format to compare with input files.')
'''

upset_parser.add_argument('--type', dest='type', choices=('genomic','list'),default='genomic',
help='Type of input sets. Genomic regions or lists of genes sets. '
Expand Down Expand Up @@ -295,11 +299,11 @@ def main():

#pairwise module argument parser
pairwise_parser = subparsers.add_parser('pairwise', usage='intervene pairwise [options]',
description='Pairwise intersection and heatmap of N genomic region sets in <BED/GTF/GFF/VCF> format.',
help='Pairwise intersection and heatmap of N genomic region sets in <BED/GTF/GFF/VCF> format.')
description='Pairwise intersection and heatmap of N genomic region sets in (BED/GTF/GFF/VCF) format.',
help='Pairwise intersection and heatmap of N genomic region sets in <BED/GTF/GFF> format.')

pairwise_parser.add_argument('-i', dest='input', nargs="*",
help='Input genomic regions in <BED/GTF/GFF/VCF> format. '
help='Input genomic regions in (BED/GTF/GFF) format. '
'For files in a directory use *.<extension>. e.g. *.bed')

pairwise_parser.add_argument('--type', choices=('count','frac','jaccard','fisher','reldist'),
Expand All @@ -311,18 +315,22 @@ def main():
'Default is "%(default)s"')

pairwise_parser.add_argument('--htype', dest="htype",choices=("tribar","color", "pie","circle", "square", "ellipse", "number", "shade"),
default='tribar',help='Heatmap plot type. '
default='pie',help='Heatmap plot type. '
'Default is "%(default)s"')

pairwise_parser.add_argument('--names', dest='labels', default='A,B,C,D,E,F',
pairwise_parser.add_argument('--names', dest='labels',
help='Comma-separated list of names for input files. '
'Default is: --names=A,B,C,D,E,F')
'Default is base name of input files')

pairwise_parser.add_argument('--filenames', action='store_true', default=False,
help='Use file names as labels instead. '
'Default is "%(default)s"')

#'''

pairwise_parser.add_argument('--sort', action='store_true', default=False,
help='Set this only if your files are not sorted. '
'Default is "%(default)s"')

'''
pairwise_parser.add_argument('--enrichment', action='store_true',
help='Run randomizations (default 1000, specify otherwise '
'with --iterations) on each pairwise comparison and '
Expand All @@ -334,7 +342,7 @@ def main():
pairwise_parser.add_argument('--processes', default=1, type=int,
help='Number of processors to perform for enrichement '
'scores')
#'''
'''
pairwise_parser.add_argument('--genome', help='Required argument if --type=fisher. '
'Needs to be a string assembly name like "mm10" or "hg38"')
pairwise_parser.add_argument('-o', '--output', dest='output',
Expand Down Expand Up @@ -398,6 +406,13 @@ def main():
pairwise.pairwise_intersection(options)
sys.exit(1)

'''
if options.command =='upset':
if options.compare:
upset.one_vs_rest_intersection(options.input,options.compare,options.output)
sys.exit(1)
'''

output_name = options.output+'/InterVene_'+options.command+'_'+str(venn_order(options.input))+'way_'+plot_type+'.'+options.figtype

#check file name already exists and ask if user want to overwrite
Expand Down
38 changes: 24 additions & 14 deletions intervene/modules/pairwise/pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def jaccard_of_a(a, b):
return a.jaccard(b,u=True)['jaccard']

#Calculate the fisher
def fisher_of_a(a, b):
return a.fisher(b,genome='hg19').two_tail
def fisher_of_a(a, b, genome):
return a.fisher(b,genome=genome).two_tail

#Calculate the reldist
def reldist_of_a(a, b):
Expand All @@ -48,7 +48,7 @@ def enrichment_score(a, b, genome_fn, iterations=1000, processes=1):
results = a.randomstats(b, new=True, genome_fn=genome_fn, iterations=iterations, processes=processes)
return (results['actual'] + 1) / (results['median randomized'] + 1)

def create_matrix(beds, func, verbose=False, **kwoptions):
def create_matrix(beds, func, verbose=False, sort_bed=False, **kwoptions):
nfiles = len(beds)
total = nfiles ** 2
i = 0
Expand All @@ -58,10 +58,13 @@ def create_matrix(beds, func, verbose=False, **kwoptions):
matrix = collections.defaultdict(dict)
for fa in beds:
a = BedTool(fa)
if sort_bed:
a = a.sort()
for fb in beds:
i += 1
b = BedTool(fb)

if sort_bed:
b = b.sort()
if verbose:
sys.stderr.write(
'%(i)s of %(total)s: %(fa)s + %(fb)s\n' % locals())
Expand All @@ -74,6 +77,7 @@ def create_matrix(beds, func, verbose=False, **kwoptions):
return matrix, bed_names, bed_sizes



def barplot(series, matrix, outfile, options, max_size=1):
"""Create a bar plot and place the lower triangle of a heatmap directly
adjacent so that the bases of the bars line up with the diagonal of the
Expand Down Expand Up @@ -215,15 +219,18 @@ def heatmap_triangle(dataframe, axes, hlabel='Fraction of overlap'):

return caxes, D.index


def pairwise_intersection(options):

'''
if options.enrichment:
FUNC = enrichment_score
genome_fn = pybedtools.chromsizes_to_file(pybedtools.chromsizes(options.genome))
kwoptions = dict(genome_fn=genome_fn, iterations=options.iterations,
processes=options.processes)
'''

elif options.type == "frac":
if options.type == "frac":
FUNC = frac_of_a
kwoptions = {}

Expand All @@ -233,7 +240,9 @@ def pairwise_intersection(options):

elif options.type == 'fisher':
FUNC = fisher_of_a
kwoptions = {}
kwoptions = dict(genome=options.genome)

#kwoptions = {}

elif options.type == 'reldist':
FUNC = reldist_of_a
Expand All @@ -243,17 +252,13 @@ def pairwise_intersection(options):
FUNC = actual_intersection
kwoptions = {}

t0 = time.time()
#matrix = create_matrix(beds=options.input, func=FUNC, verbose=options.verbose, **kwoptions)
matrix, bed_names, bed_sizes = create_matrix(beds=options.input, func=FUNC, verbose=False, **kwoptions)
matrix, bed_names, bed_sizes = create_matrix(beds=options.input, func=FUNC, verbose=False, sort_bed=options.sort, **kwoptions)

#print(bed_sizes)

t1 = time.time()

nfiles = len(options.input)

output_name = options.output+'/InterVene_'+options.command+'_'+str(nfiles)+'_files_'
output_name = options.output+'/Intervene_'+options.command+'_'+str(nfiles)+'_files_'

#if options.verbose:
# sys.stderr.write('Time to construct %s x %s matrix: %.1fs' \
Expand Down Expand Up @@ -303,13 +308,18 @@ def pairwise_intersection(options):
#df = pd.read_csv('data.csv',index_col=0, delim_whitespace=True)
#matrix = pd.DataFrame(np.random.random((nrows, ncols)), columns=labels)
outfile = options.output+'/'+str(options.type)+'_barplot_heatmap.'+options.figtype
barplot(series, matrix, outfile, options, max_size=max(bed_sizes))
barplot(series, matrix, outfile, options, max_size=max(bed_sizes))

print('\nYou are done! Please check your results @ '+options.output+'. \nThank you for using Intervene!\n')


else:
#print("Please check the matrix file "+matrix_file)
cmd = 'heatmap_intervene.R %s %s %s %s %s' % (matrix_file,options.htype,options.type, output_name,options.figtype)
os.system(cmd)

print('\nYou are done! Please check your results @ '+options.output+'. \nThank you for using InterVene!\n')
print('\nYou are done! Please check your results @ '+options.output+'. \nThank you for using Intervene!\n')




45 changes: 45 additions & 0 deletions intervene/modules/upset/upset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
import sys
import os
import tempfile
from intervene.modules.pairwise.pairwise import get_name
from pybedtools import BedTool


def create_r_script(labels, names, options):
"""
Expand Down Expand Up @@ -130,4 +133,46 @@ def draw_genomic(labels, names, output, fig_type):
cmd = 'upset_plot_intervene.R %s %s %s %s %s ' % ('genomic',len(key),temp_f.name, output, fig_type)
os.system(cmd)
sys.exit(1)


def one_vs_rest_intersection(beds, peaks, output, **kwoptions):
'''
Compares a set of peaks with several other peaks sets.
'''
names = []
matrix_file = output+'/One_vs_all_peak_set_matrix.txt'
f = open(matrix_file, 'w')

f.write('peak_id')
#f.write('peak_id\tchrom\tstart\tend')

for bed in beds:
names.append(get_name(bed))
f.write('\t' + str(get_name(bed)))
#main_int.append(names)

peaks = BedTool(peaks[0])
f.write('\n')
for i in peaks:
#region_int = []
peak_id = str(i.chrom)+"_"+str(i.start)+"_"+str(i.end)
f.write(peak_id)
#f.write(peak_id + '\t' + i.chrom + '\t' + str(i.start) + '\t' + str(i.end))

for bed in beds:
b = BedTool(bed)
int_count = BedTool(str(i), from_string=True).intersect(b).count()
if (int_count > 0):
#region_int.append("1")
f.write('\t' + str(1))
else:
#region_int.append("0")
f.write('\t' + str(0))
f.write('\n')
#main_int.append(region_int)
#matrix[peak_id] = region_int
f.close()

return matrix_file

0 comments on commit e8c11c8

Please sign in to comment.