Skip to content

Commit

Permalink
fix GFF start position
Browse files Browse the repository at this point in the history
  • Loading branch information
daler committed May 5, 2011
1 parent 0348616 commit c0163ce
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 1 deletion.
6 changes: 5 additions & 1 deletion pybedtools/cbedtools.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,16 @@ cdef class Interval:
self._bed.fields[idx] = string(chrom)

property start:
""" the start of the feature"""
""" the 0-based start of the feature"""
def __get__(self):
return self._bed.start
def __set__(self, int start):
# The value is stored on the underlying record unchanged (0-based).
self._bed.start = start
# Index of the start column in the raw fields for this file type.
idx = LOOKUPS[self.file_type]["start"]

# Non-BED files should have 1-based coords in fields, so the text written
# to the fields is one greater than the 0-based value stored above.
if self.file_type != 'bed': start += 1

# Keep the string field in sync with the numeric start.
s = str(start)
self._bed.fields[idx] = string(s)

Expand Down
29 changes: 29 additions & 0 deletions pybedtools/test/test_cbedtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,35 @@ def setUp(self):
self.i = Interval("chr1", start, end, strand)
self.start, self.end, self.strand = start, end, strand

# Overwrite IntervalTest.testStart
def testStart(self):
# Take the first interval from the test file and record its string form.
ivf = IntervalFile(self.file)
iv = ivf.next()
orig_string = str(iv)

# .start is 0-based.
orig_start = iv.start

# Setting .start always sets the 0-based coord; assigning the value that
# was just read must therefore leave the interval unchanged.
iv.start = orig_start

# For GFF, setting .start should also write the 1-based GFF coord into
# .fields[3], i.e. .fields[3] is always .start + 1.
assert iv.start == int(iv.fields[3])-1

# Repeat the round trip to check that a second assignment does not shift
# the coord either.
second_string = str(iv)
second_start = iv.start
iv.start = second_start

# Check .start and .fields[3] internal consistency again
assert iv.start == int(iv.fields[3])-1

# Print the three snapshots side by side for comparison.
print ' orig:', '(start=%s)'%orig_start, orig_string
print ' second:', '(start=%s)'%second_start, second_string
print 'current:', '(start=%s)'%iv.start, str(iv)
# Neither the 0-based start nor the string form may change across the
# two assignments.
self.assert_(orig_start == second_start == iv.start)
self.assert_(orig_string == second_string == str(iv))



if __name__ == "__main__":
Expand Down

3 comments on commit c0163ce

@brentp
Copy link
Contributor

@brentp brentp commented on c0163ce May 5, 2011

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't had time to check this out, but how do our examples from #8 (comment),
e.g.

import pybedtools
b = pybedtools.BedTool('pybedtools/test/data/c.gff')
d = iter(b).next()
print d
d.start = d.start
print d

behave now?

@daler
Copy link
Owner Author

@daler daler commented on c0163ce May 5, 2011

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, it works, and testStart checks this so we don't get a regression. To confirm:

>>> import pybedtools
>>> a = pybedtools.example_bedtool('c.gff')
>>> d = iter(a).next()
>>> d.file_type
'gff'
>>> d.start  # .start is always 0-based
464L
>>> d.start = d.start
>>> d.start
464L
>>> d.fields
['chr1',
 'ucb',
 'gene',
 '465', # this is a string and is in 1-based coords
 '805',
 '.',
 '+',
 '.',
 'ID=thaliana_1_465_805;match=scaffold_801404.1;rname=thaliana_1_465_805']

@brentp
Copy link
Contributor

@brentp brentp commented on c0163ce May 6, 2011

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good to me; I tried to break it but couldn't.

Please sign in to comment.