Skip to content
This repository has been archived by the owner on Jun 16, 2018. It is now read-only.

Commit

Permalink
Merge pull request #11 from mruffalo/master
Browse files: browse the repository at this point in the history
Minor tweaks/updates
  • Loading branch information
brentp committed Aug 13, 2013
2 parents 894cd05 + 505dfe5 commit 70b1fdf
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 46 deletions.
26 changes: 13 additions & 13 deletions ez_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
This file can also be run as a script to install or upgrade setuptools.
"""
from __future__ import print_function
import sys
DEFAULT_VERSION = "0.6c11"
DEFAULT_URL = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[:3]
Expand Down Expand Up @@ -63,17 +64,16 @@
}

import sys, os
try: from hashlib import md5
except ImportError: from md5 import md5
from hashlib import md5

def _validate_md5(egg_name, data):
if egg_name in md5_data:
digest = md5(data).hexdigest()
if digest != md5_data[egg_name]:
print >>sys.stderr, (
print((
"md5 validation of %s failed! (Possible download problem?)"
% egg_name
)
), file=sys.stderr)
sys.exit(2)
return data

Expand Down Expand Up @@ -103,14 +103,14 @@ def do_download():
return do_download()
try:
pkg_resources.require("setuptools>="+version); return
except pkg_resources.VersionConflict, e:
except pkg_resources.VersionConflict as e:
if was_imported:
print >>sys.stderr, (
print((
"The required version of setuptools (>=%s) is not available, and\n"
"can't be installed while this script is running. Please install\n"
" a more recent version first, using 'easy_install -U setuptools'."
"\n\n(Currently using %r)"
) % (version, e.args[0])
) % (version, e.args[0]), file=sys.stderr)
sys.exit(2)
else:
del pkg_resources, sys.modules['pkg_resources'] # reload ok
Expand Down Expand Up @@ -216,10 +216,10 @@ def main(argv, version=DEFAULT_VERSION):
os.unlink(egg)
else:
if setuptools.__version__ == '0.0.1':
print >>sys.stderr, (
print((
"You have an obsolete version of setuptools installed. Please\n"
"remove it from your system entirely before rerunning this script."
)
), file=sys.stderr)
sys.exit(2)

req = "setuptools>="+version
Expand All @@ -238,8 +238,8 @@ def main(argv, version=DEFAULT_VERSION):
from setuptools.command.easy_install import main
main(argv)
else:
print "Setuptools version",version,"or greater has been installed."
print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)'
print("Setuptools version",version,"or greater has been installed.")
print('(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)')

def update_md5(filenames):
"""Update our built-in md5 registry"""
Expand All @@ -252,7 +252,7 @@ def update_md5(filenames):
md5_data[base] = md5(f.read()).hexdigest()
f.close()

data = [" %r: %r,\n" % it for it in md5_data.items()]
data = [" %r: %r,\n" % it for it in md5_data.iteritems()]
data.sort()
repl = "".join(data)

Expand All @@ -262,7 +262,7 @@ def update_md5(filenames):

match = re.search("\nmd5_data = {\n([^}]+)}", src)
if not match:
print >>sys.stderr, "Internal error!"
print("Internal error!", file=sys.stderr)
sys.exit(2)

src = src[:match.start(1)] + repl + src[match.end(1):]
Expand Down
21 changes: 11 additions & 10 deletions pyfasta/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import print_function
import sys
from fasta import Fasta, complement, DuplicateHeaderException
from records import *
Expand All @@ -23,15 +24,15 @@ def main():
pyfasta extract --help
"""
if len(sys.argv) == 1:
print help
print(help)
sys.exit()

action = sys.argv[1]

sglobals = globals()
if not action in sglobals:
print "%s not a valid action" % action
print help
print("%s not a valid action" % action)
print(help)
sys.exit()

globals()[action](sys.argv[2:])
Expand Down Expand Up @@ -74,8 +75,8 @@ def info(args):
else:
info.sort()

print "\n" + fasta
print "=" * len(fasta)
print("\n" + fasta)
print("=" * len(fasta))
for k, l in info:
gc = ""
if options.gc:
Expand All @@ -84,12 +85,12 @@ def info(args):
c = seq.count('C')
gc = 100.0 * (g + c) / float(l)
gc = "gc:%.2f%%" % gc
print (">%s length:%i " % (k, l)) + gc
print((">%s length:%i " % (k, l)) + gc)

if total_len > 1000000:
total_len = "%.3fM" % (total_len / 1000000.)
print
print "%s basepairs in %i sequences" % (total_len, nseqs)
print()
print("%s basepairs in %i sequences" % (total_len, nseqs))

def flatten(args):
"""
Expand Down Expand Up @@ -132,8 +133,8 @@ def extract(args):
for seqname in seqs:
seq = f[seqname]
if options.header:
print ">%s" % seqname
print seq
print(">%s" % seqname)
print(seq)


if __name__ == "__main__":
Expand Down
12 changes: 6 additions & 6 deletions pyfasta/fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, fasta_name, record_class=NpyFastaRecord,
"""
>>> from pyfasta import Fasta, FastaRecord
>>> f = Fasta('tests/data/three_chrs.fasta',
>>> f = Fasta('tests/data/three_chrs.fasta',
... record_class=FastaRecord)
>>> sorted(f.keys())
['chr1', 'chr2', 'chr3']
Expand Down Expand Up @@ -73,7 +73,7 @@ def gen_seqs_with_headers(self, key_fn=None):
fh = open(self.fasta_name, 'r')
# do the flattening (remove newlines)
# check for uniqueness of headers.
seen_headers = {}
seen_headers = set()
header = None
seqs = None
for line in fh:
Expand All @@ -83,7 +83,7 @@ def gen_seqs_with_headers(self, key_fn=None):
if seqs is not None:
if header in seen_headers:
raise DuplicateHeaderException(header)
seen_headers[header] = None
seen_headers.add(header)
yield header, "".join(seqs)

header = line[1:].strip()
Expand All @@ -93,7 +93,7 @@ def gen_seqs_with_headers(self, key_fn=None):
else:
seqs.append(line)

if seqs != []:
if seqs:
if header in seen_headers:
raise DuplicateHeaderException(header)
yield header, "".join(seqs)
Expand Down Expand Up @@ -156,7 +156,7 @@ def sequence(self, f, asstring=True, auto_rc=True
>>> f['chr3'][:][-10:]
'CGCACGCTAC'
a feature can have exons:
>>> feat = dict(start=9, stop=19, strand=1, chr='chr1'
... , exons=[(9,11), (13, 15), (17, 19)])
Expand Down Expand Up @@ -189,7 +189,7 @@ def sequence(self, f, asstring=True, auto_rc=True
sequence = self._seq_from_keys(f, fasta, exon_keys, one_based=one_based)

if sequence is None:
start = f['start'] - int(one_based)
start = f['start'] - int(one_based)
sequence = fasta[start: f['stop']]

if auto_rc and f.get('strand') in (-1, '-1', '-'):
Expand Down
5 changes: 2 additions & 3 deletions pyfasta/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,8 @@ def copy_inplace(klass, flat_name, fasta_name):
os.rename(flat_name, fasta_name)
# still need the flattened file to show
# it's current.
flatfh = open(fasta_name + klass.ext, 'w')
flatfh.write(MAGIC)
flatfh.close()
with open(fasta_name + klass.ext, 'w') as flatfh:
flatfh.write(MAGIC)



Expand Down
17 changes: 9 additions & 8 deletions pyfasta/split_fasta.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import print_function
from pyfasta import Fasta
import operator
import collections
Expand Down Expand Up @@ -47,8 +48,8 @@ def newnames(oldname, n, kmers=None, overlap=None, header=None):
else:
width = len(str(n))
names = [pattern % str(i).rjust(width, '0') for i in range(n)]
print >>sys.stderr, "creating new files:"
print >>sys.stderr, "\n".join(names)
print("creating new files:", file=sys.stderr)
print("\n".join(names), file=sys.stderr)
return names


Expand All @@ -57,8 +58,8 @@ def print_to_fh(fh, fasta, lens, seqinfo):
lens[fh.name] += seqlen
f = fasta
assert len(str(f[key])) == seqlen, (key, seqlen, len(str(f[key])))
print >>fh, ">%s" % key
print >>fh, str(f[key])
print(">%s" % key, file=fh)
print(str(f[key]), file=fh)


def format_kmer(seqid, start):
Expand Down Expand Up @@ -132,8 +133,8 @@ def with_header_names(f, names):
"""
for seqid, name in names.iteritems():
with open(name, 'w') as fh:
print >>fh, ">%s" % seqid
print >>fh, str(f[seqid])
print(">%s" % seqid, file=fh)
print(str(f[seqid]), file=fh)

def with_kmers(f, names, k, overlap):
"""
Expand All @@ -148,8 +149,8 @@ def with_kmers(f, names, k, overlap):
for (start0, subseq) in Fasta.as_kmers(seq, k, overlap=overlap):

fh = fhs[i % len(fhs)]
print >>fh, ">%s" % format_kmer(seqid, start0)
print >>fh, subseq
print(">%s" % format_kmer(seqid, start0), file=fh)
print(subseq, file=fh)
i += 1
for fh in fhs:
fh.close()
Expand Down
12 changes: 7 additions & 5 deletions tests/bench.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import print_function
from itertools import islice
import sys
import os
sys.path.insert(0, os.path.abspath("."))
Expand All @@ -14,15 +16,15 @@ def make_long_fasta(filename="t.fasta", nrecs=2500, seqlen=SEQLEN):
#0123456789"
s = "ACTGACTGAC"
for i in range(nrecs):
print >>fh, ">header%i" % i
print >>fh, (s * (seqlen/10))
print(">header%i" % i, file=fh)
print(s * (seqlen/10), file=fh)

fh.close()
return filename

def read(f, nreads=40000, seqlen=SEQLEN):

for k in f.keys()[:10]:
for k in islice(f.iterkeys(), 10):
for i in range(nreads):
start = random.randint(0, seqlen)
end = min(seqlen, start + random.randint(1000, 2000))
Expand All @@ -35,12 +37,12 @@ def main():

t = time.time()
f = Fasta(fa)
print "flatten:", time.time() - t
print("flatten:", time.time() - t)


t = time.time()
read(f)
print "read:", time.time() - t
print("read:", time.time() - t)



Expand Down
2 changes: 1 addition & 1 deletion tests/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def check_reload(klass, fasta_name):


def check_full_slice(f):
for k in f.keys():
for k in f.iterkeys():
assert str(f[k]) == f[k][:]
assert str(f[k]) == f[k][0:]

Expand Down

0 comments on commit 70b1fdf

Please sign in to comment.