Python 3 compatibility fixes for deenurp, plus an hdbscan pin bump.

  * deduplicate_sequences.py: hashlib.sha1() only accepts bytes-like input
    on Python 3, so the sequence string is encoded before hashing.
  * util.py: time.clock() was removed in Python 3.8. The progress counter
    only needs elapsed intervals, so it reads time.monotonic(), which
    cannot jump backwards when the system clock is adjusted (time.time()
    can, which would yield negative elapsed values or stalled reports).
  * requirements.txt: hdbscan 0.8.28 -> 0.8.33.

NOTE(review): the indentation of the Python context lines was reconstructed
from a whitespace-collapsed copy of this patch; confirm it against the
target tree, or apply with `git apply --ignore-whitespace`.
NOTE(review): the post-image blob hash on the util.py index line predates
the switch to time.monotonic(); it is ignored by a plain `git apply`.

diff --git a/deenurp/subcommands/deduplicate_sequences.py b/deenurp/subcommands/deduplicate_sequences.py
index c68fd90..5a20391 100644
--- a/deenurp/subcommands/deduplicate_sequences.py
+++ b/deenurp/subcommands/deduplicate_sequences.py
@@ -51,7 +51,7 @@ def action(args):
     seqhashes = dict()
     for record in util.Counter(SeqIO.parse(sequences_in, 'fasta')):
         seq = str(record.seq).replace('\n', '').upper()
-        seqhashes[record.name] = hashlib.sha1(seq).hexdigest()
+        seqhashes[record.name] = hashlib.sha1(seq.encode('utf-8')).hexdigest()
 
     seqhash = pandas.Series(data=seqhashes, name='seqhash')
     seqhash.index.name = 'seqname'
diff --git a/deenurp/util.py b/deenurp/util.py
index a90355e..8221eae 100644
--- a/deenurp/util.py
+++ b/deenurp/util.py
@@ -48,20 +48,20 @@ def __init__(self, iterable, stream=sys.stderr, report_every=0.3,
         self.stream = stream
         self.report_every = report_every
         self.prefix = prefix
-        self.start = time.clock()
+        self.start = time.monotonic()
         self.last = 0
 
     def _report(self):
         if self.stream:
             msg = '{0}{1:15d} [{2:10.2f}s]\r'
-            msg = msg.format(self.prefix, self.count, time.clock()-self.start)
+            msg = msg.format(self.prefix, self.count, time.monotonic()-self.start)
             self.stream.write(msg)
 
     def __iter__(self):
         for i in self._it:
             yield i
             self.count += 1
-            now = time.clock()
+            now = time.monotonic()
             if now - self.last > self.report_every:
                 self._report()
                 self.last = now
diff --git a/requirements.txt b/requirements.txt
index 512b913..9c6434d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ decorator==5.1.1
 DendroPy==4.5.2
 fastalite==0.3
 greenlet==1.1.2
-hdbscan==0.8.28
+hdbscan==0.8.33
 Jinja2==3.0.3
 joblib==1.1.0
 MarkupSafe==2.1.0