Skip to content

Commit

Permalink
mass renaming
Browse files: browse the repository at this point in the history
  • Loading branch information
mr-c committed Aug 5, 2015
1 parent 82351f4 commit a7893cf
Show file tree
Hide file tree
Showing 46 changed files with 406 additions and 465 deletions.
12 changes: 9 additions & 3 deletions ChangeLog
@@ -1,9 +1,15 @@
2015-07-29 Michael R. Crusoe <crusoe@ucdavis.edu>

* scripts/*.py,tests/*.py,sandbox/*.py,khmer/*.py,oxli/*.py:
counting_hash, hashtable->countgraph; hashbits->nodegraph;
check_space_for_hashtable->check_space_for_graph; hash_args->graph_args

many function and variable renames:
counting_hash, countinghash, hashtable->countgraph;
CountingHash->Countgraph
hashbits->nodegraph; Hashbits->Nodegraph;
check_space_for_hashtable->check_space_for_graph;
hash_args->graph_args
* khmer/_khmer.cc: remove unused 'new_hashtable' method; match renames
* TODO: removed several items
* doc/dev/scripts-and-sandbox.rst: fixed hashbang

2015-08-04 Titus Brown <titus@idyll.org> and Camille Scott
<camille.scott.w@gmail.com>
Expand Down
10 changes: 0 additions & 10 deletions TODO
@@ -1,8 +1,3 @@
@@ merge refactor => master
@@ review site for paper?

auto-memory setting.

find-knot speedup:
- too many redundant rounds of partitioning?

Expand All @@ -28,17 +23,14 @@ load-counting/bigcount loading is slooooow

----

screed bzip
screed slice
screed fasta/fastq output

screed streaming foo

---

fix tests cleanup
pyrex/cython stuff
script testing
docs!

---
Expand All @@ -54,8 +46,6 @@ fix tests and test cases to properly isolate/remove temp files.

fix dir(ht)

rename new_hashtable to new_countinghash

###

Semi-obsolete comments, pre partitioning:
Expand Down
2 changes: 1 addition & 1 deletion doc/dev/scripts-and-sandbox.rst
Expand Up @@ -38,7 +38,7 @@ All scripts in ``sandbox/`` must:
* be importable (enforced by ``test_import_all`` in
``test_sandbox_scripts.py``)
* be mentioned in ``sandbox/README.rst``
* have a hash-bang line (``#! /usr/bin/env python2``) at the top
* have a hash-bang line (``#! /usr/bin/env python``) at the top
* be command-line executable (``chmod a+x``)
* have a Copyright message (see below)
* have lowercase names
Expand Down
66 changes: 31 additions & 35 deletions khmer/__init__.py
Expand Up @@ -10,18 +10,14 @@
from math import log
import json

from khmer._khmer import CountingHash as _CountingHash
from khmer._khmer import Countgraph as _Countgraph
from khmer._khmer import LabelHash as _LabelHash
from khmer._khmer import Hashbits as _Hashbits
from khmer._khmer import Nodegraph as _Nodegraph
from khmer._khmer import HLLCounter as _HLLCounter
from khmer._khmer import ReadAligner as _ReadAligner

from khmer._khmer import forward_hash # figuregen/*.py
# tests/test_{functions,countinggraph,labelhash,counting_single}.py

from khmer._khmer import new_hashtable
# sandbox/{occupy,ctb-iterative-bench{-2-old}}.py
# tests/{test_c_wrapper,test_counting_single}.py
from khmer._khmer import forward_hash
# tests/test_{functions,countinggraph,counting_single}.py

from khmer._khmer import forward_hash_no_rc # tests/test_functions.py

Expand Down Expand Up @@ -53,10 +49,10 @@ def load_nodegraph(filename):
Keyword argument:
filename -- the name of the nodegraph file
"""
hashtable = _Hashbits(1, [1])
hashtable.load(filename)
nodegraph = _Nodegraph(1, [1])
nodegraph.load(filename)

return hashtable
return nodegraph


def load_countinggraph(filename):
Expand All @@ -65,10 +61,10 @@ def load_countinggraph(filename):
Keyword argument:
filename -- the name of the countinggraph file
"""
hashtable = _CountingHash(1, [1])
hashtable.load(filename)
countgraph = _Countgraph(1, [1])
countgraph.load(filename)

return hashtable
return countgraph


def extract_nodegraph_info(filename):
Expand Down Expand Up @@ -108,7 +104,7 @@ def extract_nodegraph_info(filename):
return ksize, round(table_size, -2), n_tables, version, ht_type


def extract_countinghash_info(filename):
def extract_countgraph_info(filename):
"""Open the given countinggraph file and return a tuple of information.
Return: the k-mer size, the table size, the number of tables, the bigcount
Expand All @@ -129,14 +125,14 @@ def extract_countinghash_info(filename):
ulonglong_size = len(pack('Q', 0))

try:
with open(filename, 'rb') as countinghash:
signature, = unpack('4s', countinghash.read(4))
version, = unpack('B', countinghash.read(1))
ht_type, = unpack('B', countinghash.read(1))
use_bigcount, = unpack('B', countinghash.read(1))
ksize, = unpack('I', countinghash.read(uint_size))
n_tables, = unpack('B', countinghash.read(1))
table_size, = unpack('Q', countinghash.read(ulonglong_size))
with open(filename, 'rb') as countgraph:
signature, = unpack('4s', countgraph.read(4))
version, = unpack('B', countgraph.read(1))
ht_type, = unpack('B', countgraph.read(1))
use_bigcount, = unpack('B', countgraph.read(1))
ksize, = unpack('I', countgraph.read(uint_size))
n_tables, = unpack('B', countgraph.read(1))
table_size, = unpack('Q', countgraph.read(ulonglong_size))
if signature != b'OXLI':
raise ValueError("Counting table '{}' is missing file type "
"signature. ".format(filename) + str(signature))
Expand All @@ -147,17 +143,17 @@ def extract_countinghash_info(filename):
ht_type


def calc_expected_collisions(hashtable, force=False, max_false_pos=.2):
"""Do a quick & dirty expected collision rate calculation on a hashtable.
def calc_expected_collisions(graph, force=False, max_false_pos=.2):
"""Do a quick & dirty expected collision rate calculation on a graph.
Also check to see that collision rate is within threshold.
Keyword argument:
hashtable: the hashtable object to inspect
graph: the countgraph or nodegraph object to inspect
"""
sizes = hashtable.hashsizes()
sizes = graph.hashsizes()
n_ht = float(len(sizes))
occupancy = float(hashtable.n_occupied())
occupancy = float(graph.n_occupied())
min_size = min(sizes)

fp_one = occupancy / min_size
Expand Down Expand Up @@ -233,19 +229,19 @@ def get_n_primes_near_x(number, target):
# Additional functionality can be added to these classes as appropriate.


class CountingHash(_CountingHash):
class Countgraph(_Countgraph):

def __new__(cls, k, starting_size, n_tables):
primes = get_n_primes_near_x(n_tables, starting_size)
c = _CountingHash.__new__(cls, k, primes)
c = _Countgraph.__new__(cls, k, primes)
c.primes = primes
return c


class LabelHash(_LabelHash):

def __new__(cls, k, starting_size, n_tables):
hb = Hashbits(k, starting_size, n_tables)
hb = Nodegraph(k, starting_size, n_tables)
c = _LabelHash.__new__(cls, hb)
c.graph = hb
return c
Expand All @@ -255,17 +251,17 @@ class CountingLabelHash(_LabelHash):

def __new__(cls, k, starting_size, n_tables):
primes = get_n_primes_near_x(n_tables, starting_size)
hb = _CountingHash(k, primes)
hb = _Countgraph(k, primes)
c = _LabelHash.__new__(cls, hb)
c.graph = hb
return c


class Hashbits(_Hashbits):
class Nodegraph(_Nodegraph):

def __new__(cls, k, starting_size, n_tables):
primes = get_n_primes_near_x(n_tables, starting_size)
c = _Hashbits.__new__(cls, k, primes)
c = _Nodegraph.__new__(cls, k, primes)
c.primes = primes
return c

Expand Down Expand Up @@ -296,7 +292,7 @@ class ReadAligner(_ReadAligner):

"""Sequence to graph aligner.
ReadAligner uses a CountingHash (the counts of k-mers in the target DNA
ReadAligner uses a Countgraph (the counts of k-mers in the target DNA
sequences) as an implicit De Bruijn graph. Input DNA sequences are aligned
to this graph via a paired Hidden Markov Model.
Expand Down

0 comments on commit a7893cf

Please sign in to comment.