diff --git a/ChangeLog b/ChangeLog index 6366d9c3e2..3dae17da5a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,15 @@ 2015-07-29 Michael R. Crusoe * scripts/*.py,tests/*.py,sandbox/*.py,khmer/*.py,oxli/*.py: - counting_hash, hashtable->countgraph; hashbits->nodegraph; - check_space_for_hashtable->check_space_for_graph; hash_args->graph_args - + many function and variable renames: + counting_hash, countinghash, hashtable->countgraph; + CountingHash->Countgraph + hashbits->nodegraph; Hashbits->Nodegraph; + check_space_for_hashtable->check_space_for_graph; + hash_args->graph_args + * khmer/_khmer.cc: remove unused 'new_hashtable' method; match renames + * TODO: removed several items + * doc/dev/scripts-and-sandbox.rst: fixed hashbang 2015-08-04 Titus Brown and Camille Scott diff --git a/TODO b/TODO index 09c5572404..eeb5d779cf 100644 --- a/TODO +++ b/TODO @@ -1,8 +1,3 @@ -@@ merge refactor => master -@@ review site for paper? - -auto-memory setting. - find-knot speedup: - too many redundant rounds of partitioning? @@ -28,17 +23,14 @@ load-counting/bigcount loading is slooooow ---- -screed bzip screed slice screed fasta/fastq output -screed streaming foo --- fix tests cleanup pyrex/cython stuff -script testing docs! --- @@ -54,8 +46,6 @@ fix tests and test cases to properly isolate/remove temp files. fix dir(ht) -rename new_hashtable to new_countinghash - ### Semi-obsolete comments, pre partitioning: diff --git a/doc/dev/scripts-and-sandbox.rst b/doc/dev/scripts-and-sandbox.rst index 9f48421d8c..0fbab08452 100644 --- a/doc/dev/scripts-and-sandbox.rst +++ b/doc/dev/scripts-and-sandbox.rst @@ -38,7 +38,7 @@ All scripts in ``sandbox/`` must: * be importable (enforced by ``test_import_all`` in ``test_sandbox_scripts.py``) * be mentioned in ``sandbox/README.rst`` -* have a hash-bang line (``#! /usr/bin/env python2``) at the top +* have a hash-bang line (``#! 
/usr/bin/env python``) at the top * be command-line executable (``chmod a+x``) * have a Copyright message (see below) * have lowercase names diff --git a/khmer/__init__.py b/khmer/__init__.py index 21d1fb5c56..2c96f67c67 100644 --- a/khmer/__init__.py +++ b/khmer/__init__.py @@ -10,18 +10,14 @@ from math import log import json -from khmer._khmer import CountingHash as _CountingHash +from khmer._khmer import Countgraph as _Countgraph from khmer._khmer import LabelHash as _LabelHash -from khmer._khmer import Hashbits as _Hashbits +from khmer._khmer import Nodegraph as _Nodegraph from khmer._khmer import HLLCounter as _HLLCounter from khmer._khmer import ReadAligner as _ReadAligner -from khmer._khmer import forward_hash # figuregen/*.py -# tests/test_{functions,countinggraph,labelhash,counting_single}.py - -from khmer._khmer import new_hashtable -# sandbox/{occupy,ctb-iterative-bench{-2-old}}.py -# tests/{test_c_wrapper,test_counting_single}.py +from khmer._khmer import forward_hash +# tests/test_{functions,countinggraph,counting_single}.py from khmer._khmer import forward_hash_no_rc # tests/test_functions.py @@ -53,10 +49,10 @@ def load_nodegraph(filename): Keyword argument: filename -- the name of the nodegraph file """ - hashtable = _Hashbits(1, [1]) - hashtable.load(filename) + nodegraph = _Nodegraph(1, [1]) + nodegraph.load(filename) - return hashtable + return nodegraph def load_countinggraph(filename): @@ -65,10 +61,10 @@ def load_countinggraph(filename): Keyword argument: filename -- the name of the countinggraph file """ - hashtable = _CountingHash(1, [1]) - hashtable.load(filename) + countgraph = _Countgraph(1, [1]) + countgraph.load(filename) - return hashtable + return countgraph def extract_nodegraph_info(filename): @@ -108,7 +104,7 @@ def extract_nodegraph_info(filename): return ksize, round(table_size, -2), n_tables, version, ht_type -def extract_countinghash_info(filename): +def extract_countgraph_info(filename): """Open the given countinggraph file and return a tuple of information. Return: the k-mer size, the table size, the number of tables, the bigcount @@ -129,14 +125,14 @@ def extract_countinghash_info(filename): ulonglong_size = len(pack('Q', 0)) try: - with open(filename, 'rb') as countinghash: - signature, = unpack('4s', countinghash.read(4)) - version, = unpack('B', countinghash.read(1)) - ht_type, = unpack('B', countinghash.read(1)) - use_bigcount, = unpack('B', countinghash.read(1)) - ksize, = unpack('I', countinghash.read(uint_size)) - n_tables, = unpack('B', countinghash.read(1)) - table_size, = unpack('Q', countinghash.read(ulonglong_size)) + with open(filename, 'rb') as countgraph: + signature, = unpack('4s', countgraph.read(4)) + version, = unpack('B', countgraph.read(1)) + ht_type, = unpack('B', countgraph.read(1)) + use_bigcount, = unpack('B', countgraph.read(1)) + ksize, = unpack('I', countgraph.read(uint_size)) + n_tables, = unpack('B', countgraph.read(1)) + table_size, = unpack('Q', countgraph.read(ulonglong_size)) if signature != b'OXLI': raise ValueError("Counting table '{}' is missing file type " "signature. ".format(filename) + str(signature)) @@ -147,17 +143,17 @@ def extract_countinghash_info(filename): ht_type -def calc_expected_collisions(hashtable, force=False, max_false_pos=.2): - """Do a quick & dirty expected collision rate calculation on a hashtable. 
+def calc_expected_collisions(graph, force=False, max_false_pos=.2):
+    """Do a quick & dirty expected collision rate calculation on a graph.
 
     Also check to see that collision rate is within threshold.
 
     Keyword argument:
-    hashtable: the hashtable object to inspect
+    graph: the countgraph or nodegraph object to inspect
     """
-    sizes = hashtable.hashsizes()
+    sizes = graph.hashsizes()
     n_ht = float(len(sizes))
-    occupancy = float(hashtable.n_occupied())
+    occupancy = float(graph.n_occupied())
 
     min_size = min(sizes)
 
     fp_one = occupancy / min_size
 
@@ -233,11 +229,11 @@ def get_n_primes_near_x(number, target):
 # Additional functionality can be added to these classes as appropriate.
 
 
-class CountingHash(_CountingHash):
+class Countgraph(_Countgraph):
 
     def __new__(cls, k, starting_size, n_tables):
         primes = get_n_primes_near_x(n_tables, starting_size)
-        c = _CountingHash.__new__(cls, k, primes)
+        c = _Countgraph.__new__(cls, k, primes)
         c.primes = primes
         return c
 
@@ -245,7 +241,7 @@ def __new__(cls, k, starting_size, n_tables):
 class LabelHash(_LabelHash):
 
     def __new__(cls, k, starting_size, n_tables):
-        hb = Hashbits(k, starting_size, n_tables)
+        hb = Nodegraph(k, starting_size, n_tables)
         c = _LabelHash.__new__(cls, hb)
         c.graph = hb
         return c
 
@@ -255,17 +251,17 @@ class CountingLabelHash(_LabelHash):
 
     def __new__(cls, k, starting_size, n_tables):
         primes = get_n_primes_near_x(n_tables, starting_size)
-        hb = _CountingHash(k, primes)
+        hb = _Countgraph(k, primes)
         c = _LabelHash.__new__(cls, hb)
         c.graph = hb
         return c
 
 
-class Hashbits(_Hashbits):
+class Nodegraph(_Nodegraph):
 
     def __new__(cls, k, starting_size, n_tables):
         primes = get_n_primes_near_x(n_tables, starting_size)
-        c = _Hashbits.__new__(cls, k, primes)
+        c = _Nodegraph.__new__(cls, k, primes)
         c.primes = primes
         return c
 
@@ -296,7 +292,7 @@ class ReadAligner(_ReadAligner):
 
     """Sequence to graph aligner.
 
-    ReadAligner uses a CountingHash (the counts of k-mers in the target DNA
+    ReadAligner uses a Countgraph (the counts of k-mers in the target DNA
     sequences) as an implicit De Bruijn graph. Input DNA sequences are
     aligned to this graph via a paired Hidden Markov Model.
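
[Illustrative sketch, not part of the patch: how calling code changes under the
khmer/__init__.py renames above. The parameters and sequence are invented, and
calc_expected_collisions() is assumed to return the combined false-positive
estimate, as in released khmer versions.]

    import khmer

    counts = khmer.Countgraph(20, 1e6, 4)    # was: khmer.CountingHash(20, 1e6, 4)
    presence = khmer.Nodegraph(20, 1e6, 4)   # was: khmer.Hashbits(20, 1e6, 4)

    seq = "ATGGCTGATCGATGGCTGATCGAT"
    counts.consume(seq)      # count each 20-mer in seq
    presence.consume(seq)    # record 20-mer presence only

    # The first parameter is now named 'graph' and accepts either graph type;
    # force=True skips the max_false_pos threshold check (assumed behavior).
    fp_rate = khmer.calc_expected_collisions(counts, force=True)
    print("estimated false-positive rate:", fp_rate)
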
diff --git a/khmer/_khmer.cc b/khmer/_khmer.cc index d41d438543..69900bb04d 100644 --- a/khmer/_khmer.cc +++ b/khmer/_khmer.cc @@ -682,11 +682,11 @@ static void khmer_hashbits_dealloc(khmer_KHashbits_Object * obj); static PyObject* khmer_hashbits_new(PyTypeObject * type, PyObject * args, PyObject * kwds); -static PyTypeObject khmer_KHashbits_Type +static PyTypeObject khmer_KNodegraph_Type CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("khmer_KHashbits_Object") = { PyVarObject_HEAD_INIT(NULL, 0) /* init & ob_size */ - "_khmer.Hashbits", /* tp_name */ + "_khmer.Nodegraph", /* tp_name */ sizeof(khmer_KHashbits_Object), /* tp_basicsize */ 0, /* tp_itemsize */ (destructor)khmer_hashbits_dealloc, /*tp_dealloc*/ @@ -2632,10 +2632,6 @@ CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("khmer_KHashtable_Object") #define is_hashtable_obj(v) (Py_TYPE(v) == &khmer_KHashtable_Type) -// -// _new_hashtable -// - // // KCountingHash object // @@ -3006,7 +3002,7 @@ count_abundance_distribution_with_reads_parser(khmer_KCountingHash_Object * me, khmer_KHashbits_Object *tracking_obj = NULL; if (!PyArg_ParseTuple(args, "O!O!", &python::khmer_ReadParser_Type, - &rparser_obj, &khmer_KHashbits_Type, &tracking_obj)) { + &rparser_obj, &khmer_KNodegraph_Type, &tracking_obj)) { return NULL; } @@ -3056,7 +3052,7 @@ count_abundance_distribution(khmer_KCountingHash_Object * me, PyObject * args) const char * filename = NULL; khmer_KHashbits_Object * tracking_obj = NULL; - if (!PyArg_ParseTuple(args, "sO!", &filename, &khmer_KHashbits_Type, + if (!PyArg_ParseTuple(args, "sO!", &filename, &khmer_KNodegraph_Type, &tracking_obj)) { return NULL; } @@ -3188,11 +3184,11 @@ static PyMethodDef khmer_counting_methods[] = { static PyObject* _new_counting_hash(PyTypeObject * type, PyObject * args, PyObject * kwds); -static PyTypeObject khmer_KCountingHash_Type +static PyTypeObject khmer_KCountgraph_Type CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("khmer_KCountingHash_Object") = { PyVarObject_HEAD_INIT(NULL, 0) /* init & ob_size */ - "_khmer.CountingHash", /*tp_name*/ + "_khmer.Countgraph", /*tp_name*/ sizeof(khmer_KCountingHash_Object), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)khmer_counting_dealloc, /*tp_dealloc*/ @@ -3231,37 +3227,7 @@ CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("khmer_KCountingHash_Object") _new_counting_hash, /* tp_new */ }; -#define is_counting_obj(v) (Py_TYPE(v) == &khmer_KCountingHash_Type) - -// -// new_hashtable -// - -static PyObject* new_hashtable(PyObject * self, PyObject * args) -{ - unsigned int k = 0; - unsigned long long size = 0; - - if (!PyArg_ParseTuple(args, "IK", &k, &size)) { - return NULL; - } - - khmer_KCountingHash_Object * kcounting_obj = (khmer_KCountingHash_Object *) \ - PyObject_New(khmer_KCountingHash_Object, &khmer_KCountingHash_Type); - - if (kcounting_obj == NULL) { - return NULL; - } - - try { - kcounting_obj->counting = new CountingHash(k, size); - } catch (std::bad_alloc &e) { - return PyErr_NoMemory(); - } - kcounting_obj->khashtable.hashtable = kcounting_obj->counting; - - return (PyObject *) kcounting_obj; -} +#define is_counting_obj(v) (Py_TYPE(v) == &khmer_KCountgraph_Type) // // _new_counting_hash @@ -3327,7 +3293,7 @@ hashbits_count_overlap(khmer_KHashbits_Object * me, PyObject * args) const char * filename; Hashbits * ht2; - if (!PyArg_ParseTuple(args, "sO!", &filename, &khmer_KHashbits_Type, + if (!PyArg_ParseTuple(args, "sO!", &filename, &khmer_KNodegraph_Type, &ht2_argu)) { return NULL; } @@ -3373,7 +3339,7 @@ hashbits_update(khmer_KHashbits_Object * me, PyObject * args) Hashbits * other; 
khmer_KHashbits_Object * other_o; - if (!PyArg_ParseTuple(args, "O!", &khmer_KHashbits_Type, &other_o)) { + if (!PyArg_ParseTuple(args, "O!", &khmer_KNodegraph_Type, &other_o)) { return NULL; } @@ -3447,7 +3413,7 @@ static PyObject* khmer_hashbits_new(PyTypeObject * type, PyObject * args, return (PyObject *) self; } -#define is_hashbits_obj(v) (Py_TYPE(v) == &khmer_KHashbits_Type) +#define is_hashbits_obj(v) (Py_TYPE(v) == &khmer_KNodegraph_Type) //////////////////////////////////////////////////////////////////////////// @@ -3602,7 +3568,7 @@ subset_partition_average_coverages(khmer_KSubsetPartition_Object * me, khmer_KCountingHash_Object * counting_o; - if (!PyArg_ParseTuple(args, "O!", &khmer_KCountingHash_Type, &counting_o)) { + if (!PyArg_ParseTuple(args, "O!", &khmer_KCountgraph_Type, &counting_o)) { return NULL; } @@ -3710,10 +3676,10 @@ static PyObject * khmer_labelhash_new(PyTypeObject *type, PyObject *args, return NULL; } - if (PyObject_TypeCheck(hashtable_o, &khmer_KHashbits_Type)) { + if (PyObject_TypeCheck(hashtable_o, &khmer_KNodegraph_Type)) { khmer_KHashbits_Object * kho = (khmer_KHashbits_Object *) hashtable_o; hashtable = kho->hashbits; - } else if (PyObject_TypeCheck(hashtable_o, &khmer_KCountingHash_Type)) { + } else if (PyObject_TypeCheck(hashtable_o, &khmer_KCountgraph_Type)) { khmer_KCountingHash_Object * cho = (khmer_KCountingHash_Object *) hashtable_o; hashtable = cho->counting; } else { @@ -4123,7 +4089,7 @@ hashtable_traverse_from_tags(khmer_KHashtable_Object * me, PyObject * args) khmer_KCountingHash_Object * counting_o = NULL; unsigned int distance, threshold, frequency; - if (!PyArg_ParseTuple(args, "O!III", &khmer_KCountingHash_Type, &counting_o, + if (!PyArg_ParseTuple(args, "O!III", &khmer_KCountgraph_Type, &counting_o, &distance, &threshold, &frequency)) { return NULL; } @@ -4147,7 +4113,7 @@ hashtable_repartition_largest_partition(khmer_KHashtable_Object * me, if (!PyArg_ParseTuple(args, "OO!III", &subset_o, - &khmer_KCountingHash_Type, &counting_o, + &khmer_KCountgraph_Type, &counting_o, &distance, &threshold, &frequency)) { return NULL; } @@ -4336,7 +4302,7 @@ static PyObject* khmer_ReadAligner_new(PyTypeObject *type, PyObject * args, if(!PyArg_ParseTuple( args, "O!Hd|(dddd)((dddddd)(dddd)(dddd)(dddddd)(dddd)(dddd))", - &khmer_KCountingHash_Type, &ch, &trusted_cov_cutoff, + &khmer_KCountgraph_Type, &ch, &trusted_cov_cutoff, &bits_theta, &scoring_matrix[0], &scoring_matrix[1], &scoring_matrix[2], &scoring_matrix[3], &transitions[0], &transitions[1], &transitions[2], &transitions[3], @@ -4414,7 +4380,7 @@ hashtable_consume_fasta_and_traverse(khmer_KHashtable_Object * me, if (!PyArg_ParseTuple(args, "sIIIO!", &filename, &radius, &big_threshold, &transfer_threshold, - &khmer_KCountingHash_Type, &counting_o)) { + &khmer_KCountgraph_Type, &counting_o)) { return NULL; } @@ -4936,22 +4902,6 @@ get_version_cpp( PyObject * self, PyObject * args ) // static PyMethodDef KhmerMethods[] = { -#if (0) - { - "new_config", new_config, - METH_VARARGS, "Create a default internals config" - }, -#endif -#if (0) - { - "set_config", set_active_config, - METH_VARARGS, "Set active khmer configuration object" - }, -#endif - { - "new_hashtable", new_hashtable, - METH_VARARGS, "Create an empty single-table counting hash" - }, { "forward_hash", forward_hash, METH_VARARGS, "", @@ -4993,8 +4943,8 @@ MOD_INIT(_khmer) return MOD_ERROR_VAL; } - khmer_KCountingHash_Type.tp_base = &khmer_KHashtable_Type; - if (PyType_Ready(&khmer_KCountingHash_Type) < 0) { + khmer_KCountgraph_Type.tp_base 
= &khmer_KHashtable_Type; + if (PyType_Ready(&khmer_KCountgraph_Type) < 0) { return MOD_ERROR_VAL; } @@ -5007,13 +4957,13 @@ MOD_INIT(_khmer) return MOD_ERROR_VAL; } - khmer_KHashbits_Type.tp_base = &khmer_KHashtable_Type; - khmer_KHashbits_Type.tp_methods = khmer_hashbits_methods; - if (PyType_Ready(&khmer_KHashbits_Type) < 0) { + khmer_KNodegraph_Type.tp_base = &khmer_KHashtable_Type; + khmer_KNodegraph_Type.tp_methods = khmer_hashbits_methods; + if (PyType_Ready(&khmer_KNodegraph_Type) < 0) { return MOD_ERROR_VAL; } - khmer_KLabelHash_Type.tp_base = &khmer_KHashbits_Type; + khmer_KLabelHash_Type.tp_base = &khmer_KNodegraph_Type; khmer_KLabelHash_Type.tp_methods = khmer_labelhash_methods; khmer_KLabelHash_Type.tp_new = khmer_labelhash_new; if (PyType_Ready(&khmer_KLabelHash_Type) < 0) { @@ -5055,14 +5005,14 @@ MOD_INIT(_khmer) return MOD_ERROR_VAL; } - Py_INCREF(&khmer_KCountingHash_Type); - if (PyModule_AddObject( m, "CountingHash", - (PyObject *)&khmer_KCountingHash_Type ) < 0) { + Py_INCREF(&khmer_KCountgraph_Type); + if (PyModule_AddObject( m, "Countgraph", + (PyObject *)&khmer_KCountgraph_Type ) < 0) { return MOD_ERROR_VAL; } - Py_INCREF(&khmer_KHashbits_Type); - if (PyModule_AddObject(m, "Hashbits", (PyObject *)&khmer_KHashbits_Type) < 0) { + Py_INCREF(&khmer_KNodegraph_Type); + if (PyModule_AddObject(m, "Nodegraph", (PyObject *)&khmer_KNodegraph_Type) < 0) { return MOD_ERROR_VAL; } diff --git a/khmer/khmer_args.py b/khmer/khmer_args.py index 2467cd095d..31f3eb8a98 100644 --- a/khmer/khmer_args.py +++ b/khmer/khmer_args.py @@ -15,7 +15,7 @@ import screed import khmer -from khmer import extract_countinghash_info, extract_nodegraph_info +from khmer import extract_countgraph_info, extract_nodegraph_info from khmer import __version__ from .utils import print_error from .khmer_logger import log_info @@ -78,7 +78,7 @@ def build_graph_args(descr=None, epilog=None, parser=None): def build_counting_args(descr=None, epilog=None): """Build an ArgumentParser with args for countinggraph based scripts.""" parser = build_graph_args(descr=descr, epilog=epilog) - parser.hashtype = 'countgraph' + parser.graphtype = 'countgraph' return parser @@ -86,11 +86,11 @@ def build_counting_args(descr=None, epilog=None): def build_nodegraph_args(descr=None, epilog=None, parser=None): """Build an ArgumentParser with args for nodegraph based scripts.""" parser = build_graph_args(descr=descr, epilog=epilog, parser=parser) - parser.hashtype = 'nodegraph' + parser.graphtype = 'nodegraph' return parser -# add an argument for loadhash with warning about parameters +# add an argument for loadgraph with warning about parameters def add_loadgraph_args(parser): @@ -110,13 +110,13 @@ def __call__(self, parser, namespace, values, option_string=None): ** Your values for ksize, n_tables, and tablesize ** will be ignored.'''.format(hashfile=values)) - if hasattr(parser, 'hashtype'): + if hasattr(parser, 'graphtype'): info = None - if parser.hashtype == 'nodegraph': + if parser.graphtype == 'nodegraph': info = extract_nodegraph_info( getattr(namespace, self.dest)) - elif parser.hashtype == 'countgraph': - info = extract_countinghash_info( + elif parser.graphtype == 'countgraph': + info = extract_countgraph_info( getattr(namespace, self.dest)) if info: K = info[0] @@ -131,15 +131,15 @@ def __call__(self, parser, namespace, values, option_string=None): action=LoadAction) -def calculate_tablesize(args, hashtype, multiplier=1.0): - if hashtype not in ('countgraph', 'nodegraph'): - raise ValueError("unknown graph type: %s" % 
(hashtype,)) +def calculate_tablesize(args, graphtype, multiplier=1.0): + if graphtype not in ('countgraph', 'nodegraph'): + raise ValueError("unknown graph type: %s" % (graphtype,)) if args.max_memory_usage: - if hashtype == 'countgraph': + if graphtype == 'countgraph': tablesize = args.max_memory_usage / args.n_tables / \ float(multiplier) - elif hashtype == 'nodegraph': + elif graphtype == 'nodegraph': tablesize = 8. * args.max_memory_usage / args.n_tables / \ float(multiplier) else: @@ -156,7 +156,7 @@ def create_nodegraph(args, ksize=None, multiplier=1.0): sys.exit(1) tablesize = calculate_tablesize(args, 'nodegraph', multiplier) - return khmer.Hashbits(ksize, tablesize, args.n_tables) + return khmer.Nodegraph(ksize, tablesize, args.n_tables) def create_countgraph(args, ksize=None, multiplier=1.0): @@ -167,20 +167,20 @@ def create_countgraph(args, ksize=None, multiplier=1.0): sys.exit(1) tablesize = calculate_tablesize(args, 'countgraph', multiplier=multiplier) - return khmer.CountingHash(ksize, tablesize, args.n_tables) + return khmer.Countgraph(ksize, tablesize, args.n_tables) -def report_on_config(args, hashtype='countgraph'): +def report_on_config(args, graphtype='countgraph'): """Print out configuration. Summarize the configuration produced by the command-line arguments made available by this module. """ from khmer.utils import print_error - if hashtype not in ('countgraph', 'nodegraph'): - raise ValueError("unknown graph type: %s" % (hashtype,)) + if graphtype not in ('countgraph', 'nodegraph'): + raise ValueError("unknown graph type: %s" % (graphtype,)) - tablesize = calculate_tablesize(args, hashtype) + tablesize = calculate_tablesize(args, graphtype) print_error("\nPARAMETERS:") print_error(" - kmer size = {0} \t\t(-k)".format(args.ksize)) @@ -189,12 +189,12 @@ def report_on_config(args, hashtype='countgraph'): " - max tablesize = {0:5.2g} \t(-x)".format(tablesize) ) print_error("") - if hashtype == 'countgraph': + if graphtype == 'countgraph': print_error( "Estimated memory usage is {0:.2g} bytes " "(n_tables x max_tablesize)".format( args.n_tables * tablesize)) - elif hashtype == 'nodegraph': + elif graphtype == 'nodegraph': print_error( "Estimated memory usage is {0:.2g} bytes " "(n_tables x max_tablesize / 8)".format(args.n_tables * diff --git a/oxli/build_graph.py b/oxli/build_graph.py index 1edad3b6fc..4ef19dda8a 100644 --- a/oxli/build_graph.py +++ b/oxli/build_graph.py @@ -45,7 +45,7 @@ def build_parser(parser): def main(args): info('build-graph.py', ['graph', 'SeqAn']) - report_on_config(args, hashtype='nodegraph') + report_on_config(args, graphtype='nodegraph') base = args.output_filename filenames = args.input_filenames diff --git a/sandbox/assembly-diff-2.py b/sandbox/assembly-diff-2.py index 1d39969eef..ca5b67b7e5 100755 --- a/sandbox/assembly-diff-2.py +++ b/sandbox/assembly-diff-2.py @@ -24,7 +24,7 @@ def main(): filename2 = sys.argv[2] uniq2 = open(os.path.basename(sys.argv[2]) + '.uniq', 'w') - kh = khmer.Hashbits(K, HASHTABLE_SIZE, N_HT) + kh = khmer.Nodegraph(K, HASHTABLE_SIZE, N_HT) for n, record in enumerate(screed.open(filename1)): if n % 10000 == 0: print('...', filename1, n) diff --git a/sandbox/assembly-diff.py b/sandbox/assembly-diff.py index e3fdee0052..c884d54877 100755 --- a/sandbox/assembly-diff.py +++ b/sandbox/assembly-diff.py @@ -26,9 +26,9 @@ def main(): uniq2 = open(os.path.basename(sys.argv[2]) + '.uniq', 'w') paths = sys.argv[3] - kh1 = khmer.Hashbits(K, HASHTABLE_SIZE, N_HT) + kh1 = khmer.Nodegraph(K, HASHTABLE_SIZE, N_HT) 
kh1.consume_fasta(filename1) - kh2 = khmer.Hashbits(K, HASHTABLE_SIZE, N_HT) + kh2 = khmer.Nodegraph(K, HASHTABLE_SIZE, N_HT) kh2.consume_fasta(filename2) for record in screed.open(paths): diff --git a/sandbox/bloom-count-intersection.py b/sandbox/bloom-count-intersection.py index 71405d46e8..63ac2a437e 100755 --- a/sandbox/bloom-count-intersection.py +++ b/sandbox/bloom-count-intersection.py @@ -20,7 +20,7 @@ def main(): HT_SIZE = int(sys.argv[3]) # size of hashtable N_HT = int(sys.argv[4]) # number of hashtables - ht = khmer.Hashbits(K, HT_SIZE, N_HT) + ht = khmer.Nodegraph(K, HT_SIZE, N_HT) n_unique = 0 for n, record in enumerate(fasta_iter(open(filename))): @@ -36,7 +36,7 @@ def main(): print('# of occupied bin:', ht.n_occupied()) filename2 = sys.argv[5] - ht2 = khmer.Hashbits(K, HT_SIZE, N_HT) + ht2 = khmer.Nodegraph(K, HT_SIZE, N_HT) n_unique = 0 n_overlap = 0 for n, record in enumerate(fasta_iter(open(filename2))): diff --git a/sandbox/bloom-count.py b/sandbox/bloom-count.py index fc833cc8eb..5b32b121ab 100755 --- a/sandbox/bloom-count.py +++ b/sandbox/bloom-count.py @@ -20,7 +20,7 @@ def main(): HT_SIZE = int(sys.argv[3]) # size of hashtable N_HT = int(sys.argv[4]) # number of hashtables - ht = khmer.Hashbits(K, HT_SIZE, N_HT) + ht = khmer.Nodegraph(K, HT_SIZE, N_HT) n_unique = 0 for n, record in enumerate(fasta_iter(open(filename))): diff --git a/sandbox/build-sparse-graph.py b/sandbox/build-sparse-graph.py index 1a12b3b408..6c9e734550 100755 --- a/sandbox/build-sparse-graph.py +++ b/sandbox/build-sparse-graph.py @@ -20,7 +20,7 @@ def main(): K = int(sys.argv[1]) x = float(sys.argv[2]) - ht = khmer.Hashbits(K, x, 4) + ht = khmer.Nodegraph(K, x, 4) sparse_graph = gt.Graph() hashes = sparse_graph.new_vertex_property("long long") diff --git a/sandbox/calc-error-profile.py b/sandbox/calc-error-profile.py index 2cceb306fd..10225ee2bb 100755 --- a/sandbox/calc-error-profile.py +++ b/sandbox/calc-error-profile.py @@ -67,7 +67,7 @@ def main(): # build a small counting hash w/default parameters. In general there # should be no need to change these parameters. 
- ht = khmer.CountingHash(K, HASHSIZE, N_HT) + ht = khmer.Countgraph(K, HASHSIZE, N_HT) # initialize list to contain counts of errors by position positions = [0] * MAX_SEQ_LEN diff --git a/sandbox/collect-variants.py b/sandbox/collect-variants.py index f63ca5468f..a8d4211609 100755 --- a/sandbox/collect-variants.py +++ b/sandbox/collect-variants.py @@ -64,7 +64,7 @@ def main(): ht = khmer.load_countinggraph(args.loadhash) else: print('making hashtable') - ht = khmer.CountingHash(K, HT_SIZE, N_HT) + ht = khmer.Countgraph(K, HT_SIZE, N_HT) aligner = khmer.ReadAligner(ht, args.trusted_cutoff, args.bits_theta) diff --git a/sandbox/count-kmers-single.py b/sandbox/count-kmers-single.py index d12cc4b589..9dfcb78601 100755 --- a/sandbox/count-kmers-single.py +++ b/sandbox/count-kmers-single.py @@ -51,13 +51,13 @@ def main(): check_input_files(args.input_sequence_filename, False) print ('making k-mer counting table', file=sys.stderr) - countinggraph = khmer.CountingHash(args.ksize, args.max_tablesize, + countinggraph = khmer.Countgraph(args.ksize, args.max_tablesize, args.n_tables) # @CTB countinggraph.set_use_bigcount(args.bigcount) kmer_size = countinggraph.ksize() hashsizes = countinggraph.hashsizes() - tracking = khmer._Hashbits( # pylint: disable=protected-access + tracking = khmer._Nodegraph( # pylint: disable=protected-access kmer_size, hashsizes) print ('kmer_size: %s' % countinggraph.ksize(), file=sys.stderr) diff --git a/sandbox/count-kmers.py b/sandbox/count-kmers.py index 5f3c437429..91f41847b3 100644 --- a/sandbox/count-kmers.py +++ b/sandbox/count-kmers.py @@ -54,7 +54,7 @@ def main(): kmer_size = countinggraph.ksize() hashsizes = countinggraph.hashsizes() - tracking = khmer._Hashbits( # pylint: disable=protected-access + tracking = khmer._Nodegraph( # pylint: disable=protected-access kmer_size, hashsizes) if args.output_file is None: diff --git a/sandbox/find-high-abund-kmers.py b/sandbox/find-high-abund-kmers.py index db43686997..81ae02bda3 100755 --- a/sandbox/find-high-abund-kmers.py +++ b/sandbox/find-high-abund-kmers.py @@ -65,7 +65,7 @@ def main(): ### print('making hashtable') - ht = khmer.CountingHash(K, HT_SIZE, N_HT) + ht = khmer.Countgraph(K, HT_SIZE, N_HT) ht.set_use_bigcount(True) print('consuming input', input) diff --git a/sandbox/graph-size.py b/sandbox/graph-size.py index 41cdf07b88..8ca066a879 100755 --- a/sandbox/graph-size.py +++ b/sandbox/graph-size.py @@ -42,7 +42,7 @@ def main(): print('--') print('creating ht') - ht = khmer.Hashbits(K, HASHTABLE_SIZE, N_HT) + ht = khmer.Nodegraph(K, HASHTABLE_SIZE, N_HT) print('eating fa', infile) total_reads, n_consumed = ht.consume_fasta(infile) outfp = open(outfile, 'w') diff --git a/sandbox/normalize-by-median-pct.py b/sandbox/normalize-by-median-pct.py index d7f82fab85..eba83973db 100755 --- a/sandbox/normalize-by-median-pct.py +++ b/sandbox/normalize-by-median-pct.py @@ -88,7 +88,7 @@ def main(): ht = khmer.load_countinggraph(args.loadhash) else: print('making hashtable') - ht = khmer.CountingHash(K, HT_SIZE, N_HT) + ht = khmer.Countgraph(K, HT_SIZE, N_HT) total = 0 discarded = 0 diff --git a/sandbox/optimal_args_hashbits.py b/sandbox/optimal_args_hashbits.py index 46e4f5c784..e1028f9a15 100644 --- a/sandbox/optimal_args_hashbits.py +++ b/sandbox/optimal_args_hashbits.py @@ -39,7 +39,7 @@ def get_parser(): def main(): info('optimal_args_nodegraph.py', ['graph', 'SeqAn']) args = get_parser().parse_args() - report_on_config(args, hashtype='nodegraph') + report_on_config(args, graphtype='nodegraph') filenames = 
args.input_filenames diff --git a/sandbox/print-stoptags.py b/sandbox/print-stoptags.py index 1e59b44f53..0729629b90 100755 --- a/sandbox/print-stoptags.py +++ b/sandbox/print-stoptags.py @@ -13,7 +13,7 @@ def main(): - ht = khmer.Hashbits(32, 1, 1) + ht = khmer.Nodegraph(32, 1, 1) ht.load_stop_tags(sys.argv[1]) ht.print_stop_tags(os.path.basename(sys.argv[1]) + '.txt') diff --git a/sandbox/print-tagset.py b/sandbox/print-tagset.py index c8612784a1..ea52e26d5b 100755 --- a/sandbox/print-tagset.py +++ b/sandbox/print-tagset.py @@ -14,7 +14,7 @@ def main(): - ht = khmer.Hashbits(32, 1, 1) + ht = khmer.Nodegraph(32, 1, 1) ht.load_tagset(sys.argv[1]) print('loaded!') ht.print_tagset(os.path.basename(sys.argv[1]) + '.txt') diff --git a/sandbox/stoptags-by-position.py b/sandbox/stoptags-by-position.py index 1b92fa621b..09c232d2a0 100755 --- a/sandbox/stoptags-by-position.py +++ b/sandbox/stoptags-by-position.py @@ -14,7 +14,7 @@ def main(): - ht = khmer.Hashbits(K, 1, 1) + ht = khmer.Nodegraph(K, 1, 1) x = [0] * 255 y = [0] * 255 diff --git a/sandbox/subset-report.py b/sandbox/subset-report.py index fe230e6238..b8aa060f26 100755 --- a/sandbox/subset-report.py +++ b/sandbox/subset-report.py @@ -17,7 +17,7 @@ def main(): subset_filenames = sys.argv[1:] - ht = khmer.Hashbits(K, 1, 1) + ht = khmer.Nodegraph(K, 1, 1) for filename in subset_filenames: print('--') print('partition map:', filename) diff --git a/sandbox/sweep-files.py b/sandbox/sweep-files.py index 5804748320..56ef60042c 100755 --- a/sandbox/sweep-files.py +++ b/sandbox/sweep-files.py @@ -108,7 +108,7 @@ def main(): if args.ksize < MIN_KSIZE: args.ksize = MIN_KSIZE - report_on_config(args, hashtype='nodegraph') + report_on_config(args, graphtype='nodegraph') K = args.ksize HT_SIZE = args.max_tablesize diff --git a/sandbox/sweep-out-reads-with-contigs.py b/sandbox/sweep-out-reads-with-contigs.py index 55f7a91146..afd0cf769c 100755 --- a/sandbox/sweep-out-reads-with-contigs.py +++ b/sandbox/sweep-out-reads-with-contigs.py @@ -22,7 +22,7 @@ def main(): outfile = sys.argv[3] # create a nodegraph data structure - ht = khmer.Hashbits(K, 1, 1) + ht = khmer.Nodegraph(K, 1, 1) # tag every k-mer in the contigs ht._set_tag_density(0) diff --git a/sandbox/sweep-reads.py b/sandbox/sweep-reads.py index eef2da923f..3d1ac26ad1 100755 --- a/sandbox/sweep-reads.py +++ b/sandbox/sweep-reads.py @@ -213,7 +213,7 @@ def main(): if args.ksize < MIN_KSIZE: args.ksize = MIN_KSIZE - report_on_config(args, hashtype='nodegraph') + report_on_config(args, graphtype='nodegraph') K = args.ksize HT_SIZE = args.max_tablesize diff --git a/sandbox/sweep-reads2.py b/sandbox/sweep-reads2.py index fdd677c83c..528dd8778a 100755 --- a/sandbox/sweep-reads2.py +++ b/sandbox/sweep-reads2.py @@ -58,7 +58,7 @@ def main(): outfp = open(outfile, 'w') # create a nodegraph data structure - ht = khmer.Hashbits(K, HT_SIZE, N_HT) + ht = khmer.Nodegraph(K, HT_SIZE, N_HT) # load contigs, connect into N partitions print('loading input reads from', inp) diff --git a/sandbox/sweep-reads3.py b/sandbox/sweep-reads3.py index 0ddfde63f7..fd4b82b154 100755 --- a/sandbox/sweep-reads3.py +++ b/sandbox/sweep-reads3.py @@ -64,7 +64,7 @@ def main(): query_list = [] for n, inp_name in enumerate(inputlist): # create a nodegraph data structure - ht = khmer.Hashbits(K, HT_SIZE, N_HT) + ht = khmer.Nodegraph(K, HT_SIZE, N_HT) outfile = os.path.basename(inp_name) + '.sweep3' outfp = open(outfile, 'w') diff --git a/scripts/abundance-dist.py b/scripts/abundance-dist.py index 2f52ea1f33..c651c1f6a7 100755 --- 
a/scripts/abundance-dist.py +++ b/scripts/abundance-dist.py @@ -78,11 +78,10 @@ def main(): kmer_size = countinggraph.ksize() hashsizes = countinggraph.hashsizes() - tracking = khmer._Hashbits( # pylint: disable=protected-access + tracking = khmer._Nodegraph( # pylint: disable=protected-access kmer_size, hashsizes) print('K:', kmer_size, file=sys.stderr) - print('HT sizes:', hashsizes, file=sys.stderr) print('outputting to', args.output_histogram_filename, file=sys.stderr) if args.output_histogram_filename in ('-', '/dev/stdout'): diff --git a/scripts/annotate-partitions.py b/scripts/annotate-partitions.py index e09f95895f..a2ee688d79 100755 --- a/scripts/annotate-partitions.py +++ b/scripts/annotate-partitions.py @@ -67,7 +67,7 @@ def main(): ksize = args.ksize filenames = args.input_filenames - htable = khmer.Hashbits(ksize, 1, 1) + htable = khmer.Nodegraph(ksize, 1, 1) partitionmap_file = args.graphbase + '.pmap.merged' diff --git a/scripts/count-overlap.py b/scripts/count-overlap.py index d33681b2bd..aeb7515304 100755 --- a/scripts/count-overlap.py +++ b/scripts/count-overlap.py @@ -51,7 +51,7 @@ def get_parser(): def main(): info('count-overlap.py', ['counting']) args = get_parser().parse_args() - report_on_config(args, hashtype='nodegraph') + report_on_config(args, graphtype='nodegraph') for infile in [args.ptfile, args.fafile]: check_input_files(infile, args.force) diff --git a/scripts/do-partition.py b/scripts/do-partition.py index 52a78339ee..c9e2ec1ec0 100755 --- a/scripts/do-partition.py +++ b/scripts/do-partition.py @@ -114,7 +114,7 @@ def main(): # pylint: disable=too-many-locals,too-many-statements info('do-partition.py', ['graph']) args = get_parser().parse_args() - report_on_config(args, hashtype='nodegraph') + report_on_config(args, graphtype='nodegraph') for infile in args.input_filenames: check_input_files(infile, args.force) @@ -211,7 +211,7 @@ def main(): # pylint: disable=too-many-locals,too-many-statements print('loading %d pmap files (first one: %s)' % (len(pmap_files), pmap_files[0]), file=sys.stderr) - htable = khmer.Hashbits(args.ksize, 1, 1) + htable = khmer.Nodegraph(args.ksize, 1, 1) for pmap_file in pmap_files: print('merging', pmap_file, file=sys.stderr) diff --git a/scripts/filter-stoptags.py b/scripts/filter-stoptags.py index d7e87f2255..f2bfe2896e 100755 --- a/scripts/filter-stoptags.py +++ b/scripts/filter-stoptags.py @@ -65,7 +65,7 @@ def main(): check_space(infiles, args.force) print('loading stop tags, with K', args.ksize, file=sys.stderr) - htable = khmer.Hashbits(args.ksize, 1, 1) + htable = khmer.Nodegraph(args.ksize, 1, 1) htable.load_stop_tags(stoptags) def process_fn(record): diff --git a/scripts/merge-partitions.py b/scripts/merge-partitions.py index c77d822b85..7a716062d1 100755 --- a/scripts/merge-partitions.py +++ b/scripts/merge-partitions.py @@ -61,7 +61,7 @@ def main(): (len(pmap_files), pmap_files[0]), file=sys.stderr) ksize = args.ksize - htable = khmer.Hashbits(ksize, 1, 1) + htable = khmer.Nodegraph(ksize, 1, 1) for _ in pmap_files: check_input_files(_, args.force) diff --git a/tests/test_countinggraph.py b/tests/test_countgraph.py similarity index 87% rename from tests/test_countinggraph.py rename to tests/test_countgraph.py index 3b1bca1544..44b60f734b 100644 --- a/tests/test_countinggraph.py +++ b/tests/test_countgraph.py @@ -40,10 +40,10 @@ def teardown(): utils.cleanup() -class Test_CountingHash(object): +class Test_Countgraph(object): def setup(self): - self.hi = khmer._CountingHash(12, PRIMES_1m) + self.hi = 
khmer._Countgraph(12, PRIMES_1m) def test_failed_get(self): GG = 'G' * 12 # forward_hash: 11184810 @@ -124,7 +124,7 @@ def test_collision_3(self): def test_get_raw_tables(): - ht = khmer.CountingHash(20, 1e5, 4) + ht = khmer.Countgraph(20, 1e5, 4) tables = ht.get_raw_tables() for size, table in zip(ht.hashsizes(), tables): @@ -133,7 +133,7 @@ def test_get_raw_tables(): def test_get_raw_tables_view(): - ht = khmer.CountingHash(20, 1e5, 4) + ht = khmer.Countgraph(20, 1e5, 4) tables = ht.get_raw_tables() for tab in tables: assert sum(tab.tolist()) == 0 @@ -145,7 +145,7 @@ def test_get_raw_tables_view(): @attr('huge') def test_toobig(): try: - ct = khmer.CountingHash(30, 1e13, 1) + ct = khmer.Countgraph(30, 1e13, 1) assert 0, "this should fail" except MemoryError as err: print(str(err)) @@ -155,7 +155,7 @@ def test_3_tables(): x = list(PRIMES_1m) x.append(1000005) - hi = khmer._CountingHash(12, x) + hi = khmer._Countgraph(12, x) GG = 'G' * 12 # forward_hash: 11184810 assert khmer.forward_hash(GG, 12) == 11184810 @@ -186,7 +186,7 @@ def test_3_tables(): def test_simple_median(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("AAAAAA") (median, average, stddev) = hi.get_median_count("AAAAAA") @@ -225,7 +225,7 @@ def test_simple_median(): def test_median_too_short(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("AAAAAA") try: @@ -236,7 +236,7 @@ def test_median_too_short(): def test_median_at_least(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("AAAAAA") assert hi.median_at_least("AAAAAA", 1) @@ -261,7 +261,7 @@ def test_median_at_least(): def test_median_at_least_single_gt(): K = 20 - hi = khmer.CountingHash(K, 1e6, 2) + hi = khmer.Countgraph(K, 1e6, 2) kmers = ['ATCGATCGATCGATCGATCG', 'GTACGTACGTACGTACGTAC', @@ -274,7 +274,7 @@ def test_median_at_least_single_gt(): def test_median_at_least_single_lt(): K = 20 - hi = khmer.CountingHash(K, 1e6, 2) + hi = khmer.Countgraph(K, 1e6, 2) kmers = ['ATCGATCGATCGATCGATCG', 'GTACGTACGTACGTACGTAC', @@ -288,7 +288,7 @@ def test_median_at_least_single_lt(): def test_median_at_least_odd_gt(): # test w/odd number of k-mers K = 20 - hi = khmer.CountingHash(K, 1e6, 2) + hi = khmer.Countgraph(K, 1e6, 2) seqs = ['ATCGATCGATCGATCGATCGCC', 'GTACGTACGTACGTACGTACCC', @@ -301,7 +301,7 @@ def test_median_at_least_odd_gt(): def test_median_at_least_odd_lt(): K = 20 - hi = khmer.CountingHash(K, 1e6, 2) + hi = khmer.Countgraph(K, 1e6, 2) seqs = ['ATCGATCGATCGATCGATCGCC', 'GTACGTACGTACGTACGTACCC', @@ -315,7 +315,7 @@ def test_median_at_least_odd_lt(): # Test median with even number of k-mers def test_median_at_least_even_gt(): K = 20 - hi = khmer.CountingHash(K, 1e6, 2) + hi = khmer.Countgraph(K, 1e6, 2) seqs = ['ATCGATCGATCGATCGATCGCCC', 'GTACGTACGTACGTACGTACCCC', @@ -328,7 +328,7 @@ def test_median_at_least_even_gt(): def test_median_at_least_even_lt(): K = 20 - hi = khmer.CountingHash(K, 1e6, 2) + hi = khmer.Countgraph(K, 1e6, 2) seqs = ['ATCGATCGATCGATCGATCGCCC', 'GTACGTACGTACGTACGTACCCC', @@ -342,7 +342,7 @@ def test_median_at_least_even_lt(): def test_median_at_least_comp(): K = 20 C = 4 - hi = khmer.CountingHash(K, 1e6, 2) + hi = khmer.Countgraph(K, 1e6, 2) seqs = ['ATCGATCGATCGATCGATCGCCC', 'GTACGTACGTACGTACGTACCCC', @@ -358,7 +358,7 @@ def test_median_at_least_comp(): def test_median_at_least_exception(): - ht = khmer.CountingHash(20, 1e6, 2) + ht = khmer.Countgraph(20, 1e6, 2) try: ht.median_at_least('ATGGCTGATCGAT', 1) assert 0, "should have thrown 
ValueError" @@ -367,25 +367,25 @@ def test_median_at_least_exception(): def test_simple_kadian(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") assert hi.get_kadian_count("ACTGCTATCTCTAGAGCTATG") == 1 - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") hi.consume("ACTGCTATCTCTAGAcCTATG") # ---------------^ x = hi.get_kadian_count("ACTGCTATCTCTAGAGCTATG") assert x == 2, x - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") hi.consume("ACTGCTATCTCTAGAcCTATG") # ---------------^---^ x = hi.get_kadian_count("ACTGCTATCTCTAGAGCTATG") assert x == 2 - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") hi.consume("ACTGCTATCTCTAGtGCTAcG") # --------------^^---^ @@ -394,11 +394,11 @@ def test_simple_kadian(): def test_simple_kadian_2(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") assert hi.get_kadian_count("ACTGCTATCTCTAGAGCTATG") == 1 - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") # hi.consume("ACaGCTATCTCTAGAGCTATG") hi.consume("ACAGCTATCTCTAGAGCTATG") @@ -406,7 +406,7 @@ def test_simple_kadian_2(): x = hi.get_kadian_count("ACTGCTATCTCTAGAGCTATG") assert x == 2, x - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") # hi.consume("ACaGCTATCTCTAGAcCTATG") hi.consume("ACAGCTATCTCTAGACCTATG") @@ -414,7 +414,7 @@ def test_simple_kadian_2(): x = hi.get_kadian_count("ACTGCTATCTCTAGAGCTATG") assert x == 1, x - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") # hi.consume("ACTGCTATCgCTAGAGCTATG") hi.consume("ACTGCTATCGCTAGAGCTATG") @@ -424,11 +424,11 @@ def test_simple_kadian_2(): def test_2_kadian(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") assert hi.get_kadian_count("ACTGCTATCTCTAGAGCTATG", 2) == 1 - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") # hi.consume("ACTGCTATCTCTAGAcCTATG") hi.consume("ACTGCTATCTCTAGACCTATG") @@ -436,14 +436,14 @@ def test_2_kadian(): x = hi.get_kadian_count("ACTGCTATCTCTAGAGCTATG", 2) assert x == 2, x - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") # hi.consume("ACTGCTATCTCTAGAcCTAtG") hi.consume("ACTGCTATCTCTAGACCTATG") # ---------------^---^ assert hi.get_kadian_count("ACTGCTATCTCTAGAGCTATG", 2) == 2 - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") # hi.consume("ACTGCTATCTCTACtcCTAtG") hi.consume("ACTGCTATCTCTACTCCTATG") @@ -451,7 +451,7 @@ def test_2_kadian(): x = hi.get_kadian_count("ACTGCTATCTCTAGAGCTATG", 2) assert x == 2, x - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("ACTGCTATCTCTAGAGCTATG") # hi.consume("ACTGCTgTCTCTACtcCTAtG") hi.consume("ACTGCTGTCTCTACTCCTATG") @@ -461,7 +461,7 @@ def test_2_kadian(): def test_get_kmer_counts_too_short(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("AAAAAA") counts = hi.get_kmer_counts("A") @@ -469,7 +469,7 @@ def test_get_kmer_counts_too_short(): def test_get_kmer_hashes_too_short(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) 
hi.consume("AAAAAA") hashes = hi.get_kmer_hashes("A") @@ -477,7 +477,7 @@ def test_get_kmer_hashes_too_short(): def test_get_kmers_too_short(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("AAAAAA") kmers = hi.get_kmers("A") @@ -485,7 +485,7 @@ def test_get_kmers_too_short(): def test_get_kmer_counts(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("AAAAAA") counts = hi.get_kmer_counts("AAAAAA") @@ -522,7 +522,7 @@ def test_get_kmer_counts(): def test_get_kmer_hashes(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume("AAAAAA") hashes = hi.get_kmer_hashes("AAAAAA") @@ -559,7 +559,7 @@ def test_get_kmer_hashes(): def test_get_kmers(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) kmers = hi.get_kmers("AAAAAA") assert kmers == ["AAAAAA"] @@ -576,7 +576,7 @@ def do_test(ctfile): sizes = khmer.get_n_primes_near_x(1, 2 ** 31 + 1000) - orig = khmer._CountingHash(12, sizes) + orig = khmer._Countgraph(12, sizes) orig.consume_fasta(inpath) orig.save(savepath) @@ -598,20 +598,20 @@ def test_save_load(): sizes = list(PRIMES_1m) sizes.append(1000005) - hi = khmer._CountingHash(12, sizes) + hi = khmer._Countgraph(12, sizes) hi.consume_fasta(inpath) hi.save(savepath) - ht = khmer._CountingHash(12, sizes) + ht = khmer._Countgraph(12, sizes) try: ht.load(savepath) except OSError as err: assert 0, 'Should not produce an OSError: ' + str(err) - tracking = khmer._Hashbits(12, sizes) + tracking = khmer._Nodegraph(12, sizes) x = hi.abundance_distribution(inpath, tracking) - tracking = khmer._Hashbits(12, sizes) + tracking = khmer._Nodegraph(12, sizes) y = ht.abundance_distribution(inpath, tracking) assert sum(x) == 3966, sum(x) @@ -625,7 +625,7 @@ def test_load_truncated(): sizes = khmer.get_n_primes_near_x(3, 200) - hi = khmer._CountingHash(12, sizes) + hi = khmer._Countgraph(12, sizes) hi.consume_fasta(inpath) hi.save(savepath) @@ -652,7 +652,7 @@ def test_load_gz(): sizes.append(1000005) # save uncompressed hashtable. - hi = khmer._CountingHash(12, sizes) + hi = khmer._Countgraph(12, sizes) hi.consume_fasta(inpath) hi.save(savepath) @@ -664,16 +664,16 @@ def test_load_gz(): in_file.close() # load compressed hashtable. 
- ht = khmer._CountingHash(12, sizes) + ht = khmer._Countgraph(12, sizes) try: ht.load(loadpath) except OSError as err: assert 0, "Should not produce an OSError: " + str(err) - tracking = khmer._Hashbits(12, sizes) + tracking = khmer._Nodegraph(12, sizes) x = hi.abundance_distribution(inpath, tracking) - tracking = khmer._Hashbits(12, sizes) + tracking = khmer._Nodegraph(12, sizes) y = ht.abundance_distribution(inpath, tracking) assert sum(x) == 3966, sum(x) @@ -687,20 +687,20 @@ def test_save_load_gz(): sizes = list(PRIMES_1m) sizes.append(1000005) - hi = khmer._CountingHash(12, sizes) + hi = khmer._Countgraph(12, sizes) hi.consume_fasta(inpath) hi.save(savepath) - ht = khmer._CountingHash(12, sizes) + ht = khmer._Countgraph(12, sizes) try: ht.load(savepath) except OSError as err: assert 0, 'Should not produce an OSError: ' + str(err) - tracking = khmer._Hashbits(12, sizes) + tracking = khmer._Nodegraph(12, sizes) x = hi.abundance_distribution(inpath, tracking) - tracking = khmer._Hashbits(12, sizes) + tracking = khmer._Nodegraph(12, sizes) y = ht.abundance_distribution(inpath, tracking) assert sum(x) == 3966, sum(x) @@ -719,7 +719,7 @@ def do_load_ct(fname): def test_trim_full(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume(DNA) hi.consume(DNA) @@ -729,7 +729,7 @@ def test_trim_full(): def test_trim_short(): - hi = khmer.CountingHash(6, 1e6, 2) + hi = khmer.Countgraph(6, 1e6, 2) hi.consume(DNA) hi.consume(DNA[:50]) @@ -741,7 +741,7 @@ def test_trim_short(): def test_find_spectral_error_positions_1(): - hi = khmer.CountingHash(8, 1e6, 2) + hi = khmer.Countgraph(8, 1e6, 2) hi.consume(DNA) hi.consume(DNA[:30]) @@ -754,7 +754,7 @@ def test_find_spectral_error_positions_1(): def test_find_spectral_error_positions_2(): - hi = khmer.CountingHash(8, 1e6, 2) + hi = khmer.Countgraph(8, 1e6, 2) hi.consume(DNA) hi.consume(DNA) @@ -764,7 +764,7 @@ def test_find_spectral_error_positions_2(): def test_find_spectral_error_positions_6(): - hi = khmer.CountingHash(8, 1e6, 2) + hi = khmer.Countgraph(8, 1e6, 2) hi.consume(DNA) hi.consume(DNA[1:]) @@ -777,7 +777,7 @@ def test_find_spectral_error_positions_6(): def test_find_spectral_error_positions_4(): - hi = khmer.CountingHash(8, 1e6, 2) + hi = khmer.Countgraph(8, 1e6, 2) hi.consume(DNA) @@ -786,7 +786,7 @@ def test_find_spectral_error_positions_4(): def test_find_spectral_error_positions_5(): - hi = khmer.CountingHash(8, 1e6, 2) + hi = khmer.Countgraph(8, 1e6, 2) hi.consume(DNA) hi.consume(DNA[:10]) @@ -798,7 +798,7 @@ def test_find_spectral_error_positions_5(): def test_find_spectral_error_locs7(): K = 8 - hi = khmer.CountingHash(K, 1e6, 2) + hi = khmer.Countgraph(K, 1e6, 2) hi.consume(DNA) hi.consume(DNA[K:]) @@ -811,7 +811,7 @@ def test_find_spectral_error_locs7(): def test_find_spectral_error_positions_err(): - hi = khmer.CountingHash(8, 1e6, 2) + hi = khmer.Countgraph(8, 1e6, 2) try: posns = hi.find_spectral_error_positions(DNA[:6], 1) @@ -828,7 +828,7 @@ def test_find_spectral_error_positions_err(): def test_maxcount(): # hashtable should saturate at some point so as not to overflow counter - kh = khmer.CountingHash(4, 4 ** 4, 4) + kh = khmer.Countgraph(4, 4 ** 4, 4) kh.set_use_bigcount(False) last_count = None @@ -846,7 +846,7 @@ def test_maxcount(): def test_maxcount_with_bigcount(): # hashtable should not saturate, if use_bigcount is set. 
- kh = khmer.CountingHash(4, 4 ** 4, 4) + kh = khmer.Countgraph(4, 4 ** 4, 4) kh.set_use_bigcount(True) last_count = None @@ -864,7 +864,7 @@ def test_maxcount_with_bigcount(): def test_maxcount_with_bigcount_save(): # hashtable should not saturate, if use_bigcount is set. - kh = khmer.CountingHash(4, 4 ** 4, 4) + kh = khmer.Countgraph(4, 4 ** 4, 4) kh.set_use_bigcount(True) for i in range(0, 1000): @@ -874,7 +874,7 @@ def test_maxcount_with_bigcount_save(): savepath = utils.get_temp_filename('tempcountingsave.ht') kh.save(savepath) - kh = khmer.CountingHash(1, 1, 1) + kh = khmer.Countgraph(1, 1, 1) try: kh.load(savepath) except OSError as err: @@ -887,13 +887,13 @@ def test_maxcount_with_bigcount_save(): def test_bigcount_save(): # hashtable should not saturate, if use_bigcount is set. - kh = khmer.CountingHash(4, 4 ** 4, 4) + kh = khmer.Countgraph(4, 4 ** 4, 4) kh.set_use_bigcount(True) savepath = utils.get_temp_filename('tempcountingsave.ht') kh.save(savepath) - kh = khmer.CountingHash(1, 1, 1) + kh = khmer.Countgraph(1, 1, 1) try: kh.load(savepath) except OSError as err: @@ -911,13 +911,13 @@ def test_bigcount_save(): def test_nobigcount_save(): - kh = khmer.CountingHash(4, 4 ** 4, 4) + kh = khmer.Countgraph(4, 4 ** 4, 4) # kh.set_use_bigcount(False) <-- this is the default savepath = utils.get_temp_filename('tempcountingsave.ht') kh.save(savepath) - kh = khmer.CountingHash(1, 1, 1) + kh = khmer.Countgraph(1, 1, 1) try: kh.load(savepath) except OSError as err: @@ -935,8 +935,8 @@ def test_nobigcount_save(): def test_bigcount_abund_dist(): - kh = khmer.CountingHash(18, 1e2, 4) - tracking = khmer.Hashbits(18, 1e2, 4) + kh = khmer.Countgraph(18, 1e2, 4) + tracking = khmer.Nodegraph(18, 1e2, 4) kh.set_use_bigcount(True) seqpath = utils.get_test_data('test-abund-read-2.fa') @@ -951,8 +951,8 @@ def test_bigcount_abund_dist(): def test_bigcount_abund_dist_2(): - kh = khmer.CountingHash(18, 1e7, 4) - tracking = khmer.Hashbits(18, 1e7, 4) + kh = khmer.Countgraph(18, 1e7, 4) + tracking = khmer.Nodegraph(18, 1e7, 4) kh.set_use_bigcount(True) seqpath = utils.get_test_data('test-abund-read.fa') @@ -969,7 +969,7 @@ def test_bigcount_abund_dist_2(): def test_bigcount_overflow(): - kh = khmer.CountingHash(18, 1e7, 4) + kh = khmer.Countgraph(18, 1e7, 4) kh.set_use_bigcount(True) for i in range(0, 70000): @@ -979,12 +979,12 @@ def test_bigcount_overflow(): def test_get_ksize(): - kh = khmer.CountingHash(22, 1, 1) + kh = khmer.Countgraph(22, 1, 1) assert kh.ksize() == 22 def test_get_hashsizes(): - kh = khmer.CountingHash(22, 100, 4) + kh = khmer.Countgraph(22, 100, 4) # Py2/3 hack, longify converts to long in py2, remove once py2 isn't # supported any longer. 
expected = utils.longify([97, 89, 83, 79]) @@ -994,14 +994,14 @@ def test_get_hashsizes(): # def test_collect_high_abundance_kmers(): # seqpath = utils.get_test_data('test-abund-read-2.fa') # -# kh = khmer.CountingHash(18, 1e6, 4) +# kh = khmer.Countgraph(18, 1e6, 4) # hb = kh.collect_high_abundance_kmers(seqpath, 2, 4) def test_load_notexist_should_fail(): savepath = utils.get_temp_filename('tempcountingsave0.ht') - hi = khmer.CountingHash(12, 1000, 2) + hi = khmer.Countgraph(12, 1000, 2) try: hi.load(savepath) assert 0, "load should fail" @@ -1013,7 +1013,7 @@ def test_load_truncated_should_fail(): inpath = utils.get_test_data('random-20-a.fa') savepath = utils.get_temp_filename('tempcountingsave0.ht') - hi = khmer.CountingHash(12, 1000, 2) + hi = khmer.Countgraph(12, 1000, 2) hi.consume_fasta(inpath) hi.save(savepath) @@ -1025,7 +1025,7 @@ def test_load_truncated_should_fail(): fp.write(data[:1000]) fp.close() - hi = khmer._CountingHash(12, [1]) + hi = khmer._Countgraph(12, [1]) try: hi.load(savepath) assert 0, "load should fail" @@ -1036,7 +1036,7 @@ def test_load_truncated_should_fail(): def test_load_gz_notexist_should_fail(): savepath = utils.get_temp_filename('tempcountingsave0.ht.gz') - hi = khmer.CountingHash(12, 1000, 2) + hi = khmer.Countgraph(12, 1000, 2) try: hi.load(savepath) assert 0, "load should fail" @@ -1048,7 +1048,7 @@ def test_load_gz_truncated_should_fail(): inpath = utils.get_test_data('random-20-a.fa') savepath = utils.get_temp_filename('tempcountingsave0.ht.gz') - hi = khmer.CountingHash(12, 1000, 2) + hi = khmer.Countgraph(12, 1000, 2) hi.consume_fasta(inpath) hi.save(savepath) @@ -1060,7 +1060,7 @@ def test_load_gz_truncated_should_fail(): fp.write(data[:1000]) fp.close() - hi = khmer._CountingHash(12, [1]) + hi = khmer._Countgraph(12, [1]) try: hi.load(savepath) assert 0, "load should fail" @@ -1069,7 +1069,7 @@ def test_load_gz_truncated_should_fail(): def test_counting_file_version_check(): - ht = khmer.CountingHash(12, 1, 1) + ht = khmer.Countgraph(12, 1, 1) inpath = utils.get_test_data('badversion-k12.ct') @@ -1081,7 +1081,7 @@ def test_counting_file_version_check(): def test_counting_gz_file_version_check(): - ht = khmer.CountingHash(12, 1, 1) + ht = khmer.Countgraph(12, 1, 1) inpath = utils.get_test_data('badversion-k12.ct.gz') @@ -1095,7 +1095,7 @@ def test_counting_gz_file_version_check(): def test_counting_file_type_check(): inpath = utils.get_test_data('goodversion-k12.ht') - kh = khmer.CountingHash(12, 1, 1) + kh = khmer.Countgraph(12, 1, 1) try: kh.load(inpath) @@ -1105,11 +1105,11 @@ def test_counting_file_type_check(): def test_counting_gz_file_type_check(): - ht = khmer.Hashbits(12, 1, 1) + ht = khmer.Nodegraph(12, 1, 1) inpath = utils.get_test_data('goodversion-k12.ht.gz') - kh = khmer.CountingHash(12, 1, 1) + kh = khmer.Countgraph(12, 1, 1) try: kh.load(inpath) @@ -1120,14 +1120,14 @@ def test_counting_gz_file_type_check(): def test_counting_bad_primes_list(): try: - ht = khmer._CountingHash(12, ["a", "b", "c"], 1) + ht = khmer._Countgraph(12, ["a", "b", "c"], 1) assert 0, "bad list of primes should fail" except TypeError as e: print(str(e)) def test_bad_use_bigcount(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) countingtable.set_use_bigcount(True) assert countingtable.get_use_bigcount() try: @@ -1138,7 +1138,7 @@ def test_bad_use_bigcount(): def test_consume_absentfasta(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: 
countingtable.consume_fasta("absent_file.fa") assert 0, "This should fail" @@ -1147,7 +1147,7 @@ def test_consume_absentfasta(): def test_consume_absentfasta_with_reads_parser(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.consume_fasta_with_reads_parser() assert 0, "this should fail" @@ -1164,7 +1164,7 @@ def test_consume_absentfasta_with_reads_parser(): def test_badconsume(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.consume() assert 0, "this should fail" @@ -1178,7 +1178,7 @@ def test_badconsume(): def test_get_badmin_count(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.get_min_count() assert 0, "this should fail" @@ -1192,7 +1192,7 @@ def test_get_badmin_count(): def test_get_badmax_count(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.get_max_count() assert 0, "this should fail" @@ -1206,7 +1206,7 @@ def test_get_badmax_count(): def test_get_badmedian_count(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.get_median_count() assert 0, "this should fail" @@ -1220,7 +1220,7 @@ def test_get_badmedian_count(): def test_get_badkadian_count(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.get_kadian_count() assert 0, "this should fail" @@ -1234,7 +1234,7 @@ def test_get_badkadian_count(): def test_badget(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.get() assert 0, "this should fail" @@ -1243,7 +1243,7 @@ def test_badget(): def test_badget_2(): - countingtable = khmer.CountingHash(6, 1e6, 2) + countingtable = khmer.Countgraph(6, 1e6, 2) countingtable.consume(DNA) @@ -1259,7 +1259,7 @@ def test_badget_2(): def test_badtrim(): - countingtable = khmer.CountingHash(6, 1e6, 2) + countingtable = khmer.Countgraph(6, 1e6, 2) countingtable.consume(DNA) try: @@ -1271,7 +1271,7 @@ def test_badtrim(): def test_badfasta_count_kmers_by_position(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.fasta_count_kmers_by_position() except TypeError as err: @@ -1291,7 +1291,7 @@ def test_badfasta_count_kmers_by_position(): def test_badload(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.load() assert 0, "this should fail" @@ -1300,7 +1300,7 @@ def test_badload(): def test_badsave(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.save() assert 0, "this should fail" @@ -1309,7 +1309,7 @@ def test_badsave(): def test_badksize(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.ksize(True) assert 0, "this should fail" @@ -1318,7 +1318,7 @@ def test_badksize(): def test_badhashsizes(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.hashsizes(True) assert 0, "this should fail" @@ -1327,7 +1327,7 @@ def test_badhashsizes(): def test_badconsume_and_tag(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) 
try: countingtable.consume_and_tag() assert 0, "this should fail" @@ -1336,7 +1336,7 @@ def test_badconsume_and_tag(): def test_consume_fasta_and_tag(): - countingtable = khmer.CountingHash(4, 4 ** 4, 4) + countingtable = khmer.Countgraph(4, 4 ** 4, 4) try: countingtable.consume_fasta_and_tag() assert 0, "this should fail" @@ -1346,7 +1346,7 @@ def test_consume_fasta_and_tag(): def test_consume_and_retrieve_tags_1(): - ct = khmer.CountingHash(4, 4 ** 4, 4) + ct = khmer.Countgraph(4, 4 ** 4, 4) # first, for each sequence, build tags. for record in screed.open(utils.get_test_data('test-graph2.fa')): @@ -1369,7 +1369,7 @@ def test_consume_and_retrieve_tags_1(): def test_consume_and_retrieve_tags_empty(): - ct = khmer.CountingHash(4, 4 ** 4, 4) + ct = khmer.Countgraph(4, 4 ** 4, 4) # load each sequence but do not build tags - everything should be empty. for record in screed.open(utils.get_test_data('test-graph2.fa')): @@ -1393,7 +1393,7 @@ def test_consume_and_retrieve_tags_empty(): def test_find_all_tags_list_error(): - ct = khmer.CountingHash(4, 4 ** 4, 4) + ct = khmer.Countgraph(4, 4 ** 4, 4) # load each sequence but do not build tags - everything should be empty. for record in screed.open(utils.get_test_data('test-graph2.fa')): @@ -1432,7 +1432,7 @@ def test_abund_dist_gz_bigcount(): assert 0, 'Should not produce OSError: ' + str(err) hashsizes = countinggraph.hashsizes() kmer_size = countinggraph.ksize() - tracking = khmer._Hashbits(kmer_size, hashsizes) + tracking = khmer._Nodegraph(kmer_size, hashsizes) abundances = countinggraph.abundance_distribution(infile, tracking) # calculate abundance distribution for compressed bigcount table flag = False @@ -1447,7 +1447,7 @@ def test_abund_dist_gz_bigcount(): def test_counting_load_bigcount(): - count_table = khmer.CountingHash(10, 1e5, 4) + count_table = khmer.Countgraph(10, 1e5, 4) count_table.set_use_bigcount(True) for i in range(500): print(i, count_table.count('ATATATATAT')) diff --git a/tests/test_counting_single.py b/tests/test_counting_single.py index f0d1e91b6f..7f81898790 100644 --- a/tests/test_counting_single.py +++ b/tests/test_counting_single.py @@ -19,14 +19,14 @@ @attr('huge') def test_toobig(): try: - ct = khmer.CountingHash(4, 1000000000000, 1) + ct = khmer.Countgraph(4, 1000000000000, 1) assert 0, "this should fail" except MemoryError as err: print(str(err)) def test_collision(): - kh = khmer._CountingHash(4, [5]) + kh = khmer._Countgraph(4, [5]) kh.count('AAAA') assert kh.get('AAAA') == 1 @@ -36,7 +36,7 @@ def test_collision(): def test_badcount(): - countingtable = khmer._CountingHash(4, [5]) + countingtable = khmer._Countgraph(4, [5]) try: countingtable.count() assert 0, "count should require one argument" @@ -50,7 +50,7 @@ def test_badcount(): def test_hashtable_n_entries(): - countingtable = khmer._CountingHash(4, [5]) + countingtable = khmer._Countgraph(4, [5]) try: countingtable.n_entries("nope") assert 0, "n_entries should accept no arguments" @@ -59,7 +59,7 @@ def test_hashtable_n_entries(): def test_complete_no_collision(): - kh = khmer._CountingHash(4, [4 ** 4]) + kh = khmer._Countgraph(4, [4 ** 4]) for i in range(0, kh.n_entries()): s = khmer.reverse_hash(i, 4) @@ -85,7 +85,7 @@ def test_complete_no_collision(): def test_complete_2_collision(): - kh = khmer._CountingHash(4, [5]) + kh = khmer._Countgraph(4, [5]) for i in range(0, kh.n_entries()): s = khmer.reverse_hash(i, 4) @@ -106,7 +106,7 @@ def test_complete_2_collision(): def test_complete_4_collision(): - kh = khmer._CountingHash(4, [3]) + kh = 
khmer._Countgraph(4, [3]) for i in range(0, kh.n_entries()): s = khmer.reverse_hash(i, 4) @@ -128,7 +128,7 @@ def test_complete_4_collision(): def test_maxcount(): # hashtable should saturate at some point so as not to overflow counter - kh = khmer._CountingHash(4, [5]) + kh = khmer._Countgraph(4, [5]) last_count = None for _ in range(0, 10000): @@ -146,7 +146,7 @@ def test_maxcount(): def test_maxcount_with_bigcount(): # hashtable should not saturate, if use_bigcount is set. - kh = khmer._CountingHash(4, [5]) + kh = khmer._Countgraph(4, [5]) kh.set_use_bigcount(True) last_count = None @@ -164,7 +164,7 @@ def test_maxcount_with_bigcount(): def test_consume_uniqify_first(): - kh = khmer._CountingHash(4, [5]) + kh = khmer._Countgraph(4, [5]) s = "TTTT" s_rc = "AAAA" @@ -176,7 +176,7 @@ def test_consume_uniqify_first(): def test_maxcount_consume(): # hashtable should saturate at some point so as not to overflow counter - kh = khmer._CountingHash(4, [5]) + kh = khmer._Countgraph(4, [5]) s = "A" * 10000 kh.consume(s) @@ -187,7 +187,7 @@ def test_maxcount_consume(): def test_maxcount_consume_with_bigcount(): # use the bigcount hack to avoid saturating the hashtable. - kh = khmer._CountingHash(4, [5]) + kh = khmer._Countgraph(4, [5]) kh.set_use_bigcount(True) s = "A" * 10000 @@ -198,7 +198,7 @@ def test_maxcount_consume_with_bigcount(): def test_get_mincount(): - kh = khmer._CountingHash(4, [5]) + kh = khmer._Countgraph(4, [5]) s = "AAAAACGT" kh.consume(s) @@ -212,7 +212,7 @@ def test_get_mincount(): def test_get_maxcount(): - kh = khmer._CountingHash(4, [7]) + kh = khmer._Countgraph(4, [7]) s = "AAAAACGT" kh.consume(s) @@ -226,7 +226,7 @@ def test_get_maxcount(): def test_get_maxcount_rc(): - kh = khmer._CountingHash(4, [7]) + kh = khmer._Countgraph(4, [7]) s = "AAAAACGT" src = "ACGTTTTT" @@ -241,7 +241,7 @@ def test_get_maxcount_rc(): def test_get_mincount_rc(): - kh = khmer._CountingHash(4, [5]) + kh = khmer._Countgraph(4, [5]) s = "AAAAACGT" src = "ACGTTTTT" @@ -256,7 +256,7 @@ def test_get_mincount_rc(): def test_badget(): - kh = khmer.CountingHash(6, 4 ** 10, 1) + kh = khmer.Countgraph(6, 4 ** 10, 1) DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAG" @@ -274,7 +274,7 @@ def test_badget(): def test_64bitshift(): - kh = khmer.CountingHash(25, 4, 1) + kh = khmer.Countgraph(25, 4, 1) fullstr = "GTATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGCCGCGATGCGTCGGCG" substr = "ATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGC" @@ -283,7 +283,7 @@ def test_64bitshift(): def test_64bitshift_2(): - kh = khmer.CountingHash(25, 4, 1) + kh = khmer.Countgraph(25, 4, 1) fullstr = "GTATGCCAGCTCCAACTGGGCCGGTACGAGCAGGCCATTGCCTCTTGCCGCGATGCGTCGGCG" kh.consume(fullstr) @@ -294,12 +294,12 @@ def test_64bitshift_2(): def test_very_short_read(): short_filename = utils.get_test_data('test-short.fa') - kh = khmer.CountingHash(9, 4, 1) + kh = khmer.Countgraph(9, 4, 1) n_reads, n_kmers = kh.consume_fasta(short_filename) assert n_reads == 1, n_reads assert n_kmers == 0, n_kmers - kh = khmer.CountingHash(8, 4, 1) + kh = khmer.Countgraph(8, 4, 1) n_reads, n_kmers = kh.consume_fasta(short_filename) assert n_reads == 1, n_reads assert n_kmers == 1, n_kmers @@ -308,7 +308,7 @@ def test_very_short_read(): class Test_ConsumeString(object): def setup(self): - self.kh = khmer._CountingHash(4, [4 ** 4]) + self.kh = khmer._Countgraph(4, [4 ** 4]) def test_n_occupied(self): assert self.kh.n_occupied() == 0 @@ -404,14 +404,14 @@ def test_max_count(self): class Test_AbundanceDistribution(object): def setup(self): - 
self.kh = khmer._CountingHash(4, [5]) + self.kh = khmer._Countgraph(4, [5]) A_filename = utils.get_test_data('all-A.fa') self.kh.consume_fasta(A_filename) def test_count_A(self): A_filename = utils.get_test_data('all-A.fa') - tracking = khmer._Hashbits(4, [5]) + tracking = khmer._Nodegraph(4, [5]) dist = self.kh.abundance_distribution(A_filename, tracking) assert sum(dist) == 1 diff --git a/tests/test_filter.py b/tests/test_filter.py index 3af64ffb40..169a62a18f 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -27,7 +27,7 @@ def load_fa_seq_names(filename): class Test_Filter(object): def test_abund(self): - ht = khmer.CountingHash(10, 4 ** 10, 1) + ht = khmer.Countgraph(10, 4 ** 10, 1) filename = utils.get_test_data('test-abund-read.fa') outname = utils.get_temp_filename('test_abund.out') diff --git a/tests/test_functions.py b/tests/test_functions.py index 65708c649e..07c74d78fd 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -103,23 +103,23 @@ def test_get_primes_fal(): assert "unable to find 5 prime numbers < 5" in str(err) -def test_extract_countinghash_info_badfile(): +def test_extract_countgraph_info_badfile(): try: - khmer.extract_countinghash_info( + khmer.extract_countgraph_info( utils.get_test_data('test-abund-read-2.fa')) assert 0, 'this should fail' except ValueError: pass -def test_extract_countinghash_info(): +def test_extract_countgraph_info(): fn = utils.get_temp_filename('test_extract_counting.ct') for size in [1e6, 2e6, 5e6, 1e7]: - ht = khmer.CountingHash(25, size, 4) + ht = khmer.Countgraph(25, size, 4) ht.save(fn) try: - info = khmer.extract_countinghash_info(fn) + info = khmer.extract_countgraph_info(fn) except ValueError as err: assert 0, 'Should not throw a ValueErorr: ' + str(err) ksize, table_size, n_tables, _, _, _ = info @@ -147,7 +147,7 @@ def test_extract_nodegraph_info_badfile(): def test_extract_nodegraph_info(): fn = utils.get_temp_filename('test_extract_nodegraph.pt') for size in [1e6, 2e6, 5e6, 1e7]: - ht = khmer.Hashbits(25, size, 4) + ht = khmer.Nodegraph(25, size, 4) ht.save(fn) info = khmer.extract_nodegraph_info(fn) diff --git a/tests/test_graph.py b/tests/test_graph.py index 5afcb92671..1963d95388 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -21,7 +21,7 @@ def teardown(): class Test_ExactGraphFu(object): def setup(self): - self.ht = khmer.Hashbits(12, 1e4, 2) + self.ht = khmer.Nodegraph(12, 1e4, 2) def test_counts(self): ht = self.ht @@ -115,7 +115,7 @@ def test_graph_links_prev_t(self): class Test_InexactGraphFu(object): def setup(self): - self.ht = khmer.Hashbits(12, 4 ** 3 + 1, 2) + self.ht = khmer.Nodegraph(12, 4 ** 3 + 1, 2) def test_graph_links_next_a(self): ht = self.ht @@ -199,7 +199,7 @@ def test_output_unassigned(self): filename = utils.get_test_data('random-20-a.fa') - ht = khmer._Hashbits(21, [5, 7, 11, 13]) + ht = khmer._Nodegraph(21, [5, 7, 11, 13]) ht.consume_fasta_and_tag(filename) output_file = utils.get_temp_filename('part0test') @@ -216,7 +216,7 @@ def test_not_output_unassigned(self): filename = utils.get_test_data('random-20-a.fa') - ht = khmer._Hashbits(21, [5, 7, 11, 13]) + ht = khmer._Nodegraph(21, [5, 7, 11, 13]) ht.consume_fasta_and_tag(filename) output_file = utils.get_temp_filename('parttest') @@ -231,7 +231,7 @@ def test_not_output_unassigned(self): def test_output_fq(self): filename = utils.get_test_data('random-20-a.fq') - ht = khmer.Hashbits(20, 1e4, 4) + ht = khmer.Nodegraph(20, 1e4, 4) ht.consume_fasta_and_tag(filename) subset = ht.do_subset_partition(0, 0) 
ht.merge_subset(subset) @@ -247,7 +247,7 @@ def test_output_fq(self): def test_disconnected_20_a(self): filename = utils.get_test_data('random-20-a.fa') - ht = khmer.Hashbits(21, 1e5, 4) + ht = khmer.Nodegraph(21, 1e5, 4) ht.consume_fasta_and_tag(filename) subset = ht.do_subset_partition(0, 0) @@ -257,7 +257,7 @@ def test_disconnected_20_a(self): def test_connected_20_a(self): filename = utils.get_test_data('random-20-a.fa') - ht = khmer.Hashbits(20, 1e4, 4) + ht = khmer.Nodegraph(20, 1e4, 4) ht.consume_fasta_and_tag(filename) subset = ht.do_subset_partition(0, 0) @@ -267,7 +267,7 @@ def test_connected_20_a(self): def test_disconnected_20_b(self): filename = utils.get_test_data('random-20-b.fa') - ht = khmer.Hashbits(21, 1e4, 4) + ht = khmer.Nodegraph(21, 1e4, 4) ht.consume_fasta_and_tag(filename) subset = ht.do_subset_partition(0, 0) @@ -277,7 +277,7 @@ def test_disconnected_20_b(self): def test_connected_20_b(self): filename = utils.get_test_data('random-20-b.fa') - ht = khmer.Hashbits(20, 1e4, 4) + ht = khmer.Nodegraph(20, 1e4, 4) ht.consume_fasta_and_tag(filename) subset = ht.do_subset_partition(0, 0) @@ -287,7 +287,7 @@ def test_connected_20_b(self): def test_disconnected_31_c(self): filename = utils.get_test_data('random-31-c.fa') - ht = khmer.Hashbits(32, 1e6, 4) + ht = khmer.Nodegraph(32, 1e6, 4) ht.consume_fasta_and_tag(filename) subset = ht.do_subset_partition(0, 0) @@ -297,7 +297,7 @@ def test_disconnected_31_c(self): def test_connected_31_c(self): filename = utils.get_test_data('random-31-c.fa') - ht = khmer.Hashbits(31, 1e5, 4) + ht = khmer.Nodegraph(31, 1e5, 4) ht.consume_fasta_and_tag(filename) subset = ht.do_subset_partition(0, 0) @@ -310,7 +310,7 @@ def test_connected_31_c(self): class Test_PythonAPI(object): def test_find_all_tags_kmersize(self): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) a = "ATTGGGACTCTGGGAGCACTTATCATGGAGAT" b = "GAGCACTTTAACCCTGCAGAGTGGCCAAGGCT" @@ -330,7 +330,7 @@ def test_find_all_tags_kmersize(self): pass def test_ordered_connect(self): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) a = "ATTGGGACTCTGGGAGCACTTATCATGGAGAT" b = "GAGCACTTTAACCCTGCAGAGTGGCCAAGGCT" diff --git a/tests/test_lump.py b/tests/test_lump.py index 511ee009d1..406b8659eb 100644 --- a/tests/test_lump.py +++ b/tests/test_lump.py @@ -19,7 +19,7 @@ def test_fakelump_together(): fakelump_fa = utils.get_test_data('fakelump.fa') - ht = khmer.Hashbits(32, 1e5, 4) + ht = khmer.Nodegraph(32, 1e5, 4) ht.consume_fasta_and_tag(fakelump_fa) subset = ht.do_subset_partition(0, 0) @@ -35,7 +35,7 @@ def test_fakelump_stop(): fakelump_fa = utils.get_test_data('fakelump.fa') fakelump_stoptags_txt = utils.get_test_data('fakelump.fa.stoptags.txt') - ht = khmer.Hashbits(32, 1e5, 4) + ht = khmer.Nodegraph(32, 1e5, 4) ht.consume_fasta_and_tag(fakelump_fa) for line in open(fakelump_stoptags_txt): @@ -53,7 +53,7 @@ def test_fakelump_stop(): def test_fakelump_stop2(): fakelump_fa = utils.get_test_data('fakelump.fa') - ht = khmer.Hashbits(32, 1e5, 4) + ht = khmer.Nodegraph(32, 1e5, 4) ht.consume_fasta_and_tag(fakelump_fa) ht.add_stop_tag('GGGGAGGGGTGCAGTTGTGACTTGCTCGAGAG') @@ -71,7 +71,7 @@ def test_fakelump_repartitioning(): fakelump_fa = utils.get_test_data('fakelump.fa') fakelump_fa_foo = utils.get_temp_filename('fakelump.fa.stopfoo') - ht = khmer.Hashbits(32, 1e5, 4) + ht = khmer.Nodegraph(32, 1e5, 4) ht.consume_fasta_and_tag(fakelump_fa) subset = ht.do_subset_partition(0, 0) @@ -88,7 +88,7 @@ def test_fakelump_repartitioning(): 
EXCURSION_DISTANCE = 40 EXCURSION_KMER_THRESHOLD = 82 EXCURSION_KMER_COUNT_THRESHOLD = 1 - counting = khmer.CountingHash(32, 1e5, 4) + counting = khmer.Countgraph(32, 1e5, 4) ht.repartition_largest_partition(None, counting, EXCURSION_DISTANCE, @@ -99,7 +99,7 @@ def test_fakelump_repartitioning(): # ok, now re-do everything with these stop tags, specifically. - ht = khmer.Hashbits(32, 1e5, 4) + ht = khmer.Nodegraph(32, 1e5, 4) ht.consume_fasta_and_tag(fakelump_fa) ht.load_stop_tags(fakelump_fa_foo) @@ -114,7 +114,7 @@ def test_fakelump_load_stop_tags_trunc(): fakelump_fa = utils.get_test_data('fakelump.fa') fakelump_fa_foo = utils.get_temp_filename('fakelump.fa.stopfoo') - ht = khmer.Hashbits(32, 1e5, 4) + ht = khmer.Nodegraph(32, 1e5, 4) ht.consume_fasta_and_tag(fakelump_fa) subset = ht.do_subset_partition(0, 0) @@ -131,7 +131,7 @@ def test_fakelump_load_stop_tags_trunc(): EXCURSION_DISTANCE = 40 EXCURSION_KMER_THRESHOLD = 82 EXCURSION_KMER_COUNT_THRESHOLD = 1 - counting = khmer._CountingHash(32, [5, 7, 11, 13]) + counting = khmer._Countgraph(32, [5, 7, 11, 13]) ht.repartition_largest_partition(None, counting, EXCURSION_DISTANCE, @@ -146,7 +146,7 @@ def test_fakelump_load_stop_tags_trunc(): fp.close() # ok, now try loading these stop tags; should fail. - ht = khmer._Hashbits(32, [5, 7, 11, 13]) + ht = khmer._Nodegraph(32, [5, 7, 11, 13]) ht.consume_fasta_and_tag(fakelump_fa) try: @@ -160,7 +160,7 @@ def test_fakelump_load_stop_tags_notexist(): fakelump_fa_foo = utils.get_temp_filename('fakelump.fa.stopfoo') # ok, now try loading these stop tags; should fail. - ht = khmer._Hashbits(32, [5, 7, 11, 13]) + ht = khmer._Nodegraph(32, [5, 7, 11, 13]) try: ht.load_stop_tags(fakelump_fa_foo) diff --git a/tests/test_hashbits.py b/tests/test_nodegraph.py similarity index 88% rename from tests/test_hashbits.py rename to tests/test_nodegraph.py index d7fd3fa1a2..38682a2c20 100644 --- a/tests/test_hashbits.py +++ b/tests/test_nodegraph.py @@ -24,14 +24,14 @@ def teardown(): @attr('huge') def test_toobig(): try: - pt = khmer.Hashbits(32, 1e13, 1) + pt = khmer.Nodegraph(32, 1e13, 1) assert 0, "This should fail" except MemoryError as err: print(str(err)) def test__get_set_tag_density(): - htableable = khmer._Hashbits(32, [1]) + htableable = khmer._Nodegraph(32, [1]) orig = htableable._get_tag_density() assert orig != 2 @@ -40,8 +40,8 @@ def test__get_set_tag_density(): def test_update_from(): - htableable = khmer.Hashbits(5, 1000, 4) - other_htableable = khmer.Hashbits(5, 1000, 4) + htableable = khmer.Nodegraph(5, 1000, 4) + other_htableable = khmer.Nodegraph(5, 1000, 4) assert htableable.get('AAAAA') == 0 assert htableable.get('GCGCG') == 0 @@ -71,8 +71,8 @@ def test_update_from(): def test_update_from_diff_ksize_2(): - htableable = khmer.Hashbits(5, 1000, 4) - other_htableable = khmer.Hashbits(4, 1000, 4) + htableable = khmer.Nodegraph(5, 1000, 4) + other_htableable = khmer.Nodegraph(4, 1000, 4) try: htableable.update(other_htableable) @@ -88,8 +88,8 @@ def test_update_from_diff_ksize_2(): def test_update_from_diff_tablesize(): - htableable = khmer.Hashbits(5, 100, 4) - other_htableable = khmer.Hashbits(5, 1000, 4) + htableable = khmer.Nodegraph(5, 100, 4) + other_htableable = khmer.Nodegraph(5, 1000, 4) try: htableable.update(other_htableable) @@ -99,8 +99,8 @@ def test_update_from_diff_tablesize(): def test_update_from_diff_num_tables(): - htableable = khmer.Hashbits(5, 1000, 3) - other_htableable = khmer.Hashbits(5, 1000, 4) + htableable = khmer.Nodegraph(5, 1000, 3) + other_htableable = 
khmer.Nodegraph(5, 1000, 4) try: htableable.update(other_htableable) @@ -117,7 +117,7 @@ def test_n_occupied_1(): num_htableables = 1 # number of hashtableables # test modified c++ n_occupied code - htableable = khmer.Hashbits(ksize, htable_size, num_htableables) + htableable = khmer.Nodegraph(ksize, htable_size, num_htableables) for _, record in enumerate(fasta_iter(open(filename))): htableable.consume(record['sequence']) @@ -134,7 +134,7 @@ def test_bloom_python_1(): htable_size = 100000 # size of hashtableable num_htableables = 3 # number of hashtableables - htableable = khmer.Hashbits(ksize, htable_size, num_htableables) + htableable = khmer.Nodegraph(ksize, htable_size, num_htableables) n_unique = 0 for _, record in enumerate(fasta_iter(open(filename))): @@ -162,7 +162,7 @@ def test_bloom_c_1(): htable_size = 100000 # size of hashtableable num_htableables = 3 # number of hashtableables - htableable = khmer.Hashbits(ksize, htable_size, num_htableables) + htableable = khmer.Nodegraph(ksize, htable_size, num_htableables) for _, record in enumerate(fasta_iter(open(filename))): htableable.consume(record['sequence']) @@ -176,7 +176,7 @@ def test_n_occupied_2(): # simple one htable_size = 10 # use 11 num_htableables = 1 - htableable = khmer._Hashbits(ksize, [11]) + htableable = khmer._Nodegraph(ksize, [11]) htableable.count('AAAA') # 00 00 00 00 = 0 assert htableable.n_occupied() == 1 @@ -194,7 +194,7 @@ def test_bloom_c_2(): # simple one ksize = 4 # use only 1 hashtableable, no bloom filter - htableable = khmer._Hashbits(ksize, [11]) + htableable = khmer._Nodegraph(ksize, [11]) htableable.count('AAAA') # 00 00 00 00 = 0 htableable.count('ACTG') # 00 10 01 11 = assert htableable.n_unique_kmers() == 2 @@ -204,7 +204,7 @@ def test_bloom_c_2(): # simple one assert htableable.n_unique_kmers() == 2 # use two hashtableables with 11,13 - other_htableable = khmer._Hashbits(ksize, [11, 13]) + other_htableable = khmer._Nodegraph(ksize, [11, 13]) other_htableable.count('AAAA') # 00 00 00 00 = 0 other_htableable.count('ACTG') # 00 10 01 11 = 2*16 +4 +3 = 39 @@ -220,7 +220,7 @@ def test_bloom_c_2(): # simple one def test_filter_if_present(): - htable = khmer._Hashbits(32, [3, 5]) + htable = khmer._Nodegraph(32, [3, 5]) maskfile = utils.get_test_data('filter-test-A.fa') inputfile = utils.get_test_data('filter-test-B.fa') @@ -236,7 +236,7 @@ def test_filter_if_present(): def test_combine_pe(): inpfile = utils.get_test_data('combine_parts_1.fa') - htable = khmer._Hashbits(32, [1]) + htable = khmer._Nodegraph(32, [1]) htable.consume_partitioned_fasta(inpfile) assert htable.count_partitions() == (2, 0) @@ -261,7 +261,7 @@ def test_combine_pe(): def test_load_partitioned(): inpfile = utils.get_test_data('combine_parts_1.fa') - htable = khmer._Hashbits(32, [1]) + htable = khmer._Nodegraph(32, [1]) htable.consume_partitioned_fasta(inpfile) assert htable.count_partitions() == (2, 0) @@ -278,7 +278,7 @@ def test_load_partitioned(): def test_count_within_radius_simple(): inpfile = utils.get_test_data('all-A.fa') - htable = khmer._Hashbits(4, [3, 5]) + htable = khmer._Nodegraph(4, [3, 5]) print(htable.consume_fasta(inpfile)) n = htable.count_kmers_within_radius('AAAA', 1) @@ -290,13 +290,13 @@ def test_count_within_radius_simple(): def test_count_within_radius_big(): inpfile = utils.get_test_data('random-20-a.fa') - htable = khmer.Hashbits(20, 1e5, 4) + htable = khmer.Nodegraph(20, 1e5, 4) htable.consume_fasta(inpfile) n = htable.count_kmers_within_radius('CGCAGGCTGGATTCTAGAGG', int(1e6)) assert n == 3961, n - 
htable = khmer.Hashbits(21, 1e5, 4) + htable = khmer.Nodegraph(21, 1e5, 4) htable.consume_fasta(inpfile) n = htable.count_kmers_within_radius('CGCAGGCTGGATTCTAGAGGC', int(1e6)) assert n == 39 @@ -304,7 +304,7 @@ def test_count_within_radius_big(): def test_count_kmer_degree(): inpfile = utils.get_test_data('all-A.fa') - htable = khmer._Hashbits(4, [3, 5]) + htable = khmer._Nodegraph(4, [3, 5]) htable.consume_fasta(inpfile) assert htable.kmer_degree('AAAA') == 2 @@ -314,7 +314,7 @@ def test_count_kmer_degree(): def test_save_load_tagset(): - htable = khmer._Hashbits(32, [1]) + htable = khmer._Nodegraph(32, [1]) outfile = utils.get_temp_filename('tagset') @@ -336,7 +336,7 @@ def test_save_load_tagset(): def test_save_load_tagset_noclear(): - htable = khmer._Hashbits(32, [1]) + htable = khmer._Nodegraph(32, [1]) outfile = utils.get_temp_filename('tagset') @@ -364,7 +364,7 @@ def test_stop_traverse(): htable_size = 1e4 # size of hashtableable num_htableables = 3 # number of hashtableables - htable = khmer.Hashbits(ksize, htable_size, num_htableables) + htable = khmer.Nodegraph(ksize, htable_size, num_htableables) # without tagging/joining across consume, this breaks into two partition; # with, it is one partition. @@ -386,7 +386,7 @@ def test_tag_across_stoptraverse(): htable_size = 1e4 # size of hashtableable num_htableables = 3 # number of hashtableables - htable = khmer.Hashbits(ksize, htable_size, num_htableables) + htable = khmer.Nodegraph(ksize, htable_size, num_htableables) # without tagging/joining across consume, this breaks into two partition; # with, it is one partition. @@ -414,7 +414,7 @@ def test_notag_across_stoptraverse(): htable_size = 1e4 # size of hashtableable num_htableables = 3 # number of hashtableables - htable = khmer.Hashbits(ksize, htable_size, num_htableables) + htable = khmer.Nodegraph(ksize, htable_size, num_htableables) # connecting k-mer at the beginning/end of a read: breaks up into two. htable.add_stop_tag('TTGCATACGTTGAGCCAGCG') @@ -429,7 +429,7 @@ def test_notag_across_stoptraverse(): def test_find_stoptags(): - htable = khmer._Hashbits(5, [1]) + htable = khmer._Nodegraph(5, [1]) htable.add_stop_tag("AAAAA") assert htable.identify_stoptags_by_position("AAAAA") == [0] @@ -439,7 +439,7 @@ def test_find_stoptags(): def test_find_stoptagsecond_seq(): - htable = khmer._Hashbits(4, [1]) + htable = khmer._Nodegraph(4, [1]) htable.add_stop_tag("ATGC") x = htable.identify_stoptags_by_position("ATGCATGCGCAT") @@ -447,12 +447,12 @@ def test_find_stoptagsecond_seq(): def test_get_ksize(): - kh = khmer._Hashbits(22, [1]) + kh = khmer._Nodegraph(22, [1]) assert kh.ksize() == 22 def test_get_hashsizes(): - kh = khmer.Hashbits(22, 100, 4) + kh = khmer.Nodegraph(22, 100, 4) # Py2/3 hack, longify converts to long in py2, remove once py2 isn't # supported any longer. 
expected = utils.longify([97, 89, 83, 79]) @@ -460,7 +460,7 @@ def test_get_hashsizes(): def test_extract_unique_paths_0(): - kh = khmer._Hashbits(10, [5, 7, 11, 13]) + kh = khmer._Nodegraph(10, [5, 7, 11, 13]) x = kh.extract_unique_paths('ATGGAGAGACACAGATAGACAGGAGTGGCGATG', 10, 1) assert x == ['ATGGAGAGACACAGATAGACAGGAGTGGCGATG'] @@ -471,7 +471,7 @@ def test_extract_unique_paths_0(): def test_extract_unique_paths_1(): - kh = khmer._Hashbits(10, [5, 7, 11, 13]) + kh = khmer._Nodegraph(10, [5, 7, 11, 13]) kh.consume('AGTGGCGATG') x = kh.extract_unique_paths('ATGGAGAGACACAGATAGACAGGAGTGGCGATG', 10, 1) @@ -480,7 +480,7 @@ def test_extract_unique_paths_1(): def test_extract_unique_paths_2(): - kh = khmer._Hashbits(10, [5, 7, 11, 13]) + kh = khmer._Nodegraph(10, [5, 7, 11, 13]) kh.consume('ATGGAGAGAC') x = kh.extract_unique_paths('ATGGAGAGACACAGATAGACAGGAGTGGCGATG', 10, 1) @@ -489,7 +489,7 @@ def test_extract_unique_paths_2(): def test_extract_unique_paths_3(): - kh = khmer._Hashbits(10, [5, 7, 11, 13]) + kh = khmer._Nodegraph(10, [5, 7, 11, 13]) kh.consume('ATGGAGAGAC') kh.consume('AGTGGCGATG') @@ -500,7 +500,7 @@ def test_extract_unique_paths_3(): def test_extract_unique_paths_4(): - kh = khmer.Hashbits(10, 1e6, 4) + kh = khmer.Nodegraph(10, 1e6, 4) kh.consume('ATGGAGAGAC') kh.consume('AGTGGCGATG') @@ -520,7 +520,7 @@ def test_find_unpart(): htable_size = 1e4 # size of hashtableable num_htableables = 3 # number of hashtableables - htable = khmer.Hashbits(ksize, htable_size, num_htableables) + htable = khmer.Nodegraph(ksize, htable_size, num_htableables) htable.consume_fasta_and_tag(filename) subset = htable.do_subset_partition(0, 0) @@ -542,7 +542,7 @@ def test_find_unpart_notraverse(): htable_size = 1e4 # size of hashtableable num_htableables = 3 # number of hashtableables - htable = khmer.Hashbits(ksize, htable_size, num_htableables) + htable = khmer.Nodegraph(ksize, htable_size, num_htableables) htable.consume_fasta_and_tag(filename) subset = htable.do_subset_partition(0, 0) @@ -564,7 +564,7 @@ def test_find_unpart_fail(): htable_size = 1e4 # size of hashtableable num_htableables = 3 # number of hashtableables - htable = khmer.Hashbits(ksize, htable_size, num_htableables) + htable = khmer.Nodegraph(ksize, htable_size, num_htableables) htable.consume_fasta_and_tag(filename) subset = htable.do_subset_partition(0, 0) @@ -579,7 +579,7 @@ def test_find_unpart_fail(): def test_simple_median(): - hi = khmer.Hashbits(6, 1e5, 2) + hi = khmer.Nodegraph(6, 1e5, 2) (median, average, stddev) = hi.get_median_count("AAAAAA") print(median, average, stddev) @@ -596,7 +596,7 @@ def test_simple_median(): def test_badget(): - hbts = khmer.Hashbits(6, 1e6, 1) + hbts = khmer.Nodegraph(6, 1e6, 1) dna = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAG" @@ -625,7 +625,7 @@ def test_badget(): def test_load_notexist_should_fail(): savepath = utils.get_temp_filename('tempnodegraphsave0.htable') - hi = khmer._CountingHash(12, [1]) + hi = khmer._Countgraph(12, [1]) try: hi.load(savepath) assert 0, "load should fail" @@ -637,7 +637,7 @@ def test_load_truncated_should_fail(): inpath = utils.get_test_data('random-20-a.fa') savepath = utils.get_temp_filename('tempnodegraphsave0.ct') - hi = khmer.CountingHash(12, 1000, 2) + hi = khmer.Countgraph(12, 1000, 2) hi.consume_fasta(inpath) hi.save(savepath) @@ -650,7 +650,7 @@ def test_load_truncated_should_fail(): fp.write(data[:1000]) fp.close() - hi = khmer._CountingHash(12, [1]) + hi = khmer._Countgraph(12, [1]) try: hi.load(savepath) assert 0, "load should 
fail" @@ -659,7 +659,7 @@ def test_load_truncated_should_fail(): def test_save_load_tagset_notexist(): - htable = khmer._Hashbits(32, [1]) + htable = khmer._Nodegraph(32, [1]) outfile = utils.get_temp_filename('tagset') try: @@ -670,7 +670,7 @@ def test_save_load_tagset_notexist(): def test_save_load_tagset_trunc(): - htable = khmer._Hashbits(32, [1]) + htable = khmer._Nodegraph(32, [1]) outfile = utils.get_temp_filename('tagset') @@ -711,13 +711,13 @@ def _build_testfiles(): # nodegraph file inpath = utils.get_test_data('random-20-a.fa') - hi = khmer.Hashbits(12, 2) + hi = khmer.Nodegraph(12, 2) hi.consume_fasta(inpath) hi.save('/tmp/goodversion-k12.htable') # tagset file - htable = khmer._Hashbits(32, [1]) + htable = khmer._Nodegraph(32, [1]) htable.add_tag('A' * 32) htable.add_tag('G' * 32) @@ -727,7 +727,7 @@ def _build_testfiles(): fakelump_fa = utils.get_test_data('fakelump.fa') - htable = khmer.Hashbits(32, 4, 4) + htable = khmer.Nodegraph(32, 4, 4) htable.consume_fasta_and_tag(fakelump_fa) subset = htable.do_subset_partition(0, 0) @@ -736,7 +736,7 @@ def _build_testfiles(): EXCURSION_DISTANCE = 40 EXCURSION_ksizeMER_THRESHOLD = 82 EXCURSION_ksizeMER_COUNT_THRESHOLD = 1 - counting = khmer.CountingHash(32, 4, 4) + counting = khmer.Countgraph(32, 4, 4) htable.repartition_largest_partition(None, counting, EXCURSION_DISTANCE, @@ -747,7 +747,7 @@ def _build_testfiles(): def test_nodegraph_file_version_check(): - htable = khmer._Hashbits(12, [1]) + htable = khmer._Nodegraph(12, [1]) inpath = utils.get_test_data('badversion-k12.htable') @@ -759,11 +759,11 @@ def test_nodegraph_file_version_check(): def test_nodegraph_file_type_check(): - kh = khmer._CountingHash(12, [1]) + kh = khmer._Countgraph(12, [1]) savepath = utils.get_temp_filename('tempcountingsave0.ct') kh.save(savepath) - htable = khmer._Hashbits(12, [1]) + htable = khmer._Nodegraph(12, [1]) try: htable.load(savepath) @@ -773,7 +773,7 @@ def test_nodegraph_file_type_check(): def test_stoptags_file_version_check(): - htable = khmer._Hashbits(32, [1]) + htable = khmer._Nodegraph(32, [1]) inpath = utils.get_test_data('badversion-k32.stoptags') @@ -785,7 +785,7 @@ def test_stoptags_file_version_check(): def test_stoptags_ksize_check(): - htable = khmer._Hashbits(31, [1]) + htable = khmer._Nodegraph(31, [1]) inpath = utils.get_test_data('goodversion-k32.stoptags') try: @@ -796,7 +796,7 @@ def test_stoptags_ksize_check(): def test_stop_tags_filetype_check(): - htable = khmer._Hashbits(31, [1]) + htable = khmer._Nodegraph(31, [1]) inpath = utils.get_test_data('goodversion-k32.tagset') try: @@ -807,7 +807,7 @@ def test_stop_tags_filetype_check(): def test_tagset_file_version_check(): - htable = khmer._Hashbits(32, [1]) + htable = khmer._Nodegraph(32, [1]) inpath = utils.get_test_data('badversion-k32.tagset') @@ -819,7 +819,7 @@ def test_tagset_file_version_check(): def test_stop_tags_truncate_check(): - htable = khmer._Hashbits(32, [1]) + htable = khmer._Nodegraph(32, [1]) inpath = utils.get_test_data('goodversion-k32.tagset') data = open(inpath, 'rb').read() @@ -838,7 +838,7 @@ def test_stop_tags_truncate_check(): def test_tagset_ksize_check(): - htable = khmer._Hashbits(31, [1]) + htable = khmer._Nodegraph(31, [1]) inpath = utils.get_test_data('goodversion-k32.tagset') try: @@ -849,7 +849,7 @@ def test_tagset_ksize_check(): def test_tagset_filetype_check(): - htable = khmer._Hashbits(31, [1]) + htable = khmer._Nodegraph(31, [1]) inpath = utils.get_test_data('goodversion-k32.stoptags') try: @@ -861,14 +861,14 @@ def 
test_tagset_filetype_check(): def test_bad_primes_list(): try: - coutingtable = khmer._Hashbits(31, ["a", "b", "c"], 1) + coutingtable = khmer._Nodegraph(31, ["a", "b", "c"], 1) assert 0, "Bad primes list should fail" except TypeError as e: print(str(e)) def test_consume_absentfasta_with_reads_parser(): - presencetable = khmer._Hashbits(31, [1]) + presencetable = khmer._Nodegraph(31, [1]) try: presencetable.consume_fasta_with_reads_parser() assert 0, "this should fail" @@ -886,15 +886,15 @@ def test_consume_absentfasta_with_reads_parser(): def test_bad_primes(): try: - countingtable = khmer._Hashbits.__new__( - khmer._Hashbits, 6, ["a", "b", "c"]) + countingtable = khmer._Nodegraph.__new__( + khmer._Nodegraph, 6, ["a", "b", "c"]) assert 0, "this should fail" except TypeError as e: print(str(e)) def test_consume_fasta_and_tag_with_badreads_parser(): - presencetable = khmer.Hashbits(6, 1e6, 2) + presencetable = khmer.Nodegraph(6, 1e6, 2) try: readsparser = khmer.ReadParser(utils.get_test_data("test-empty.fa")) presencetable.consume_fasta_and_tag_with_reads_parser(readsparser) diff --git a/tests/test_read_aligner.py b/tests/test_read_aligner.py index adbf24eece..2779cbdf50 100644 --- a/tests/test_read_aligner.py +++ b/tests/test_read_aligner.py @@ -46,13 +46,13 @@ def neq_(v1, v2): def test_graph_attribute(): - ch = khmer.CountingHash(10, 1048576, 1) + ch = khmer.Countgraph(10, 1048576, 1) aligner = khmer.ReadAligner(ch, 0, 0) assert aligner.graph is ch def test_align_nothing(): - ch = khmer.CountingHash(10, 1048576, 1) + ch = khmer.Countgraph(10, 1048576, 1) read = "ACCAAGGCTCGAGATTTACC" aligner = khmer.ReadAligner(ch, 0, 0) @@ -68,7 +68,7 @@ def test_align_nothing(): def test_alignnocov(): - ch = khmer.CountingHash(10, 1048576, 1) + ch = khmer.Countgraph(10, 1048576, 1) read = "ACCTAGGTTCGACATGTACC" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(20): @@ -83,7 +83,7 @@ def test_alignnocov(): def test_align_middle(): - ch = khmer.CountingHash(10, 1048576, 1) + ch = khmer.Countgraph(10, 1048576, 1) read = "TCGACAAGTCCTTGACAGAT" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(20): @@ -100,7 +100,7 @@ def test_align_middle(): def test_align_middle_trunc(): return # @CTB - ch = khmer.CountingHash(10, 1048576, 1) + ch = khmer.Countgraph(10, 1048576, 1) read = "TCGACAAGTCCTTGACAGATGGGGGG" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(20): @@ -124,7 +124,7 @@ def test_align_middle_trunc(): def test_align_middle_trunc_2(): return # @CTB - ch = khmer.CountingHash(10, 1048576, 1) + ch = khmer.Countgraph(10, 1048576, 1) read = "GGGGGGGGGGGGTCGACAAGTCCTTGACAGAT" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(20): @@ -146,7 +146,7 @@ def test_align_middle_trunc_2(): def test_align_fwd_nothing(): - ch = khmer.CountingHash(10, 1048576, 1) + ch = khmer.Countgraph(10, 1048576, 1) read = "ACCAAGGCTCGAGATTTACC" aligner = khmer.ReadAligner(ch, 0, 0) @@ -162,7 +162,7 @@ def test_align_fwd_nothing(): def test_align_fwd_nocov(): - ch = khmer.CountingHash(10, 1048576, 1) + ch = khmer.Countgraph(10, 1048576, 1) read = "ACCTAGGTTCGACATGTACC" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(20): @@ -177,7 +177,7 @@ def test_align_fwd_nocov(): def test_align_fwd_middle(): - ch = khmer.CountingHash(10, 1048576, 1) + ch = khmer.Countgraph(10, 1048576, 1) read = "TCGACAAGTCCTTGACAGAT" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(20): @@ -193,7 +193,7 @@ def test_align_fwd_middle(): def test_align_fwd_middle_trunc(): return # @CTB - ch = khmer.CountingHash(10, 1048576, 1) + ch = 
khmer.Countgraph(10, 1048576, 1) read = "TCGACAAGTCCTTGACAGATGGGGGG" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(20): @@ -215,7 +215,7 @@ def test_align_fwd_middle_trunc(): def test_align_fwd_middle_trunc_2(): - ch = khmer.CountingHash(10, 1048576, 1) + ch = khmer.Countgraph(10, 1048576, 1) read = "GGGGGGGGGGGGTCGACAAGTCCTTGACAGAT" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(20): @@ -235,7 +235,7 @@ def test_align_fwd_middle_trunc_2(): def test_align_fwd_covs_1(): K = 10 - ch = khmer.CountingHash(K, 1048576, 1) + ch = khmer.Countgraph(K, 1048576, 1) read = "GTCGACAAGTCCTTGACAGAT" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(19): @@ -258,7 +258,7 @@ def test_align_fwd_covs_1(): def test_align_fwd_covs_2(): K = 10 - ch = khmer.CountingHash(K, 1048576, 1) + ch = khmer.Countgraph(K, 1048576, 1) read = "GTCGACAAGTCCTTGACAGAT" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(19): @@ -283,7 +283,7 @@ def test_align_fwd_covs_2(): def test_align_fwd_covs_3(): K = 10 - ch = khmer.CountingHash(K, 1048576, 1) + ch = khmer.Countgraph(K, 1048576, 1) read = "GTCGACAAGTCCTTGACAGAT" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(19): @@ -309,7 +309,7 @@ def test_align_fwd_covs_3(): def test_align_fwd_covs_4(): K = 10 - ch = khmer.CountingHash(K, 1048576, 1) + ch = khmer.Countgraph(K, 1048576, 1) read = "GTCGACAAGTCCTTGACAGAT" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(19): @@ -333,7 +333,7 @@ def test_align_fwd_covs_4(): def test_align_fwd_covs_5(): K = 10 - ch = khmer.CountingHash(K, 1048576, 1) + ch = khmer.Countgraph(K, 1048576, 1) read = "GTCGACAAGTCCTTGACAGAT" aligner = khmer.ReadAligner(ch, 0, 0) for i in range(19): @@ -357,7 +357,7 @@ def test_align_fwd_covs_5(): def test_simple_readalign(): return # @CTB - ch = khmer.CountingHash(10, 1048576, 1) + ch = khmer.Countgraph(10, 1048576, 1) aligner = khmer.ReadAligner(ch, 2, 0) for i in range(20): ch.consume("AGAGGGAAAGCTAGGTTCGACATGTCCTTGACAGAT") @@ -377,7 +377,7 @@ def test_simple_readalign(): def test_readalign(): return # @CTB - ch = khmer.CountingHash(10, 1048576, 1) + ch = khmer.Countgraph(10, 1048576, 1) aligner = khmer.ReadAligner(ch, 1, 0) for i in range(20): ch.consume("AGAGGGAAAGCTAGGTTCGACAAGTCCTTGACAGAT") @@ -612,7 +612,7 @@ def check_query(aligner, query): def test_readalign_new(): return # @CTB - ch = khmer.CountingHash(32, 1048576, 1) + ch = khmer.Countgraph(32, 1048576, 1) aligner = khmer.ReadAligner(ch, 1, 0) for seq in ht_seqs: ch.consume(seq) @@ -624,7 +624,7 @@ def test_readalign_new(): def test_readaligner_load(): - ct = khmer.CountingHash(32, 1048576, 1) + ct = khmer.Countgraph(32, 1048576, 1) parameters_json = utils.get_test_data('readaligner-default.json') a_aligner = khmer.ReadAligner(ct, 0, 0, filename=parameters_json) a_scoring_matrix = a_aligner.get_scoring_matrix() diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py index c281084e83..5173fcfa55 100644 --- a/tests/test_script_arguments.py +++ b/tests/test_script_arguments.py @@ -254,7 +254,7 @@ def test_create_nodegraph_4_multiplier(): sum(nodegraph.hashsizes()) -def test_report_on_config_bad_hashtype(): +def test_report_on_config_bad_graphtype(): ksize = khmer_args.DEFAULT_K n_tables = khmer_args.DEFAULT_N_TABLES max_tablesize = khmer_args.DEFAULT_MAX_TABLESIZE diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 134af07ad2..39a35f0d5e 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -582,7 +582,7 @@ def test_filter_stoptags(): # now, create a file with some stop tags in it -- 
K = 18 - kh = khmer._Hashbits(K, [1]) + kh = khmer._Nodegraph(K, [1]) kh.add_stop_tag('GTTGACGGGGCTCAGGGG') kh.save_stop_tags(stopfile) del kh @@ -613,7 +613,7 @@ def test_filter_stoptags_fq(): # now, create a file with some stop tags in it -- K = 18 - kh = khmer._Hashbits(K, [1]) + kh = khmer._Nodegraph(K, [1]) kh.add_stop_tag('GTTGACGGGGCTCAGGGG') kh.save_stop_tags(stopfile) del kh diff --git a/tests/test_subset_graph.py b/tests/test_subset_graph.py index 44f75698fe..fcf82da90f 100644 --- a/tests/test_subset_graph.py +++ b/tests/test_subset_graph.py @@ -21,7 +21,7 @@ def teardown(): class Test_RandomData(object): def test_3_merge_013(self): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) filename = utils.get_test_data('test-graph2.fa') @@ -43,7 +43,7 @@ def test_3_merge_013(self): assert n_partitions == 1, n_partitions # combined. def test_3_merge_023(self): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) filename = utils.get_test_data('test-graph2.fa') (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename) @@ -64,7 +64,7 @@ def test_3_merge_023(self): assert n_partitions == 1, n_partitions # combined. def test_5_merge_046(self): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) filename = utils.get_test_data('test-graph5.fa') (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename) @@ -83,7 +83,7 @@ def test_5_merge_046(self): assert n_partitions == 1, n_partitions # combined. def test_random_20_a_succ(self): - ht = khmer.Hashbits(20, 4 ** 7 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 7 + 1, 2) filename = utils.get_test_data('random-20-a.fa') outfile = utils.get_temp_filename('out') @@ -102,7 +102,7 @@ def test_random_20_a_succ(self): assert n_partitions == 1, n_partitions def test_random_20_a_succ_II(self): - ht = khmer.Hashbits(20, 4 ** 7 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 7 + 1, 2) filename = utils.get_test_data('random-20-a.fa') outfile = utils.get_temp_filename('out') @@ -121,7 +121,7 @@ def test_random_20_a_succ_II(self): assert n_partitions == 1, n_partitions def test_random_20_a_succ_III(self): - ht = khmer.Hashbits(20, 4 ** 7 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 7 + 1, 2) filename = utils.get_test_data('random-20-a.fa') outfile = utils.get_temp_filename('out') @@ -144,7 +144,7 @@ def test_random_20_a_succ_III(self): assert n_partitions == 1, n_partitions def test_random_20_a_succ_IV(self): - ht = khmer.Hashbits(20, 4 ** 7 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 7 + 1, 2) filename = utils.get_test_data('random-20-a.fa') outfile = utils.get_temp_filename('out') @@ -164,7 +164,7 @@ def test_random_20_a_succ_IV(self): assert n_partitions == 1, n_partitions def test_random_20_a_succ_IV_save(self): - ht = khmer.Hashbits(20, 4 ** 7 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 7 + 1, 2) filename = utils.get_test_data('random-20-a.fa') savefile_ht = utils.get_temp_filename('ht') @@ -177,7 +177,7 @@ def test_random_20_a_succ_IV_save(self): ht.save_tagset(savefile_tags) del ht - ht = khmer.Hashbits(20, 4 ** 7 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 7 + 1, 2) ht.load(savefile_ht) ht.load_tagset(savefile_tags) @@ -200,7 +200,7 @@ def test_random_20_a_succ_IV_save(self): class Test_SaveLoadPmap(object): def test_save_load_merge(self): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) filename = utils.get_test_data('test-graph2.fa') (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename) @@ -233,7 +233,7 @@ def 
test_save_load_merge(self): assert n_partitions == 1, n_partitions # combined. def test_save_load_merge_truncate(self): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) filename = utils.get_test_data('test-graph2.fa') (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename) @@ -270,7 +270,7 @@ def test_save_load_merge_truncate(self): print(str(err), i) def test_save_load_merge_2(self): - ht = khmer.Hashbits(20, 4 ** 8 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 8 + 1, 2) filename = utils.get_test_data('random-20-a.fa') (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename) @@ -302,7 +302,7 @@ def test_save_load_merge_2(self): assert n_partitions == 1, n_partitions # combined. def test_save_load_merge_nexist(self): - ht = khmer._Hashbits(20, [1]) + ht = khmer._Nodegraph(20, [1]) try: a = ht.load_subset_partitionmap('this does not exist') assert 0, "this should not succeed" @@ -310,7 +310,7 @@ def test_save_load_merge_nexist(self): print(str(e)) def test_save_merge_from_disk(self): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) filename = utils.get_test_data('test-graph2.fa') (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename) @@ -339,7 +339,7 @@ def test_save_merge_from_disk(self): assert n_partitions == 1, n_partitions # combined. def test_save_merge_from_disk_2(self): - ht = khmer.Hashbits(20, 4 ** 7 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 7 + 1, 2) filename = utils.get_test_data('random-20-a.fa') (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename) @@ -368,7 +368,7 @@ def test_save_merge_from_disk_2(self): assert n_partitions == 1, n_partitions # combined. def test_save_merge_from_disk_file_not_exist(self): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) filename = utils.get_test_data('test-graph2.fa') (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename) @@ -389,7 +389,7 @@ def test_save_merge_from_disk_file_not_exist(self): print(str(e)) def test_merge_from_disk_file_bad_type(self): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) infile = utils.get_test_data('goodversion-k12.ht') try: @@ -399,7 +399,7 @@ def test_merge_from_disk_file_bad_type(self): print(str(e)) def test_merge_from_disk_file_version(self): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) infile = utils.get_test_data('badversion-k12.ht') try: @@ -409,7 +409,7 @@ def test_merge_from_disk_file_version(self): print(str(e)) def test_save_merge_from_disk_ksize(self): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) filename = utils.get_test_data('test-graph2.fa') (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename) @@ -424,7 +424,7 @@ def test_save_merge_from_disk_ksize(self): ht.save_subset_partitionmap(x, outfile1) del x - ht = khmer._Hashbits(19, [1]) + ht = khmer._Nodegraph(19, [1]) try: ht.merge_subset_from_disk(outfile1) assert 0, "this should fail" @@ -433,7 +433,7 @@ def test_save_merge_from_disk_ksize(self): def test_save_load_merge_on_graph(): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) filename = utils.get_test_data('test-graph2.fa') (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename) @@ -466,7 +466,7 @@ def test_save_load_merge_on_graph(): def test_save_load_on_graph_truncate(): - ht = khmer.Hashbits(20, 4 ** 4 + 1, 2) + ht = khmer.Nodegraph(20, 4 ** 4 + 1, 2) filename = 
utils.get_test_data('test-graph2.fa') (total_reads, total_kmers) = ht.consume_fasta_and_tag(filename) @@ -506,7 +506,7 @@ def test_save_load_on_graph_truncate(): def test_output_partitions(): filename = utils.get_test_data('test-output-partitions.fa') - ht = khmer._Hashbits(10, [1]) + ht = khmer._Nodegraph(10, [1]) ht.set_partition_id('TTAGGACTGC', 2) ht.set_partition_id('TGCGTTTCAA', 3) ht.set_partition_id('ATACTGTAAA', 4) @@ -531,7 +531,7 @@ def test_output_partitions(): def test_tiny_real_partitions(): filename = utils.get_test_data('real-partition-tiny.fa') - ht = khmer.Hashbits(32, 8e2, 4) + ht = khmer.Nodegraph(32, 8e2, 4) ht.consume_fasta_and_tag(filename) subset = ht.do_subset_partition(0, 0) @@ -558,7 +558,7 @@ def test_tiny_real_partitions(): def test_small_real_partitions(): filename = utils.get_test_data('real-partition-small.fa') - ht = khmer.Hashbits(32, 2e3, 4) + ht = khmer.Nodegraph(32, 2e3, 4) ht.consume_fasta_and_tag(filename) subset = ht.do_subset_partition(0, 0) @@ -600,7 +600,7 @@ def test_small_real_partitions(): def test_partition_on_abundance_1(): print((a,)) print((b,)) - kh = khmer.CountingHash(20, 1e3, 4) + kh = khmer.Countgraph(20, 1e3, 4) for i in range(10): print(kh.consume_and_tag(a)) @@ -614,7 +614,7 @@ def test_partition_on_abundance_1(): def test_partition_on_abundance_2(): - kh = khmer.CountingHash(20, 1e3, 4) + kh = khmer.Countgraph(20, 1e3, 4) for i in range(10): print(kh.consume_and_tag(a)) @@ -628,7 +628,7 @@ def test_partition_on_abundance_2(): def test_partition_on_abundance_3(): - kh = khmer.CountingHash(20, 1e4, 4) + kh = khmer.Countgraph(20, 1e4, 4) for i in range(10): print(kh.consume_and_tag(a)) @@ -647,7 +647,7 @@ def test_partition_on_abundance_3(): def test_partition_overlap_1(): - kh = khmer.CountingHash(20, 1e3, 4) + kh = khmer.Countgraph(20, 1e3, 4) for i in range(10): kh.consume_and_tag(a) @@ -668,7 +668,7 @@ def test_partition_overlap_1(): def test_partition_overlap_2(): - kh = khmer.CountingHash(20, 1e4, 4) + kh = khmer.Countgraph(20, 1e4, 4) for i in range(10): kh.consume_and_tag(a)