More consolidation of Hashtable derived types. #1504

Merged
merged 34 commits into from Nov 15, 2016
Commits
+3,935 −3,330
Split
View
@@ -1,3 +1,24 @@
+2016-11-15 Titus Brown <titus@idyll.org>
+
+ * khmer/{_cpy_counttable.hh,_cpy_hashgraph.hh,_cpy_nodetable.hh,_khmer.cc,
+ __init__.py}: new Counttable and Nodetable CPython types, plus
+ renaming of CountingHash to Countgraph and Hashbits to Nodegraph;
+ refactoring of CPython hierarchy to separate graph methods from table
+ methods.
+ * lib/hashtable.{cc,hh}: pull some more CountingHash/Counttable methods
+ back to base class; rename.
+ * lib/hashgraph.{cc,hh}: create new files containing the Countgraph
+ and Nodegraph classes.
+ * Makefile, setup.py: updated dependencies for added and removed files.
+ * lib/Makefile: eliminated lib/counting.{cc,hh} and lib/hashbits.{cc,hh}.
+ * lib/{labelhash.cc,read_aligner.hh,storage.cc,storage.hh,subset.cc,
+ subset.hh,test-Colors.cc,test-compile.cc},
+ examples/c++-api/count-demo.cc: renamed CountingHash and Hashbits.
+ * tests/test_cpython_hierarchy.py: some preliminary tests for new CPython
+ inheritance hierarchy.
+ * tests/{test_nodegraph,test_countgraph}.py: added creation tests of
+ 'tablesizes' passed into Nodegraph and Countgraph constructors.
+
2016-11-15 Luiz Irber <khmer@luizirber.org>
* khmer/__init__.py,tests/test_functions.py: Fix get_n_primes_near_x to
View
@@ -37,7 +37,7 @@
# `SHELL=bash` Will break Titus's laptop, so don't use BASH-isms like
# `[[` conditional expressions.
-CPPSOURCES=$(wildcard lib/*.cc lib/*.hh khmer/_khmer.cc) setup.py
+CPPSOURCES=$(wildcard lib/*.cc lib/*.hh khmer/_khmer.cc khmer/*.hh) setup.py
PYSOURCES=$(filter-out khmer/_version.py, \
$(wildcard khmer/*.py scripts/*.py oxli/*.py) )
SOURCES=$(PYSOURCES) $(CPPSOURCES) setup.py
@@ -4,7 +4,7 @@
#include <vector>
#include <cmath>
#include "khmer.hh"
-#include "counting.hh"
+#include "hashtable.hh"
using namespace khmer;
@@ -22,7 +22,7 @@ int main()
std::vector<HashIntoType> tablesize;
tablesize.push_back(pow(4, ksize));
- CountingHash ktable(ksize, tablesize);
+ Counttable ktable(ksize, tablesize);
ktable.consume_string("ATGGCGATGGCAAGTAGGACCCAGATGGACCAAAG");
View
@@ -41,8 +41,10 @@
import json
from khmer._khmer import Countgraph as _Countgraph
+from khmer._khmer import Counttable as _Counttable
from khmer._khmer import GraphLabels as _GraphLabels
from khmer._khmer import Nodegraph as _Nodegraph
+from khmer._khmer import Nodetable as _Nodetable
from khmer._khmer import HLLCounter as _HLLCounter
from khmer._khmer import ReadAligner as _ReadAligner
from khmer._khmer import LinearAssembler
@@ -278,6 +280,15 @@ def __new__(cls, k, starting_size, n_tables):
return countgraph
+class Counttable(_Counttable):
+
+ def __new__(cls, k, starting_size, n_tables):
+ primes = get_n_primes_near_x(n_tables, starting_size)
+ counttable = _Counttable.__new__(cls, k, primes)
+ counttable.primes = primes
+ return counttable
+
+
class GraphLabels(_GraphLabels):
def __new__(cls, k, starting_size, n_tables):
@@ -306,6 +317,15 @@ def __new__(cls, k, starting_size, n_tables):
return nodegraph
+class Nodetable(_Nodetable):
+
+ def __new__(cls, k, starting_size, n_tables):
+ primes = get_n_primes_near_x(n_tables, starting_size)
+ nodetable = _Nodetable.__new__(cls, k, primes)
+ nodetable.primes = primes
+ return nodetable
+
+
class HLLCounter(_HLLCounter):
"""HyperLogLog counter.
View
@@ -0,0 +1,128 @@
+/*
+This file is part of khmer, https://github.com/dib-lab/khmer/, and is
+Copyright (C) 2010-2015, Michigan State University.
+Copyright (C) 2015-2016, The Regents of the University of California.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of the Michigan State University nor the names
+ of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written
+ permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+LICENSE (END)
+
+Contact: khmer-project@idyll.org
+*/
+
+typedef struct {
+ khmer_KHashtable_Object khashtable;
+ Counttable * counttable;
+} khmer_KCounttable_Object;
+
+static PyObject* khmer_counttable_new(PyTypeObject * type, PyObject * args,
+ PyObject * kwds);
+
+static PyTypeObject khmer_KCounttable_Type
+CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF("khmer_KCounttable_Object")
+= {
+ PyVarObject_HEAD_INIT(NULL, 0) /* init & ob_size */
+ "_khmer.Counttable", /* tp_name */
+ sizeof(khmer_KCounttable_Object), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ 0, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ 0, /*tp_compare*/
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash */
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
+ "counttable object", /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ khmer_counttable_new, /* tp_new */
+};
+
+
+//
+// khmer_counttable_new
+//
+
+static PyObject* khmer_counttable_new(PyTypeObject * type, PyObject * args,
+ PyObject * kwds)
+{
+ khmer_KCounttable_Object * self;
+
+ self = (khmer_KCounttable_Object *)type->tp_alloc(type, 0);
+
+ if (self != NULL) {
+ WordLength k = 0;
+ PyListObject * sizes_list_o = NULL;
@betatim

betatim Nov 15, 2016

Member

Do you know who owns the reference to the list that comes from PyArg_ParseTuple?

@betatim

betatim Nov 15, 2016

Member

PyArg_ParseTuple doesn't increase the reference count, and presumably the ref count can't decrease to zero while we are using an argument to this function.

@ctb

ctb Nov 15, 2016

Owner

Yep (or at least not with the GIL held).

+
+ if (!PyArg_ParseTuple(args, "bO!", &k, &PyList_Type, &sizes_list_o)) {
+ Py_DECREF(self);
+ return NULL;
+ }
+
+ std::vector<uint64_t> sizes;
+ if (!convert_Pytablesizes_to_vector(sizes_list_o, sizes)) {
+ Py_DECREF(self);
+ return NULL;
+ }
+
+ try {
+ self->counttable = new Counttable(k, sizes);
+ } catch (std::bad_alloc &e) {
+ Py_DECREF(self);
+ return PyErr_NoMemory();
+ }
+ self->khashtable.hashtable =
+ dynamic_cast<Hashtable*>(self->counttable);
+ }
+
+ return (PyObject *) self;
+}
Oops, something went wrong.