Skip to content

Commit

Permalink
Merge pull request #1758 from dib-lab/python/add_link_tag_and_label
Browse files Browse the repository at this point in the history
[MRG] add GraphLabels.link_tag_and_label to CPython API
  • Loading branch information
standage committed Aug 22, 2017
2 parents d5e7618 + 086d6db commit acdacef
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 14 deletions.
7 changes: 6 additions & 1 deletion include/khmer/_cpy_graphlabels.hh
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@

#include <Python.h>
#include "_cpy_utils.hh"
#include "_cpy_hashgraph.hh"
#include "oxli/labelhash.hh"


namespace khmer {

// Instance layout of the CPython GraphLabels object.
typedef struct {
PyObject_HEAD
// Embedded hashgraph object that serves as the 'base' for CPython-style
// inheritance; khmer_graphlabels_new points its hashtable/hashgraph members
// at the hashgraph passed to the constructor.
khmer_KHashgraph_Object khashgraph;
// Label store created in khmer_graphlabels_new from that same hashgraph.
oxli::LabelHash * labelhash;
} khmer_KGraphLabels_Object;

Expand Down Expand Up @@ -61,6 +62,10 @@ PyObject *
labelhash_get_tag_labels(khmer_KGraphLabels_Object * me, PyObject * args);


// CPython method registered as "link_tag_and_label" (METH_VARARGS) in
// khmer_graphlabels_methods. Parses a (tag, label) pair from `args`, links
// them through the wrapped LabelHash, and returns None; returns NULL when
// argument parsing or tag conversion fails.
PyObject *
labelhash_link_tag_and_label(khmer_KGraphLabels_Object * me, PyObject * args);


// CPython method registered as "n_labels" (METH_VARARGS) in
// khmer_graphlabels_methods.
PyObject *
labelhash_n_labels(khmer_KGraphLabels_Object * me, PyObject * args);

Expand Down
1 change: 1 addition & 0 deletions khmer/_oxli/graphs.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ cdef extern from "khmer/_cpy_khmer.hh":
CpCountgraph * countgraph

# Cython-side declaration of the C++ struct khmer::khmer_KGraphLabels_Object.
# The members are listed in the same order as in the C++ header:
# the embedded hashgraph object first, then the LabelHash pointer.
ctypedef struct CPyGraphLabels_Object "khmer::khmer_KGraphLabels_Object":
CPyHashgraph_Object khashgraph
CpLabelHash * labelhash


Expand Down
34 changes: 32 additions & 2 deletions src/khmer/_cpy_graphlabels.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ PyMethodDef khmer_graphlabels_methods[] = {
{"consume_partitioned_fasta_and_tag_with_labels", (PyCFunction)labelhash_consume_partitioned_fasta_and_tag_with_labels, METH_VARARGS, "" },
{"sweep_tag_neighborhood", (PyCFunction)labelhash_sweep_tag_neighborhood, METH_VARARGS, "" },
{"get_tag_labels", (PyCFunction)labelhash_get_tag_labels, METH_VARARGS, ""},
{"link_tag_and_label", (PyCFunction)labelhash_link_tag_and_label, METH_VARARGS, ""},
{"consume_sequence_and_tag_with_labels", (PyCFunction)labelhash_consume_sequence_and_tag_with_labels, METH_VARARGS, "" },
{"n_labels", (PyCFunction)labelhash_n_labels, METH_VARARGS, ""},
{"get_all_labels", (PyCFunction)labelhash_get_all_labels, METH_VARARGS, "" },
Expand All @@ -84,8 +85,8 @@ void khmer_graphlabels_dealloc(khmer_KGraphLabels_Object * obj)
Py_TYPE(obj)->tp_free((PyObject*)obj);
}

PyObject * khmer_graphlabels_new(PyTypeObject *type, PyObject *args,
PyObject *kwds)
PyObject * khmer_graphlabels_new(PyTypeObject *type, PyObject *args,
PyObject *kwds)
{
khmer_KGraphLabels_Object *self;
self = (khmer_KGraphLabels_Object*)type->tp_alloc(type, 0);
Expand All @@ -94,6 +95,7 @@ void khmer_graphlabels_dealloc(khmer_KGraphLabels_Object * obj)
PyObject * hashgraph_o;
Hashgraph * hashgraph = NULL; // @CTB

// GraphLabels takes a single argument, a hashgraph descendant.
if (!PyArg_ParseTuple(args, "O", &hashgraph_o)) {
Py_DECREF(self);
return NULL;
Expand All @@ -111,6 +113,10 @@ void khmer_graphlabels_dealloc(khmer_KGraphLabels_Object * obj)
Py_DECREF(self);
return NULL;
}
// set 'base' for CPython-style inheritance.
self->khashgraph.khashtable.hashtable =
dynamic_cast<Hashtable*>(hashgraph);
self->khashgraph.hashgraph = dynamic_cast<Hashgraph*>(hashgraph);

try {
self->labelhash = new LabelHash(hashgraph);
Expand Down Expand Up @@ -390,6 +396,30 @@ labelhash_get_tag_labels(khmer_KGraphLabels_Object * me, PyObject * args)
}


// Python-facing GraphLabels.link_tag_and_label(tag, label).
//
// `args` must hold two values: the tag, as any object accepted by
// ht_convert_PyObject_to_HashIntoType (the tests pass either an integer hash
// value or a k-mer string), and the label, parsed with the "K" format unit.
//
// Returns a new reference to None on success. Returns NULL if the arguments
// cannot be parsed or the tag cannot be converted (presumably with a Python
// exception already set by the failing call -- confirm against the helper).
//
// Note: LabelHash::link_tag_and_label is documented as assuming the tag is
// already present in graph->all_tags; this wrapper does not check that.
PyObject *
labelhash_link_tag_and_label(khmer_KGraphLabels_Object * me, PyObject * args)
{
    PyObject * py_tag;
    Label label_id;

    // Unpack the raw Python arguments before touching the label store.
    if (!PyArg_ParseTuple(args, "OK", &py_tag, &label_id)) {
        return NULL;
    }

    LabelHash * labels = me->labelhash;

    // Resolve the Python-level tag to a hash value using the wrapped graph.
    HashIntoType tag_hash;
    if (!ht_convert_PyObject_to_HashIntoType(py_tag, tag_hash, labels->graph)) {
        return NULL;
    }

    labels->link_tag_and_label(tag_hash, label_id);

    Py_INCREF(Py_None);
    return Py_None;
}


PyObject *
labelhash_n_labels(khmer_KGraphLabels_Object * me, PyObject * args)
{
Expand Down
21 changes: 14 additions & 7 deletions src/khmer/_cpy_hashgraph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -913,13 +913,17 @@ hashgraph_add_tag(khmer_KHashgraph_Object * me, PyObject * args)
{
Hashgraph * hashgraph = me->hashgraph;

const char * kmer_s = NULL;
if (!PyArg_ParseTuple(args, "s", &kmer_s)) {
PyObject * tag_o;
if (!PyArg_ParseTuple(args, "O", &tag_o)) {
return NULL;
}

HashIntoType tag;
if (!ht_convert_PyObject_to_HashIntoType(tag_o, tag, hashgraph)) {
return NULL;
}

HashIntoType kmer = hashgraph->hash_dna(kmer_s);
hashgraph->add_tag(kmer);
hashgraph->add_tag(tag);

Py_RETURN_NONE;
}
Expand All @@ -930,12 +934,15 @@ hashgraph_add_stop_tag(khmer_KHashgraph_Object * me, PyObject * args)
{
Hashgraph * hashgraph = me->hashgraph;

const char * kmer_s = NULL;
if (!PyArg_ParseTuple(args, "s", &kmer_s)) {
PyObject * kmer_o;
if (!PyArg_ParseTuple(args, "O", &kmer_o)) {
return NULL;
}

HashIntoType kmer = hashgraph->hash_dna(kmer_s);
HashIntoType kmer;
if (!ht_convert_PyObject_to_HashIntoType(kmer_o, kmer, hashgraph)) {
return NULL;
}
hashgraph->add_stop_tag(kmer);

Py_RETURN_NONE;
Expand Down
4 changes: 3 additions & 1 deletion src/oxli/labelhash.cc
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,9 @@ void LabelHash::consume_partitioned_fasta_and_tag_with_labels(
printdbg(deleted parser and exiting)
}

// @cswelcher: double-check -- is it valid to pull the address from a reference?
// Note: this function assumes that 'kmer' is already in graph->all_tags;
// see usage elsewhere in this code.

void LabelHash::link_tag_and_label(const HashIntoType kmer,
const Label kmer_label)
{
Expand Down
43 changes: 40 additions & 3 deletions tests/test_labelhash.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,21 +201,58 @@ def test_get_tag_labels():
assert labels.pop() == 0


def test_link_tag_and_label():
    """Link an integer tag to label 1 and read that single label back."""
    graph_labels = GraphLabels(20, 1, 1)
    hashval = 173473779682

    # The tag is added to the graph before it is linked to a label.
    graph_labels.add_tag(hashval)
    graph_labels.link_tag_and_label(hashval, 1)

    found = graph_labels.get_tag_labels(hashval)
    assert len(found) == 1
    assert found.pop() == 1


def test_link_tag_and_label_using_string():
    """Same round trip as the integer case, but the tag is a k-mer string."""
    graph_labels = GraphLabels(20, 1, 1)

    # Obtain the string form of the tag from its hash value.
    kmer_str = graph_labels.reverse_hash(173473779682)

    graph_labels.add_tag(kmer_str)
    graph_labels.link_tag_and_label(kmer_str, 1)

    found = graph_labels.get_tag_labels(kmer_str)
    assert len(found) == 1
    assert found.pop() == 1


def test_link_tag_and_label_using_string_2():
    """Link using the k-mer string, then look the label up by hash value."""
    graph_labels = GraphLabels(20, 1, 1)
    hashval = 173473779682
    kmer_str = graph_labels.reverse_hash(hashval)

    graph_labels.add_tag(kmer_str)
    graph_labels.link_tag_and_label(kmer_str, 1)

    # Query with the integer hash rather than the string used for linking.
    found = graph_labels.get_tag_labels(hashval)
    assert len(found) == 1
    assert found.pop() == 1


def test_consume_seqfile_and_tag_with_labels():
lb = GraphLabels(20, 1e7, 4)
read_1 = 'ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTT'
filename = utils.get_test_data('test-transcript.fa')

total_reads, _ = lb.consume_seqfile_and_tag_with_labels(filename)
print("doing get")
assert lb.graph.get(read_1[:20])
assert lb.get(read_1[:20])
assert total_reads == 3
print("doing n_labels")
print(lb.n_labels())
print("doing all labels")
print(lb.get_all_labels())
print("get tagset")
for tag in lb.graph.get_tagset():
for tag in lb.get_tagset():
print("forward hash")
print(tag, khmer.forward_hash(tag, 20))
for record in screed.open(filename):
Expand Down Expand Up @@ -259,7 +296,7 @@ def test_consume_sequence_and_tag_with_labels():
def test_sweep_tag_neighborhood():
lb = GraphLabels(20, 1e7, 4)
filename = utils.get_test_data('single-read.fq')
lb.graph.consume_seqfile_and_tag(filename)
lb.consume_seqfile_and_tag(filename)

tags = lb.sweep_tag_neighborhood('CAGGCGCCCACCACCGTGCCCTCCAACCTGATGGT')
assert len(tags) == 1
Expand Down
40 changes: 40 additions & 0 deletions tests/test_nodegraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,46 @@ def test_bad_create():
assert 'tablesizes needs to be one or more numbers' in str(err)


def test_add_tag():
    """Adding a tag given as a k-mer string yields exactly that one tag."""
    ng = khmer._Nodegraph(6, [1])
    assert ng.n_tags() == 0

    ng.add_tag('AATAAG')
    assert ng.n_tags() == 1

    print(ng.get_tagset())
    assert ng.get_tagset() == ['AATAAG']


def test_add_tag_hashval():
    """Adding a tag given as a hash value yields the corresponding k-mer."""
    ng = khmer._Nodegraph(6, [1])
    assert ng.n_tags() == 0

    # Hash the k-mer first and pass the hash value, not the string.
    hashval = ng.hash('AATAAG')
    ng.add_tag(hashval)
    assert ng.n_tags() == 1

    print(ng.get_tagset())
    assert ng.get_tagset() == ['AATAAG']


def test_add_stop_tag():
    """A stop tag added as a k-mer string is reported by get_stop_tags()."""
    ng = khmer._Nodegraph(6, [1])

    ng.add_stop_tag('AATAAG')

    print(ng.get_stop_tags())
    assert ng.get_stop_tags() == ['AATAAG']


def test_add_stop_tag_hashval():
    """A stop tag added as a hash value is reported as its k-mer string."""
    ng = khmer._Nodegraph(6, [1])

    # Hash the k-mer first and pass the hash value, not the string.
    hashval = ng.hash('AATAAG')
    ng.add_stop_tag(hashval)

    print(ng.get_stop_tags())
    assert ng.get_stop_tags() == ['AATAAG']


def test__get_set_tag_density():
nodegraph = khmer._Nodegraph(32, [1])
orig = nodegraph._get_tag_density()
Expand Down

0 comments on commit acdacef

Please sign in to comment.