Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cython tidying #1774

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 0 additions & 14 deletions include/khmer/_cpy_khmer.hh
Original file line number Diff line number Diff line change
Expand Up @@ -77,20 +77,6 @@ Contact: khmer-project@idyll.org

// Declarations exposed by the khmer CPython wrapper header.
namespace khmer {

// Module-level functions. Each takes (self, args) and returns a PyObject*,
// which is the shape CPython expects for entries in a PyMethodDef table.
// NOTE(review): argument parsing and return types live in the definitions,
// which are not visible here -- confirm before relying on them.
PyObject * forward_hash(PyObject * self, PyObject * args);

PyObject * forward_hash_no_rc(PyObject * self, PyObject * args);

PyObject * reverse_hash(PyObject * self, PyObject * args);

PyObject * murmur3_forward_hash(PyObject * self, PyObject * args);

PyObject * murmur3_forward_hash_no_rc(PyObject * self, PyObject * args);

PyObject * reverse_complement(PyObject * self, PyObject * args);

PyObject * get_version_cpp( PyObject * self, PyObject * args );

// Method table; declared here, defined in another translation unit.
extern PyMethodDef KhmerMethods[];

}
Expand Down
2 changes: 2 additions & 0 deletions include/oxli/oxli.hh
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ private:\
namespace oxli
{

extern std::string get_version_cpp();

// largest number we can count up to, exactly. (8 bytes)
typedef unsigned long long int ExactCounterType;

Expand Down
11 changes: 11 additions & 0 deletions include/oxli/oxli_exception.hh
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,17 @@ public:
: oxli_file_exception(msg) {}
};


// File-level exception type for an empty input stream.
// NOTE(review): presumably thrown when a stream yields no data at all --
// the throw sites are not visible here; confirm against the parser code.
class EmptyStream : public oxli_file_exception
{
public:
// Default constructor: uses a generic placeholder message.
EmptyStream()
: oxli_file_exception("Generic EmptyStream error") {}
// Construct with a caller-supplied message, forwarded to the base class.
explicit EmptyStream(const std::string& msg)
: oxli_file_exception(msg) {}
};


class StreamReadError : public oxli_file_exception
{
public:
Expand Down
143 changes: 21 additions & 122 deletions khmer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,145 +36,51 @@
"""This is khmer; please see http://khmer.readthedocs.io/."""


from collections import namedtuple
from math import log
import json


from khmer._khmer import Read
from khmer._khmer import forward_hash
# tests/test_{functions,countgraph,counting_single}.py

from khmer._khmer import forward_hash_no_rc # tests/test_functions.py

from khmer._khmer import reverse_hash # tests/test_functions.py
# tests/counting_single.py

from khmer._khmer import hash_murmur3 # tests/test_functions.py
from khmer._khmer import hash_no_rc_murmur3 # tests/test_functions.py

from khmer._khmer import reverse_complement

from khmer._khmer import get_version_cpp as __version_cpp__
# tests/test_version.py

from khmer._khmer import ReadParser # sandbox/to-casava-1.8-fastq.py
# tests/test_read_parsers.py,scripts/{filter-abund-single,load-graph}.py
# scripts/{abundance-dist-single,load-into-counting}.py

from khmer._khmer import FILETYPES
from khmer._oxli.assembly import (LinearAssembler, SimpleLabeledAssembler,
JunctionCountAssembler)

from khmer._oxli.graphs import (Counttable, QFCounttable, Nodetable,
SmallCounttable, Countgraph, SmallCountgraph,
Nodegraph)
from khmer._oxli.labeling import GraphLabels
from khmer._oxli.legacy_partitioning import SubsetPartition, PrePartitionInfo
from khmer._oxli.parsing import FastxParser
from khmer._oxli.readaligner import ReadAligner
Nodegraph, _buckets_per_byte)

from khmer._oxli.utils import get_n_primes_near_x, is_prime
import sys
from khmer._oxli.hashing import (forward_hash, forward_hash_no_rc,
reverse_hash, hash_murmur3,
hash_no_rc_murmur3,
reverse_complement)

from struct import pack, unpack
from khmer._oxli.hashset import HashSet

from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
from khmer._oxli.hllcounter import HLLCounter

from khmer._oxli.labeling import GraphLabels

# Lookup table keyed by lowercase table-type name.
# NOTE(review): presumably the number of table buckets that fit in one byte
# of memory for each type -- the consumers are not visible here; confirm.
_buckets_per_byte = {
# calculated by hand from settings in third-party/cqf/gqf.h
'qfcounttable': 1 / 1.26,
'countgraph': 1,
'smallcountgraph': 2,
'nodegraph': 8,
}
from khmer._oxli.legacy_partitioning import SubsetPartition, PrePartitionInfo

from khmer._oxli.parsing import (FastxParser, SanitizedFastxParser,
BrokenPairedReader)

def extract_nodegraph_info(filename):
"""Open the given nodegraph file and return a tuple of information.
from khmer._oxli.readaligner import ReadAligner

Returns: the k-mer size, the table size, the number of tables, the version
of the table format, and the type of table flag.
from khmer._oxli.utils import get_n_primes_near_x, is_prime, FILETYPES
from khmer._oxli.utils import get_version_cpp as __version_cpp__

Keyword argument:
filename -- the name of the nodegraph file to inspect
"""
ksize = None
n_tables = None
table_size = None
signature = None
version = None
ht_type = None
occupied = None

uint_size = len(pack('I', 0))
uchar_size = len(pack('B', 0))
ulonglong_size = len(pack('Q', 0))

try:
with open(filename, 'rb') as nodegraph:
signature, = unpack('4s', nodegraph.read(4))
version, = unpack('B', nodegraph.read(1))
ht_type, = unpack('B', nodegraph.read(1))
ksize, = unpack('I', nodegraph.read(uint_size))
n_tables, = unpack('B', nodegraph.read(uchar_size))
occupied, = unpack('Q', nodegraph.read(ulonglong_size))
table_size, = unpack('Q', nodegraph.read(ulonglong_size))
if signature != b"OXLI":
raise ValueError("Node graph '{}' is missing file type "
"signature".format(filename) + str(signature))
except:
raise ValueError("Node graph '{}' is corrupt ".format(filename))

return ksize, round(table_size, -2), n_tables, version, ht_type, occupied


def extract_countgraph_info(filename):
"""Open the given countgraph file and return a tuple of information.

Return: the k-mer size, the table size, the number of tables, the bigcount
flag, the version of the table format, and the type of table flag.
import sys

Keyword argument:
filename -- the name of the countgraph file to inspect
"""
CgInfo = namedtuple("CgInfo", ['ksize', 'n_tables', 'table_size',
'use_bigcount', 'version', 'ht_type',
'n_occupied'])
ksize = None
n_tables = None
table_size = None
signature = None
version = None
ht_type = None
use_bigcount = None
occupied = None

uint_size = len(pack('I', 0))
ulonglong_size = len(pack('Q', 0))

try:
with open(filename, 'rb') as countgraph:
signature, = unpack('4s', countgraph.read(4))
version, = unpack('B', countgraph.read(1))
ht_type, = unpack('B', countgraph.read(1))
if ht_type != FILETYPES['SMALLCOUNT']:
use_bigcount, = unpack('B', countgraph.read(1))
else:
use_bigcount = None
ksize, = unpack('I', countgraph.read(uint_size))
n_tables, = unpack('B', countgraph.read(1))
occupied, = unpack('Q', countgraph.read(ulonglong_size))
table_size, = unpack('Q', countgraph.read(ulonglong_size))
if signature != b'OXLI':
raise ValueError("Count graph file '{}' is missing file type "
"signature. ".format(filename) + str(signature))
except:
raise ValueError("Count graph file '{}' is corrupt ".format(filename))

return CgInfo(ksize, n_tables, round(table_size, -2), use_bigcount,
version, ht_type, occupied)

from ._version import get_versions
__version__ = get_versions()['version']
del get_versions


def calc_expected_collisions(graph, force=False, max_false_pos=.2):
Expand Down Expand Up @@ -212,10 +118,3 @@ def calc_expected_collisions(graph, force=False, max_false_pos=.2):
sys.exit(1)

return fp_all


from khmer._oxli.assembly import (LinearAssembler, SimpleLabeledAssembler,
JunctionCountAssembler)
from khmer._oxli.hashset import HashSet
from khmer._oxli.hllcounter import HLLCounter
from khmer._oxli.labeling import GraphLabels
6 changes: 0 additions & 6 deletions khmer/_oxli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +0,0 @@
from .assembly import LinearAssembler
from .hashing import Kmer
from .parsing import Alphabets, Sequence, ReadBundle, UnpairedReadsError
from .parsing import FastxParser, SanitizedFastxParser, SplitPairedReader
from .parsing import BrokenPairedReader, _split_left_right
from .parsing import check_is_left, check_is_right, check_is_pair
3 changes: 3 additions & 0 deletions khmer/_oxli/graphs.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ from khmer._oxli.hashing cimport Kmer, CpKmer, KmerSet, CpKmerFactory, CpKmerIte
from khmer._oxli.parsing cimport CpReadParser, CpSequence, FastxParserPtr
from khmer._oxli.legacy_partitioning cimport (CpSubsetPartition, cp_pre_partition_info,
SubsetPartition)
from khmer._oxli.sequence cimport Sequence
from khmer._oxli.utils cimport oxli_raise_py_error


Expand Down Expand Up @@ -249,6 +250,8 @@ cdef class Hashtable:
cdef FastxParserPtr _get_parser(self, object parser_or_filename) except *
cdef list _get_raw_tables(self, uint8_t **, vector[uint64_t])

cdef int _trim_on_abundance(self, Sequence sequence, int abundance)


cdef class QFCounttable(Hashtable):
cdef shared_ptr[CpQFCounttable] _qf_this
Expand Down
Loading