In [1]:
from boink.signatures import SourmashSignature
from boink.parsing import FastxParser
from boink.alphabets import DNAN_SIMPLE
from boink import libboink
import cppyy
import cppyy.ll
from sourmash._lowlevel import ffi, lib

In [2]:
def bridge(num, ksize, is_protein=False, dayhoff=False, hp=False, seed=42, max_hash=0):
    """Build a boink signature and expose it as a sourmash ``MinHash``.

    A signature is created with ``SourmashSignature.Signature.build`` and the
    address of the pointer it hands out is cast to ``KmerMinHash *`` so that
    sourmash's ``MinHash`` can wrap the very same native object.

    Args:
        num, ksize, is_protein, dayhoff, hp, seed, max_hash: forwarded
            positionally, in this order, to ``SourmashSignature.Signature.build``.

    Returns:
        The sourmash ``MinHash`` wrapper. The boink signature it was built
        from is attached to it as ``_boink_ref``.
    """
    from sourmash import MinHash
    from sourmash._lowlevel import ffi

    # Create the boink-side, cppyy-managed KmerMinHash.
    build_args = (num, ksize, is_protein, dayhoff, hp, seed, max_hash)
    bk_sig = SourmashSignature.Signature.build(*build_args)

    # cppyy object -> integer address -> cffi pointer -> sourmash wrapper.
    raw_address = cppyy.ll.addressof(bk_sig._get_ptr())
    kmh_ptr = ffi.cast('KmerMinHash *', raw_address)
    sig = MinHash._from_objptr(kmh_ptr)

    # Hold a reference to the boink object on the wrapper so it stays
    # reachable for as long as the wrapper is.
    # NOTE(review): both objects now refer to the same native KmerMinHash;
    # confirm that only one side releases it when they are collected.
    sig._boink_ref = bk_sig

    return sig

In [3]:
# Only num and ksize are set; every other bridge() parameter keeps its default.
sig = bridge(num=500, ksize=21)

In [4]:
# The processor is built from the boink-side signature that bridge() stored
# on the wrapper as `_boink_ref`, not from the sourmash wrapper itself.
processor = SourmashSignature.Processor.build(sig._boink_ref)

In [5]:
# Run the example reads through the processor in chunks and report progress
# each time the yielded state has its `medium` flag set.
parser = FastxParser[DNAN_SIMPLE].build('../examples/ecoli.1.fastq.gz')
for n_reads, state in processor.chunked_process(parser):
    if not state.medium:
        continue
    print('Processed', n_reads, 'sequences.')

Processed 100000 sequences.
Processed 200000 sequences.
Processed 300000 sequences.
Processed 400000 sequences.
Processed 500000 sequences.
Processed 600000 sequences.
Processed 700000 sequences.
Processed 800000 sequences.
Processed 900000 sequences.
Processed 1000000 sequences.
Processed 1100000 sequences.
Processed 1200000 sequences.
Processed 1300000 sequences.
Processed 1400000 sequences.
Processed 1500000 sequences.
Processed 1600000 sequences.
Processed 1700000 sequences.
Processed 1800000 sequences.


In [7]:
# Read the hashes back through the sourmash-side wrapper.
# NOTE(review): this displays the full list; consider slicing it to keep the
# saved notebook output short.
sig.get_hashes()

[3205141980413,
 3236092822679,
 3703694776023,
 6514066417504,
 11713594549730,
 15214235558162,
 17551373616191,
 21150285087017,
 23555715885782,
 23587874902366,
 25771393241063,
 27362219888026,
 28337268266212,
 29520094084160,
 32603191963564,
 33355113951698,
 34635727113501,
 36460262049084,
 42387633352177,
 43341805525762,
 44060762338450,
 45971515261246,
 54267487195332,
 54928807149369,
 57980449935288,
 61762038119671,
 62764922061356,
 63144967958311,
 64532943199117,
 68707543473620,
 72019825479352,
 72830525677828,
 73076489911828,
 80389167076728,
 86709255682763,
 89217825112117,
 94292632250736,
 96823843735004,
 98055163335582,
 100178539101135,
 100743995926159,
 102616779995824,
 104113874982914,
 104177745548946,
 105077740230033,
 106776035873039,
 107138115093535,
 109925963465487,
 112565798999620,
 113269032034961,
 113769038976489,
 116289689660513,
 116589162149174,
 116925212033615,
 117793242025942,
 124408420690137,
 126141660684959,
 128463218921037,