Skip to content

Commit

Permalink
Merge branch 'new-sketchlib'
Browse files Browse the repository at this point in the history
  • Loading branch information
johnlees committed Apr 20, 2020
2 parents 28c3e83 + f47c392 commit 6dcc6b1
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 9 deletions.
17 changes: 15 additions & 2 deletions PopPUNK/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from .models import *

from .sketchlib import no_sketchlib
from .sketchlib import no_sketchlib, checkSketchlibLibrary

from .network import fetchNetwork
from .network import constructNetwork
Expand Down Expand Up @@ -115,6 +115,12 @@ def get_options():
kmerGroup.add_argument('--k-step', default = 4, type=int, help='K-mer step size [default = 4]')
kmerGroup.add_argument('--sketch-size', default=10000, type=int, help='Kmer sketch size [default = 10000]')
kmerGroup.add_argument('--min-kmer-count', default=0, type=int, help='Minimum k-mer count when using reads as input [default = 0]')
kmerGroup.add_argument('--exact-count', default=False, action='store_true',
help='Use the exact k-mer counter with reads '
'[default = use countmin counter]')
kmerGroup.add_argument('--strand-preserved', default=False, action='store_true',
help='Treat input as being on the same strand, and ignore reverse complement '
'k-mers [default = use canonical k-mers]')

# qc options
qcGroup = parser.add_argument_group('Quality control options')
Expand Down Expand Up @@ -260,7 +266,14 @@ def main():
# run according to mode
sys.stderr.write("PopPUNK (POPulation Partitioning Using Nucleotide Kmers)\n")
sys.stderr.write("\t(with backend: " + dbFuncs['backend'] + " v" + dbFuncs['backend_version'] + ")\n")

# Refuse to run with a sketchlib backend older than v1.3.0, otherwise report
# where the sketchlib library was loaded from.
if dbFuncs['backend'] == 'sketchlib':
    # split() yields strings, so convert the components to ints before
    # comparing; comparing (major, minor) as a tuple accepts 1.3+, 2.0, ...
    # and rejects 0.x and 1.0-1.2.
    # Only the first two components are parsed, so a suffix on the patch
    # component is tolerated. NOTE(review): assumes major and minor are plain
    # integers - confirm against checkSketchlibVersion().
    sketchlib_version = tuple(
        int(part) for part in dbFuncs['backend_version'].strip().split(".")[:2]
    )
    if sketchlib_version < (1, 3):
        sys.stderr.write("This version of PopPUNK requires sketchlib v1.3.0 or higher\n")
        sys.exit(1)
    else:
        sys.stderr.write('\tsketchlib: ' + checkSketchlibLibrary() + '\n')

#******************************#
#* *#
#* Create database *#
Expand Down
37 changes: 31 additions & 6 deletions PopPUNK/sketchlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
try:
no_sketchlib = False
import pp_sketchlib
print("sketchlib in " + pp_sketchlib.__file__)
import h5py
except ImportError as e:
sys.stderr.write("Sketchlib backend not available")
Expand All @@ -41,7 +40,7 @@
sketchlib_exe = "poppunk_sketch"

def checkSketchlibVersion():
"""Checks that sketchlib can be run, and returns versiob
"""Checks that sketchlib can be run, and returns version
Returns:
version (str)
Expand All @@ -56,6 +55,16 @@ def checkSketchlibVersion():

return version

def checkSketchlibLibrary():
    """Report where the imported pp_sketchlib module lives on disk

    Returns:
        lib (str)
            Value of ``pp_sketchlib.__file__``, i.e. the path the
            sketchlib module was loaded from
    """
    return pp_sketchlib.__file__

def createDatabaseDir(outPrefix, kmers):
"""Creates the directory to write sketches to, removing old files if unnecessary
Expand Down Expand Up @@ -280,7 +289,8 @@ def removeFromDB(db_name, out_name, removeSeqs):

def constructDatabase(assemblyList, klist, sketch_size, oPrefix, estimated_length,
ignoreLengthOutliers = False, threads = 1, overwrite = False,
reads = False, min_count = 0):
reads = False, strand_preserved = False, min_count = 0,
use_exact = False):
"""Sketch the input assemblies at the requested k-mer lengths
A multithread wrapper around :func:`~runSketch`. Threads are used to either run multiple sketch
Expand Down Expand Up @@ -316,6 +326,18 @@ def constructDatabase(assemblyList, klist, sketch_size, oPrefix, estimated_lengt
reads (bool)
If any reads are being used as input, do not run QC
(default = False)
strand_preserved (bool)
Ignore reverse complement k-mers
(default = False)
min_count (int)
Minimum count of k-mer in reads to include
(default = 0)
use_exact (bool)
Use exact count of k-mer appearance in reads
(default = False)
"""
names, sequences = readRfile(assemblyList)
Expand All @@ -330,7 +352,8 @@ def constructDatabase(assemblyList, klist, sketch_size, oPrefix, estimated_lengt
sys.stderr.write("Overwriting db: " + dbfilename + "\n")
os.remove(dbfilename)

pp_sketchlib.constructDatabase(dbname, names, sequences, klist, sketch_size, min_count, threads)
pp_sketchlib.constructDatabase(dbname, names, sequences, klist, sketch_size,
not strand_preserved, min_count, use_exact, threads)

def queryDatabase(rNames, qNames, dbPrefix, queryPrefix, klist, self = True, number_plot_fits = 0,
threads = 1, use_gpu = False, deviceid = 0):
Expand Down Expand Up @@ -394,7 +417,8 @@ def queryDatabase(rNames, qNames, dbPrefix, queryPrefix, klist, self = True, num
qNames = rNames

# Calls to library
distMat = pp_sketchlib.queryDatabase(ref_db, ref_db, rNames, rNames, klist, threads, use_gpu, deviceid)
distMat = pp_sketchlib.queryDatabase(ref_db, ref_db, rNames, rNames, klist,
False, threads, use_gpu, deviceid)

# option to plot core/accessory fits. Choose a random number from cmd line option
if number_plot_fits > 0:
Expand All @@ -418,6 +442,7 @@ def queryDatabase(rNames, qNames, dbPrefix, queryPrefix, klist, self = True, num
exit(0)

# Calls to library
distMat = pp_sketchlib.queryDatabase(ref_db, query_db, rNames, qNames, klist, threads, use_gpu, deviceid)
distMat = pp_sketchlib.queryDatabase(ref_db, query_db, rNames, qNames, klist,
False, threads, use_gpu, deviceid)

return(rNames, qNames, distMat)
3 changes: 2 additions & 1 deletion PopPUNK/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ def setupDBFuncs(args, kmers, min_count):
backend = "sketchlib"
version = checkSketchlibVersion()

constructDatabase = partial(constructDatabaseSketchlib, min_count = min_count)
constructDatabase = partial(constructDatabaseSketchlib, strand_preserved = args.strand_preserved,
min_count = args.min_kmer_count, use_exact = args.exact_count)
queryDatabase = partial(queryDatabaseSketchlib, use_gpu = args.use_gpu, deviceid = args.deviceid)

# Dict of DB access functions for assign_query (which is out of scope)
Expand Down

0 comments on commit 6dcc6b1

Please sign in to comment.