Skip to content

Commit

Permalink
Specify rapidnj's threads (#139)
Browse files (browse the repository at this point in the history)
* Specify rapidnj's threads

* Add threshold to tests
  • Loading branch information
johnlees committed Jan 7, 2021
1 parent 1ec9058 commit 55e99c1
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 15 deletions.
34 changes: 23 additions & 11 deletions PopPUNK/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,11 +619,13 @@ def writeClusterCsv(outfile, nodeNames, nodeLabels, clustering,
for col in d:
this_col_items = len(d[col])
if prev_col_items > -1 and prev_col_items != this_col_items:
sys.stderr.write("Discrepant length between " + prev_col_name + " (length of " + prev_col_items + ") and " + col + "(length of " + this_col_items + ")\n")
sys.stderr.write("Discrepant length between " + prev_col_name + \
" (length of " + prev_col_items + ") and " + \
col + "(length of " + this_col_items + ")\n")
sys.exit(1)


def buildRapidNJ(rapidnj, refList, coreMat, outPrefix, tree_filename):
def buildRapidNJ(rapidnj, refList, coreMat, outPrefix, tree_filename, threads = 1):
"""Use rapidNJ for more rapid tree building
Creates a phylip of core distances, system call to rapidnj executable, loads tree as
Expand All @@ -642,6 +644,8 @@ def buildRapidNJ(rapidnj, refList, coreMat, outPrefix, tree_filename):
Prefix for all generated output files, which will be placed in `outPrefix` subdirectory
tree_filename (str)
Filename for output tree (saved to disk)
threads (int)
Number of threads to use
Returns:
tree (dendropy.Tree)
Expand All @@ -657,7 +661,7 @@ def buildRapidNJ(rapidnj, refList, coreMat, outPrefix, tree_filename):
pFile.write("\n")

# construct tree
rapidnj_cmd = rapidnj + " " + phylip_name + " -n -i pd -o t -x " + tree_filename + ".raw"
rapidnj_cmd = rapidnj + " " + phylip_name + " -n -i pd -o t -x " + tree_filename + ".raw -c " + str(threads)
try:
# run command
subprocess.run(rapidnj_cmd, shell=True, check=True)
Expand All @@ -680,7 +684,7 @@ def buildRapidNJ(rapidnj, refList, coreMat, outPrefix, tree_filename):
return tree

def outputsForMicroreact(combined_list, coreMat, accMat, clustering, perplexity, outPrefix, epiCsv,
rapidnj, queryList = None, overwrite = False):
rapidnj, queryList = None, overwrite = False, threads = 1):
"""Generate files for microreact
Output a neighbour joining tree (.nwk) from core distances, a plot of t-SNE clustering
Expand Down Expand Up @@ -711,6 +715,8 @@ def outputsForMicroreact(combined_list, coreMat, accMat, clustering, perplexity,
(default = None)
overwrite (bool)
Overwrite existing output if present (default = False)
threads (int)
Number of threads to use with rapidnj
"""
# Avoid recursive import
from .tsne import generate_tsne
Expand All @@ -724,10 +730,10 @@ def outputsForMicroreact(combined_list, coreMat, accMat, clustering, perplexity,


# write the phylogeny .nwk; t-SNE network .dot; clusters + data .csv
generate_phylogeny(coreMat, seqLabels, outPrefix, "_core_NJ.nwk", rapidnj, overwrite)
generate_phylogeny(coreMat, seqLabels, outPrefix, "_core_NJ.nwk", rapidnj, overwrite, threads)
generate_tsne(seqLabels, accMat, perplexity, outPrefix, overwrite)

def generate_phylogeny(coreMat, seqLabels, outPrefix, tree_suffix, rapidnj, overwrite):
def generate_phylogeny(coreMat, seqLabels, outPrefix, tree_suffix, rapidnj, overwrite, threads):
"""Generate phylogeny using dendropy or RapidNJ
Writes a neighbour joining tree (.nwk) from core distances.
Expand All @@ -746,6 +752,8 @@ def generate_phylogeny(coreMat, seqLabels, outPrefix, tree_suffix, rapidnj, over
use dendropy by default
overwrite (bool)
Overwrite existing output if present (default = False)
threads (int)
Number of threads to use with rapidnj
"""
# Save distances to file
core_dist_file = outPrefix + "/" + os.path.basename(outPrefix) + "_core_dists.csv"
Expand All @@ -756,7 +764,7 @@ def generate_phylogeny(coreMat, seqLabels, outPrefix, tree_suffix, rapidnj, over
if overwrite or not os.path.isfile(tree_filename):
sys.stderr.write("Building phylogeny\n")
if rapidnj is not None:
tree = buildRapidNJ(rapidnj, seqLabels, coreMat, outPrefix, tree_filename)
tree = buildRapidNJ(rapidnj, seqLabels, coreMat, outPrefix, tree_filename, threads)
else:
pdm = dendropy.PhylogeneticDistanceMatrix.from_csv(src=open(core_dist_file),
delimiter=",",
Expand All @@ -779,7 +787,7 @@ def generate_phylogeny(coreMat, seqLabels, outPrefix, tree_suffix, rapidnj, over
os.remove(core_dist_file)

def outputsForPhandango(combined_list, coreMat, clustering, outPrefix, epiCsv, rapidnj,
queryList = None, overwrite = False, microreact = False):
queryList = None, overwrite = False, microreact = False, threads = 1):
"""Generate files for Phandango
Write a neighbour joining tree (.tree) from core distances
Expand Down Expand Up @@ -808,6 +816,8 @@ def outputsForPhandango(combined_list, coreMat, clustering, outPrefix, epiCsv, r
Overwrite existing output if present (default = False)
microreact (bool)
Avoid regenerating tree if already built for microreact (default = False)
threads (int)
Number of threads to use with rapidnj
"""
# generate sequence labels
seqLabels = isolateNameToLabel(combined_list)
Expand All @@ -823,10 +833,10 @@ def outputsForPhandango(combined_list, coreMat, clustering, outPrefix, epiCsv, r
sys.stderr.write('Copying microreact tree')
copyfile(microreact_tree_filename, phandango_tree_filename)
else:
generate_phylogeny(coreMat, seqLabels, outPrefix, "_core_NJ.tree", rapidnj, overwrite)
generate_phylogeny(coreMat, seqLabels, outPrefix, "_core_NJ.tree", rapidnj, overwrite, threads)

def outputsForGrapetree(combined_list, coreMat, clustering, outPrefix, epiCsv, rapidnj,
queryList = None, overwrite = False, microreact = False):
queryList = None, overwrite = False, microreact = False, threads = 1):
"""Generate files for Grapetree
Write a neighbour joining tree (.nwk) from core distances
Expand Down Expand Up @@ -859,6 +869,8 @@ def outputsForGrapetree(combined_list, coreMat, clustering, outPrefix, epiCsv, r
Overwrite existing output if present (default = False).
microreact (bool)
Avoid regenerating tree if already built for microreact (default = False).
threads (int)
Number of threads to use with rapidnj
"""
# generate sequence labels
seqLabels = isolateNameToLabel(combined_list)
Expand All @@ -872,6 +884,6 @@ def outputsForGrapetree(combined_list, coreMat, clustering, outPrefix, epiCsv, r
if microreact and os.path.isfile(microreact_tree_filename):
sys.stderr.write('Using microreact tree')
else:
generate_phylogeny(coreMat, seqLabels, outPrefix, "_core_NJ.nwk", rapidnj, overwrite)
generate_phylogeny(coreMat, seqLabels, outPrefix, "_core_NJ.nwk", rapidnj, overwrite, threads)


6 changes: 3 additions & 3 deletions PopPUNK/visualise.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,15 +314,15 @@ def generate_visualisations(query_db,
if microreact:
sys.stderr.write("Writing microreact output\n")
outputsForMicroreact(combined_seq, core_distMat, acc_distMat, isolateClustering, perplexity,
output, info_csv, rapidnj, queryList = qlist, overwrite = overwrite)
output, info_csv, rapidnj, queryList = qlist, overwrite = overwrite, threads = threads)
if phandango:
sys.stderr.write("Writing phandango output\n")
outputsForPhandango(combined_seq, core_distMat, isolateClustering, output, info_csv, rapidnj,
queryList = qlist, overwrite = overwrite, microreact = microreact)
queryList = qlist, overwrite = overwrite, microreact = microreact, threads = threads)
if grapetree:
sys.stderr.write("Writing grapetree output\n")
outputsForGrapetree(combined_seq, core_distMat, isolateClustering, output, info_csv, rapidnj,
queryList = qlist, overwrite = overwrite, microreact = microreact)
queryList = qlist, overwrite = overwrite, microreact = microreact, threads = threads)
if cytoscape:
sys.stderr.write("Writing cytoscape output\n")
genomeNetwork, cluster_file = fetchNetwork(prev_clustering, model, rlist, False, core_only, accessory_only)
Expand Down
6 changes: 5 additions & 1 deletion test/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,15 @@
sys.stderr.write("Running GMM model fit (--fit-model gmm)\n")
subprocess.run("python ../poppunk-runner.py --fit-model bgmm --ref-db example_db --K 4 --overwrite", shell=True, check=True)

#fit GMM
#fit dbscan
sys.stderr.write("Running DBSCAN model fit (--fit-model dbscan)\n")
subprocess.run("python ../poppunk-runner.py --fit-model dbscan --ref-db example_db --output example_dbscan --overwrite --graph-weights", shell=True, check=True)

#refine model with GMM
sys.stderr.write("Running model refinement (--fit-model refine)\n")
subprocess.run("python ../poppunk-runner.py --fit-model refine --ref-db example_db --output example_refine --neg-shift 0.8 --overwrite", shell=True, check=True)
subprocess.run("python ../poppunk-runner.py --fit-model refine --ref-db example_db --output example_refine --neg-shift 0.8 --overwrite --indiv-refine both", shell=True, check=True)
subprocess.run("python ../poppunk-runner.py --fit-model threshold --threshold 0.003 --ref-db example_db --output example_threshold", shell=True, check=True)

# lineage clustering
sys.stderr.write("Running lineage clustering test (--fit-model lineage)\n")
Expand All @@ -52,6 +53,9 @@
# viz
sys.stderr.write("Running visualisations (poppunk_visualise)\n")
subprocess.run("python ../poppunk_visualise-runner.py --ref-db example_db --output example_viz --microreact", shell=True, check=True)
subprocess.run("python ../poppunk_visualise-runner.py --ref-db example_db --output example_viz --cytoscape", shell=True, check=True)
subprocess.run("python ../poppunk_visualise-runner.py --ref-db example_db --output example_viz --phandango", shell=True, check=True)
subprocess.run("python ../poppunk_visualise-runner.py --ref-db example_db --output example_viz --grapetree", shell=True, check=True)
subprocess.run("python ../poppunk_visualise-runner.py --ref-db example_db --output example_viz_subset --microreact --include-files subset.txt", shell=True, check=True)
subprocess.run("python ../poppunk_visualise-runner.py --ref-db example_db --query-db example_query --output example_viz_query --microreact", shell=True, check=True)
subprocess.run("python ../poppunk_visualise-runner.py --ref-db example_db --previous-clustering example_lineages --model-dir example_lineages --output example_lineage_viz --microreact", shell=True, check=True)
Expand Down

0 comments on commit 55e99c1

Please sign in to comment.