Skip to content

Commit

Permalink
fix: Remove large dataset error on the rSPR entry (#177)
Browse files Browse the repository at this point in the history
* RSPR approx  and exact scripts

* Add functional documentation and update support threshold

* Fix rspr argument

* Use clustering information from rSPR

* Generate cluster heatmap

* Revert "Generate cluster heatmap"

This reverts commit f00f491.

* Revert "Use clustering information from rSPR"

This reverts commit ee043b3.

* Expect path to the csv containing gene tree paths in rspr approx

* refactor: Accept samplesheet as input in rspr_approx

---------

Co-authored-by: KARTIK KAKADIYA <20909285+ktkakadiya@users.noreply.github.com>
Co-authored-by: ktkakadiya <99840261+ktkakadiya@users.noreply.github.com>
  • Loading branch information
3 people committed Nov 22, 2023
1 parent b22988b commit 9a119b4
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 15 deletions.
9 changes: 5 additions & 4 deletions bin/rspr_approx.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def parse_args(args=None):
parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
parser.add_argument("-c", "--core", dest="CORE_TREE", help="Core tree")
parser.add_argument(
"-a", "--acc", dest="GENE_TREES", nargs="+", help="Gene tree list"
"-a", "--acc", dest="GENE_TREES", help="Gene tree samplesheet path"
)
parser.add_argument(
"-ann",
Expand Down Expand Up @@ -98,15 +98,15 @@ def root_tree(input_path, output_path):
### FUNCTION ROOT_TREES
### Root the core tree and all the unrooted gene trees listed in the input csv file
### core_tree: path of the core tree
### gene_trees: input gene tree directory path
### gene_trees_path: path of the csv file containing all the gene tree paths
### output_dir: output directory path
### results: dataframe of the results to store tree size
### merge_pair: boolean to check whether to merge core tree and gene tree in a single file
### RETURN path of the rooted gene trees directory
#####################################################################


def root_trees(core_tree, gene_trees, output_dir, results, merge_pair=False):
def root_trees(core_tree, gene_trees_path, output_dir, results, merge_pair=False):
print("Rooting trees")
#'''
reference_tree = core_tree
Expand All @@ -119,8 +119,9 @@ def root_trees(core_tree, gene_trees, output_dir, results, merge_pair=False):
)
refer_content, refer_tree_size = root_tree(reference_tree, rooted_reference_tree)

df_gene_trees = pd.read_csv(gene_trees_path)
rooted_gene_trees_path = os.path.join(output_dir, "rooted_gene_trees")
for filename in gene_trees:
for filename in df_gene_trees["path"]:
basename = Path(filename).name
rooted_gene_tree_path = os.path.join(rooted_gene_trees_path, basename)
gene_content, gene_tree_size = root_tree(filename, rooted_gene_tree_path)
Expand Down
2 changes: 1 addition & 1 deletion modules/local/rspr/approx.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ process RSPR_APPROX {
"""
rspr_approx.py \\
--core $core_tree \\
--acc \$(cat $gene_tree_list) \\
--acc $gene_tree_list \\
--annotation $annotation \\
-o approx \\
--min_rspr_distance $min_rspr_distance \\
Expand Down
8 changes: 1 addition & 7 deletions subworkflows/local/rspr.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,11 @@ workflow RSPR {

take:
core_tree
gene_trees
gene_tree_sheet
annotation

main:

gene_trees
.flatten()
.map{it -> it.toString() }
.collectFile(name: 'gene_tree_paths.txt', newLine: true)
.set{ gene_tree_sheet }

RSPR_APPROX (
core_tree,
gene_tree_sheet,
Expand Down
2 changes: 0 additions & 2 deletions subworkflows/local/rspr_input_check.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ workflow RSPR_INPUT_CHECK {

main:
samplesheet
.splitCsv(header: true)
.map { it -> get_sample_info_rspr(it.path) }
.set { trees }

emit:
Expand Down
15 changes: 14 additions & 1 deletion workflows/arete.nf
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,22 @@ workflow ARETE {
}

if (params.run_rspr) {
PHYLOGENOMICS.out.gene_trees
.flatten()
.map{it -> it.toString() }
.collectFile(newLine: true) { item ->
["${item}.txt",
"sample,path\n" + item + ',' + item ]
}
.set { individual_sheets }

individual_sheets
.collectFile(name: 'gene_tree_paths.txt', skip:1 , keepHeader: true)
.set{ gene_tree_sheet }

RSPR (
PHYLOGENOMICS.out.core_tree,
PHYLOGENOMICS.out.gene_trees,
gene_tree_sheet,
ANNOTATE_ASSEMBLIES.out.annotation
)
}
Expand Down

0 comments on commit 9a119b4

Please sign in to comment.