Skip to content

Commit

Permalink
DEV: update postprocessing steps
Browse files Browse the repository at this point in the history
  • Loading branch information
Vini2 committed Aug 9, 2023
1 parent e91c9c4 commit 9657e53
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
5 changes: 4 additions & 1 deletion phables/workflow/rules/postprocess.smk
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,15 @@ rule koverage_postprocess:
"""Format TSV of samples and reads from Koverage"""
input:
koverage_tsv = os.path.join(OUTDIR, "results", "sample_coverage.tsv"),
samples_file = os.path.join(OUTDIR, "phables.samples.tsv")
samples_file = os.path.join(OUTDIR, "phables.samples.tsv"),
seq_file = os.path.join(OUTDIR, "genomes_and_unresolved_edges.fasta")
output:
os.path.join(OUTDIR, "sample_genome_read_counts.tsv")
params:
koverage_tsv = os.path.join(OUTDIR, "results", "sample_coverage.tsv"),
samples_file = os.path.join(OUTDIR, "phables.samples.tsv"),
seq_file = os.path.join(OUTDIR, "genomes_and_unresolved_edges.fasta"),
info_file = os.path.join(OUTDIR, "genomes_and_unresolved_edges_info.tsv"),
output_path = OUTDIR,
log = os.path.join(LOGSDIR, "format_koverage_results_output.log")
log:
Expand Down
15 changes: 15 additions & 0 deletions phables/workflow/scripts/format_koverage_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import logging
import os
import subprocess
from Bio import SeqIO
from collections import defaultdict

import pandas as pd
Expand All @@ -25,6 +26,8 @@ def main():

samples_file = snakemake.params.samples_file
koverage_tsv = snakemake.params.koverage_tsv
seq_file = snakemake.params.seq_file
info_file = snakemake.params.info_file
output_path = snakemake.params.output_path
log = snakemake.params.log

Expand Down Expand Up @@ -132,6 +135,18 @@ def main():
f"Estimated mean read depth of resolved genomes can be found in {output_path}sample_genome_mean_coverage.tsv"
)


# Make sequence information file
with open(info_file, "w") as myfile:
myfile.write(f"contig_phables_name\tlength\tcontig_or_phables\n")
for index, record in enumerate(SeqIO.parse(seq_file, "fasta")):
if "phage_comp" in record.id:
myfile.write(f"{record.id}\t{len(record.seq)}\tphables\n")
else:
myfile.write(f"{record.id}\t{len(record.seq)}\tcontig\n")

logger.info(f"Sequence information file can be found in {info_file}")

# Exit program
# --------------

Expand Down

0 comments on commit 9657e53

Please sign in to comment.