In [None]:
# Inputs for this report. All three paths point into the results directory of
# the same run (identical run id in each path).
# NOTE(review): these look like per-run values supplied by whatever executes the
# notebook -- confirm before editing them by hand.
# Tab-separated combined Bracken table (read by get_bracken_results).
bracken = "/home/nomo/research/aligndx/backend/data/results/1d2a5651-5356-42ee-963a-445481629ad5/bracken/bracken_combined.tsv"
# YAML file of software versions (read by create_software_table).
software = "/home/nomo/research/aligndx/backend/data/results/1d2a5651-5356-42ee-963a-445481629ad5/pipeline_info/software_versions.yml"
# Comma-separated panel metadata (read by create_detection_table).
panels = "/home/nomo/research/aligndx/backend/data/results/1d2a5651-5356-42ee-963a-445481629ad5/assets/panels.csv"
# List of panel names; the detection-table call below reads only `panel[0]`.
panel = ['Respiratory']

In [None]:
#utils
from IPython.display import Markdown

def makemd(md):
    """Render a Markdown-formatted string as rich notebook output.

    Parameters
    ----------
    md : str
        Markdown source text; it is wrapped in ``IPython.display.Markdown``
        and handed to ``display``.
    """
    rendered = Markdown(md)
    display(rendered)

In [None]:
import pandas as pd
from pathlib import Path

# Markdown header shown above the detections table. The `{panel}` and `{thresh}`
# placeholders are filled in with str.format inside create_detection_table.
selected_panel = """
# Detections
## Detections Table 
Detections were generated using the selected {panel} pathogen panel with an abundance threshold of {thresh}
"""

def get_bracken_results(path):
    """Load a combined Bracken table and keep only the per-sample fraction columns.

    The ``taxonomy_id`` and ``taxonomy_lvl`` columns are removed, every column
    whose name ends in ``_num`` is dropped, and the trailing ``_frac`` suffix is
    stripped from the remaining column names so each one is labelled by the
    bare prefix (presumably the sample name -- confirm against the pipeline).

    Parameters
    ----------
    path : str, path-like or file-like
        Tab-separated file (or buffer) passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the non-taxonomy, non-``_num`` columns, renamed
        without their ``_frac`` suffix.

    Raises
    ------
    KeyError
        If ``taxonomy_id`` or ``taxonomy_lvl`` is missing from the file.
    """
    raw = pd.read_csv(path, sep='\t')
    trimmed = raw.drop(columns=["taxonomy_id", "taxonomy_lvl"])

    # Match on the suffix only: a sample whose own name contains "_num"
    # (e.g. "run_num_1") must not lose its "_frac" column.
    is_count_column = trimmed.columns.str.endswith("_num")
    fractions = trimmed.loc[:, ~is_count_column].copy()

    # Strip only a trailing "_frac" so names containing "_frac" elsewhere
    # are not truncated.
    fractions.columns = fractions.columns.str.replace(r"_frac$", "", regex=True)
    return fractions

def create_detection_table(bracken_path, panels_path, panel, thresh):
    """Build the detection (Y/N) and abundance tables for one pathogen panel.

    When the Bracken file exists, the section header is rendered with
    ``makemd`` and the panel's pathogens are left-joined to the Bracken results
    on the ``name`` column. Pathogens with no Bracken row get 0 in every value
    column.

    Parameters
    ----------
    bracken_path : str or path-like
        Location of the combined Bracken TSV.
    panels_path : str or path-like
        Comma-separated panel metadata; read with its first column as the
        index. Must contain ``'Organism description'``, ``'Name'`` and a column
        named after ``panel``.
    panel : str
        Name of the panel column; rows whose value is ``'Y'`` are selected.
    thresh : number
        A value counts as detected when it is strictly greater than ``thresh``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame) or None
        ``(detections, abundance)``, both with a ``'Pathogen'`` column followed
        by one column per Bracken value column. ``detections`` holds
        ``'Y'``/``'N'`` and ``abundance`` holds the values. Returns ``None``
        when ``bracken_path`` does not exist.
    """
    if not Path(bracken_path).exists():
        return None

    makemd(selected_panel.format(panel=panel, thresh=thresh))
    bracken_results = get_bracken_results(bracken_path)
    metadata = pd.read_csv(panels_path, sep=',', index_col=0)

    in_panel = metadata[panel] == 'Y'
    pathogens = (
        metadata.loc[in_panel, ['Organism description', 'Name']]
        .reset_index(drop=True)
        .rename(columns={'Organism description': 'Pathogen', 'Name': 'name'})
    )

    # Join explicitly on the organism name instead of relying on pandas to
    # infer the key from whichever column names happen to overlap.
    abundance = (
        pathogens
        .merge(bracken_results, how='left', on='name')
        .fillna(0)  # panel members absent from the Bracken output
        .drop(columns=['name'])
    )

    # Vectorized comparison instead of the deprecated DataFrame.applymap.
    above_thresh = abundance.set_index('Pathogen').gt(thresh)
    detections = (
        above_thresh
        .apply(lambda column: column.map({True: 'Y', False: 'N'}))
        .reset_index()
    )
    return (detections, abundance)

# create_detection_table returns None when the Bracken file is missing, and
# unpacking None directly would raise TypeError. Fall back to a pair of Nones
# so the cell still runs and simply renders nothing.
detection_tables = create_detection_table(bracken, panels, panel[0], thresh=0)
detections, abundance = detection_tables if detection_tables is not None else (None, None)
detections

In [None]:
import yaml
from platform import python_version
import pandas as pd

# Markdown header shown above the software-versions table; rendered by
# create_software_table via makemd.
software_md = """
# Software Versions
Collected at run time from software output
"""

def create_software_table(path):
    """Flatten a software-versions YAML file into a three-column table.

    The file is expected to be a two-level mapping of
    ``process -> {software: version}``; each inner entry becomes one row.
    When the file exists, the section header is rendered with ``makemd``
    before the table is built.

    Parameters
    ----------
    path : str or path-like
        Location of the YAML file.

    Returns
    -------
    pandas.DataFrame or None
        Frame with columns ``'Process Name'``, ``'Software'`` and
        ``'Version'`` (empty if the YAML document is empty). Returns ``None``
        when ``path`` does not exist.
    """
    if not Path(path).exists():
        return None

    makemd(software_md)
    with open(path) as f:
        # BaseLoader builds only plain strings, lists and dicts, so version
        # numbers such as "1.10" are not coerced to floats. An empty document
        # loads as None, hence the fallback to an empty mapping.
        versions = yaml.load(f, Loader=yaml.BaseLoader) or {}

    records = [
        {'Process Name': process, 'Software': tool, 'Version': version}
        for process, tools in versions.items()
        for tool, version in tools.items()
    ]
    # Passing `columns` keeps the header even when there are no rows.
    return pd.DataFrame(records, columns=['Process Name', 'Software', 'Version'])

# Build the software-versions table (None when the versions file is missing)
# and leave it as the last expression so the notebook renders it.
software_df = create_software_table(software)
software_df