In [None]:
# Notebook inputs.
# NOTE(review): these are absolute, machine-specific paths; presumably they are
# overridden per run by whatever executes the notebook - confirm.

# Combined Bracken table (tab-separated); consumed by create_detection_table.
bracken = "/home/nomo/research/aligndx/backend/data/results/bracken_combined.tsv"
# YAML file of software versions; parsed in the "Software Versions" section.
software = "/home/nomo/research/aligndx/backend/data/results/eb11175c-1882-4754-a3cd-f9a21a033ccd/pipeline_info/software_versions.yml"
# Name of the panel column in panels.csv; rows marked 'Y' in it are selected.
panel = 'Respiratory'

In [None]:
#utils
from IPython.display import Markdown

def makemd(md):
    """Render the Markdown source string ``md`` as rich notebook output."""
    rendered = Markdown(md)
    display(rendered)

# Detection Table 

In [None]:
import pandas as pd
from pathlib import Path

# Panel definitions file, resolved relative to the current working directory.
# The previous spelling, Path('__file__').parent / 'panels.csv', wrapped the
# *string* '__file__' (not the variable), so its parent was always '.' and the
# result was this same CWD-relative path; it is now written explicitly.
panels = Path('panels.csv')

# Markdown sentence displayed above the detection table; filled in with
# str.format(panel=..., thresh=...).
selected_panel = """
Detections were generated using the selected {panel} pathogen panel with an abundance threshold of {thresh}
"""

def get_bracken_results(path):
    """Load a combined Bracken table and keep only the per-sample fractions.

    Parameters
    ----------
    path : str, path-like or file-like
        Tab-separated table containing the columns ``taxonomy_id`` and
        ``taxonomy_lvl`` plus, per sample, a ``<sample>_num`` and a
        ``<sample>_frac`` column.

    Returns
    -------
    pandas.DataFrame
        The table without ``taxonomy_id``, ``taxonomy_lvl`` and every
        ``*_num`` column, with the ``_frac`` suffix stripped so the remaining
        sample columns are named after the sample.

    Raises
    ------
    KeyError
        If ``taxonomy_id`` or ``taxonomy_lvl`` is missing from the table.
    """
    num_suffix = "_num"
    frac_suffix = "_frac"

    raw = pd.read_csv(path, sep='\t')
    trimmed = raw.drop(columns=["taxonomy_id", "taxonomy_lvl"])

    # Match on the suffix only: a sample whose own name contains "_num" or
    # "_frac" must not have its fraction column dropped or its name truncated.
    fractions = trimmed.loc[:, ~trimmed.columns.str.endswith(num_suffix)]
    return fractions.rename(
        columns=lambda col: col[:-len(frac_suffix)] if col.endswith(frac_suffix) else col
    )

def create_detection_table(bracken_path, panels_path, panel, thresh):
    """Build the detection and abundance tables for one pathogen panel.

    Parameters
    ----------
    bracken_path : str or path-like
        Combined Bracken table; loaded through ``get_bracken_results``.
    panels_path : str or path-like
        Comma-separated panel definition file, read with its first column as
        the index. It must provide the columns ``'Organism description'`` and
        ``'Name'`` plus one column per panel.
    panel : str
        Panel column to use; rows whose value in it is ``'Y'`` are selected.
    thresh : number
        A pathogen is marked detected in a sample when its abundance is
        strictly greater than this value.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame) or None
        ``(detections, abundance)``. ``detections`` is indexed by pathogen and
        holds ``'Y'``/``'N'`` per sample column; ``abundance`` has a
        ``'Pathogen'`` column followed by one abundance column per sample
        (0 where the pathogen is absent from the Bracken table).
        ``None`` is returned when ``bracken_path`` does not exist.

    Raises
    ------
    KeyError
        If ``panel`` is not a column of the panel definition file.
    """
    if not Path(bracken_path).exists():
        return None

    # Tell the reader which panel/threshold produced the table that follows.
    makemd(selected_panel.format(panel=panel, thresh=thresh))

    bracken_results = get_bracken_results(bracken_path)
    metadata = pd.read_csv(panels_path, sep=',', index_col=0)
    if panel not in metadata.columns:
        raise KeyError(f"Panel {panel!r} is not a column of {panels_path}")

    pathogens = (
        metadata.loc[metadata[panel] == 'Y', ['Organism description', 'Name']]
        .reset_index(drop=True)
        .rename(columns={'Organism description': 'Pathogen', 'Name': 'name'})
    )

    # Left join keeps every panel pathogen; those missing from the Bracken
    # table get an abundance of 0. The key is named explicitly so the join
    # cannot silently widen to other columns that happen to share a name.
    abundance = (
        pathogens
        .merge(bracken_results, how='left', on='name')
        .fillna(0)
        .drop(columns=['name'])
    )

    # Vectorized threshold test, then bool -> 'Y'/'N' column by column
    # (replaces the deprecated element-wise DataFrame.applymap).
    above_threshold = abundance.set_index('Pathogen').gt(thresh)
    detections = above_threshold.apply(
        lambda column: column.map({True: 'Y', False: 'N'})
    )
    return (detections, abundance)

# thresh=0: any abundance strictly above zero counts as a detection.
detection_tables = create_detection_table(bracken, panels, panel, thresh=0)
if detection_tables is None:
    # create_detection_table returns None when the Bracken file is missing;
    # fail with an explicit message rather than an opaque unpacking TypeError.
    raise FileNotFoundError(f"Bracken results not found: {bracken}")
detections, abundance = detection_tables
detections

## Relative Abundance Plot
An interactive plot of the relative taxonomic abundance of species across the submitted samples. Note that human taxa were removed; the chart below therefore represents the abundance of microbial species within the fraction of reads detected as microbial.


In [None]:
import plotly.express as px

def plot_bracken(df):
    """Build a bar chart of pathogen abundance per sample.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a ``'Pathogen'`` column; every other column is
        treated as one sample holding that pathogen's abundance.

    Returns
    -------
    The Plotly figure produced by ``px.bar``, with samples on the y-axis,
    abundance on the x-axis and one colour per pathogen.
    """
    # Wide -> long: one row per (pathogen, sample) pair.
    long_df = df.melt(id_vars="Pathogen").rename(
        columns={"variable": "Sample", "value": "Fraction Abundance"}
    )

    figx = px.bar(long_df, y="Sample", x="Fraction Abundance", color="Pathogen")

    # The plotted values are fractions, not percentages, so the axis title
    # says so (it previously read "(%)" while showing unscaled fractions).
    figx.update_layout(
        yaxis_title="Sample",
        xaxis_title="Relative Abundance (fraction)",
        bargap=0,
    )

    return figx

# Render the chart for the `abundance` table returned by create_detection_table.
plot_bracken(abundance)

# Software Versions
Collected at run time from software output.

In [None]:
import yaml
from platform import python_version
import pandas as pd

# Parse the versions file. BaseLoader constructs only plain Python containers
# and leaves every scalar as a string, so version numbers are not coerced.
with open(software) as f:
    # An empty YAML document loads as None; treat it as "no processes".
    versions = yaml.load(f, Loader=yaml.BaseLoader) or {}

# Column-oriented accumulator: one entry per (process, tool) pair.
software_dict = {
    'Process Name': [],
    'Software': [],
    'Version': []
}
for process, process_values in versions.items():
    # The loop variable must not be called `software`: that name holds the
    # path of the versions file, and rebinding it here would make this cell
    # fail when it is run a second time.
    for tool, version in process_values.items():
        software_dict['Process Name'].append(process)
        software_dict['Software'].append(tool)
        software_dict['Version'].append(version)

software_df = pd.DataFrame.from_dict(software_dict)
software_df