In [None]:
# Path to the Bracken abundance table; passed to bracken_results() below,
# which reads it as tab-separated text.
# NOTE(review): empty by default — presumably overridden when the notebook is
# parameterised at run time; confirm against the caller.
bracken = ""
# Path to the YAML file of software versions; passed to create_software_table().
software = ""

# Bracken 
Bracken (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample.

In [None]:
from pathlib import Path
import pandas as pd

def bracken_results(path, top_n=100):
    """Load a Bracken abundance table and keep its most abundant species.

    The file is read as tab-separated text. The ``taxonomy_id`` and
    ``taxonomy_lvl`` columns and every per-sample ``*_num`` column are
    dropped, and the ``_frac`` suffix is stripped from the remaining
    per-sample columns so each one is labelled with the bare sample name.

    Parameters
    ----------
    path : str or path-like
        Location of the Bracken table. An empty value, a missing file and a
        directory are all treated as "no results available".
    top_n : int, default 100
        Number of top-ranked rows to keep (ties on the last ranking column
        are all kept, so more rows may be returned).

    Returns
    -------
    pandas.DataFrame or None
        One row per species with a ``name`` column followed by one column
        per sample, sorted from most to least abundant; ``None`` when no
        readable file is found at ``path``.

    Raises
    ------
    KeyError
        If the table lacks the ``taxonomy_id``, ``taxonomy_lvl`` or ``name``
        columns.
    """
    # Path("") is the current directory, so a plain .exists() check lets the
    # empty default through and read_csv then fails; require a real file.
    if not path or not Path(path).is_file():
        return None

    table = pd.read_csv(path, sep="\t").drop(columns=["taxonomy_id", "taxonomy_lvl"])

    # Match "_num"/"_frac" as suffixes only, so a sample whose own name
    # happens to contain one of them is neither dropped nor truncated.
    kept_columns = [col for col in table.columns if not col.endswith("_num")]
    table = table[kept_columns].rename(
        columns=lambda col: col[: -len("_frac")] if col.endswith("_frac") else col
    )

    by_species = table.set_index("name")
    # NOTE(review): ranking is by the sample columns in file order (the first
    # sample dominates, later ones only break ties) — confirm this is the
    # intended meaning of "top" across samples.
    ranked = by_species.nlargest(top_n, by_species.columns.to_list(), keep="all")
    return ranked.reset_index()

# Load the table once; the plot and table cells below both reuse bracken_df.
# It is None when bracken_results() finds no file at the configured path.
bracken_df = bracken_results(bracken)

## Relative Abundance Plot
An interactive, relative taxonomic abundance plot of the top 100 most represented species across submitted samples. 


In [None]:
import plotly.express as px

def plot_bracken(df):
    """Build a bar chart of per-sample species abundance.

    Parameters
    ----------
    df : pandas.DataFrame or None
        Wide table with a ``name`` column (species) and one column of
        abundance values per sample.

    Returns
    -------
    plotly figure or None
        The figure with one bar per sample, coloured by species; ``None``
        when ``df`` is ``None`` or empty, so the cell renders nothing.
    """
    if df is None or df.empty:
        return None

    # Wide -> long: one row per (species, sample) pair, with readable labels.
    new_columns = {"name": "Species", "variable": "Sample", "value": "Fraction Abundance"}
    long_df = df.melt(id_vars="name").rename(columns=new_columns)

    # Generate express chart
    figx = px.bar(long_df, y="Sample", x="Fraction Abundance", color="Species")

    figx.update_layout(
        yaxis_title="Sample",
        # The plotted values are fractions, not percentages, so the axis
        # title must not claim "%".
        xaxis_title="Relative Abundance (fraction)",
        bargap=0,
    )

    return figx

# Last expression of the cell, so the returned figure (if any) is rendered
# as the cell output.
plot_bracken(bracken_df)

## Relative Abundance Table
The same top 100 species in tabular form: one row per species and one column of fractional abundance per sample.

In [None]:
# Bare expression: renders the table built by bracken_results() above as the
# cell output.
bracken_df

# Software Versions
Software versions collected at run time from each tool's own output.

In [None]:
import yaml
from platform import python_version
import pandas as pd

def create_software_table(path):
    """Turn a YAML file of software versions into a flat table.

    The YAML document is expected to be a two-level mapping of
    ``process -> {software: version}``; each inner pair becomes one row.

    Parameters
    ----------
    path : str or path-like
        Location of the YAML file. An empty value, a missing file and a
        directory are all treated as "no versions available".

    Returns
    -------
    pandas.DataFrame or None
        Columns ``Process Name``, ``Software`` and ``Version`` (no rows if
        the document is empty); ``None`` when no file is found at ``path``.
    """
    # Path("") is the current directory, so a plain .exists() check lets the
    # empty default through and open() then fails; require a real file.
    if not path or not Path(path).is_file():
        return None

    with open(path) as f:
        # BaseLoader only builds plain strings, lists and dicts, so version
        # numbers such as "1.10" stay strings instead of becoming floats.
        versions = yaml.load(f, Loader=yaml.BaseLoader)

    rows = [
        (process, tool, version)
        # An empty document or a process with no tools listed is falsy;
        # treat both as "nothing to report" rather than failing on .items().
        for process, process_values in (versions or {}).items()
        for tool, version in (process_values or {}).items()
    ]
    return pd.DataFrame(rows, columns=["Process Name", "Software", "Version"])

# Build the versions table from the configured YAML path; the trailing bare
# expression renders it as the cell output (None when no file was found).
software_df = create_software_table(software)
software_df