In [None]:
import os

import h5py
import pandas as pd
from IPython.display import HTML
from IPython.display import display as ipy_display

In [None]:
# Report-wide pandas display settings: never truncate rows, do not limit
# column width, and print floats with thousands separators and two decimals.
pd.options.display.max_rows = None
pd.options.display.max_colwidth = 0
pd.set_option("display.float_format", "{:,.2f}".format)

In [None]:
# Make IPython display the value of every top-level expression in a cell,
# not just the last one. The section-banner cells below rely on this to
# render several consecutive HTML(...) expressions.
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

In [None]:
# Notebook inputs.
# NOTE(review): the empty defaults look like placeholders that a notebook
# runner is expected to override — confirm how this notebook is invoked.
# CSV listing the metrics shown in the summary view (read with pd.read_csv below).
top_metrics_file = "top_metrics_for_tbl.csv"
# Path of the H5 file whose tables are reported.
input_h5_file = ""
# Shown as "Sample name" in the input-parameters table.
input_base_file_name = ""

In [None]:
# Fail fast with a clear message when the metrics CSV is not a regular file,
# instead of letting a later read fail.
top_metrics_file_found = os.path.isfile(top_metrics_file)
if not top_metrics_file_found:
    raise ValueError(f"Input {top_metrics_file} does not exist")

In [None]:
# Parse the metrics CSV once and derive every lookup structure from that
# single frame (previously the file was read twice).
df_features = pd.read_csv(top_metrics_file)

# Mapping from the first CSV column to the second CSV column, row by row;
# later rows win on duplicate keys, as with the original dict comprehension.
dict_features = dict(zip(df_features.iloc[:, 0], df_features.iloc[:, 1]))

# Distinct values of the "metric" column. unique() keeps first-appearance
# order, so the list is deterministic across runs (a set() round-trip is not).
list_metrics = list(df_features["metric"].unique())

In [None]:
# get Keys within the H5 file
# The handle is only needed to list the top-level keys, so it is opened in a
# `with` block and closed as soon as the listing is done (it used to stay
# open for the lifetime of the kernel).
with h5py.File(input_h5_file, "r") as f:
    list_keys = list(f.keys())

# Keys starting with "histogram_" are kept apart; every other key is treated
# as a metric table.
hist_list_keys = [i for i in list_keys if i.startswith("histogram_")]
table_keys = [i for i in list_keys if not i.startswith("histogram_")]

# Build the frame with its column name up front: assigning `.columns` to a
# frame created from an empty list raises, which broke files that contain no
# metric tables. The explicit object dtype keeps the merge key compatible
# with the string "metric" column of df_features even when the list is empty.
tbl_list_keys = pd.DataFrame({"metric": pd.Series(table_keys, dtype="object")})
del list_keys, table_keys

# create table merging top required metrics to display and input provided
# ....................
tbl_top_values = df_features.merge(tbl_list_keys, on="metric", how="inner")

In [None]:
# Section banner for "Input parameters". Each top-level expression below is
# rendered on its own because ast_node_interactivity is set to "all" above.
HTML("<b></b>")
HTML("<hr/>")
HTML('<h2 style="font-size:20px;">Input parameters</h2>')
HTML("<hr/>")
HTML("<b></b>")

In [None]:
# Two-row table describing the inputs: the sample name and the H5 file path.
sample_info_labels = ["Sample name", "h5 file"]
sample_info_values = [input_base_file_name, str(input_h5_file)]
to_print_sample_info = pd.DataFrame({"value": sample_info_values}, index=sample_info_labels)

# Break long values into lines of at most 100 characters so the table stays readable.
wrapped_values = to_print_sample_info["value"].str.wrap(100)
to_print_sample_info["value"] = wrapped_values


def wrap_df_text(df):
    r"""Display ``df`` as HTML with escaped newlines turned into line breaks.

    ``df`` can be any object exposing ``to_html()``. Every literal
    two-character sequence ``\n`` (a backslash followed by ``n``) in the
    generated markup is replaced by ``<br>`` before the markup is displayed.

    Returns whatever ``ipy_display`` returns.
    """
    markup = df.to_html()
    markup_with_breaks = markup.replace("\\n", "<br>")
    return ipy_display(HTML(markup_with_breaks))


# Left-align the cells, then render the table through wrap_df_text.
sample_info_styler = to_print_sample_info.style.set_properties(**{"text-align": "left"})
wrap_df_text(sample_info_styler)

In [None]:
# Section banner for the summary view. Each top-level expression below is
# rendered on its own because ast_node_interactivity is set to "all" above.
HTML("<b></b>")
HTML("<hr/>")
HTML('<h2 style="font-size:20px;">Summary View: Main Metrics</h2>')
HTML("<hr/>")
HTML("<b></b>")

In [None]:
# Summary view: for every metric table requested in tbl_top_values, read the
# table from the H5 file, reshape it to (key, value) rows and keep only the
# keys listed for that metric.
separator = "___"

# Collect the per-metric tables and concatenate once after the loop; growing
# a frame with pd.concat on every iteration copies all earlier rows each time.
summary_frames = []
for temp_metric in tbl_top_values["metric"].unique():
    sub_top_tbl = tbl_top_values[tbl_top_values["metric"] == temp_metric]
    df_h5_tbl = pd.read_hdf(input_h5_file, temp_metric).T.reset_index()

    if temp_metric.startswith("stats"):
        # stats_coverage is a multiindex dataframe: reset_index() exposes the
        # two levels as level_0 / level_1, which are joined into a single key.
        df_h5_tbl["metric"] = df_h5_tbl["level_0"] + separator + df_h5_tbl["level_1"]
        df_h5_tbl = df_h5_tbl.drop(columns=["level_0", "level_1"])
        # Remaining columns are the value column followed by the joined key.
        df_h5_tbl.columns = ["value", "key"]
        df_h5_tbl = df_h5_tbl[["key", "value"]]
    else:
        df_h5_tbl.columns = ["key", "value"]

    list_top_tbl = df_h5_tbl.merge(sub_top_tbl, on="key", how="inner")
    summary_frames.append(list_top_tbl)

if summary_frames:
    to_print = pd.concat(summary_frames)
else:
    # No requested metric is present in the H5 file: show an empty table
    # instead of failing on the missing "key" column.
    to_print = pd.DataFrame(columns=["key", "value"])

# Use the keys as row labels, with the PCT_/PERCENT_ prefixes shown as "% ".
display_labels = (
    to_print["key"].str.replace("PCT_", "% ", regex=False).str.replace("PERCENT_", "% ", regex=False)
)
to_print.index = display_labels
to_print.index.name = None

# The value column gets an empty header so only the row labels and numbers show.
to_print = to_print.rename(columns={"value": ""})
ipy_display(to_print[""].to_frame())

In [None]:
# Section banner for the detailed view. Each top-level expression below is
# rendered on its own because ast_node_interactivity is set to "all" above.
HTML("<b></b>")
HTML("<hr/>")
HTML('<h2 style="font-size:20px;">Detailed View: All Metrics</h2>')
HTML("<hr/>")
HTML("<b></b>")

In [None]:
# Detailed view: display every non-histogram table of the H5 file, in
# alphabetical key order, each preceded by a "Metric type" heading.
to_print = pd.DataFrame()
sorted_keys = tbl_list_keys["metric"].sort_values()

for tbl_key in sorted_keys:
    # The heading must be displayed explicitly: ast_node_interactivity="all"
    # only auto-displays top-level cell expressions, so a bare HTML(...)
    # expression inside the loop body is evaluated and silently discarded.
    ipy_display(HTML("<br>" + "<br>" + "<b>" + "Metric type: " + tbl_key + "</b>" + "<br>"))

    to_print = pd.read_hdf(input_h5_file, tbl_key).T
    to_print = to_print.rename(columns={0: ""})

    # Inspect the index type rather than its first label, so that an empty
    # table does not raise IndexError.
    if isinstance(to_print.index, pd.MultiIndex):
        # Multi-level rows: only the second level carries the "percent_" prefix.
        to_print.index = to_print.index.set_levels(
            to_print.index.levels[1].str.replace("percent_", "% ", regex=False), level=1
        )
    else:
        # Flat rows: show PCT_/PERCENT_ prefixes as "% "; non-string labels are left as they are.
        to_print = to_print.rename(
            index=lambda label: (
                label.replace("PCT_", "% ").replace("PERCENT_", "% ") if isinstance(label, str) else label
            )
        )
    ipy_display(to_print)