In [None]:
import torch 
import pandas as pd
import math
import os
import pickle
import panel as pn
import holoviews as hv
import math
import sys

# Route both HoloViews and Panel rendering through the Plotly backend.
hv.extension("plotly")
pn.extension("plotly")
# Load Panel's Tabulator extension and apply the dark theme to Panel and to
# the HoloViews Plotly renderer.
pn.extension('tabulator', theme='dark')
pn.config.theme = 'dark'
hv.renderer('plotly').theme = 'dark'

# Append the parent directory (resolved against the current working directory)
# to the import path — presumably so project modules one level above this
# notebook can be imported; TODO confirm.
sys.path.append(os.path.abspath(os.path.join('..')))


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device='cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# This notebook is used to generate the results for the paper.
# It loads the latest results from the metrics/results folder and postprocesses
# them into md files.

RESULTS_DIR = "../metrics/results"
# Only denoisers with these settings are kept for the paper.
PAPER_PATCH_SIZE = 4
# NOTE(review): the original comment and message said "768 labels" while the
# code has always filtered on 784 — the filter value is kept; confirm which
# number is the intended one.
PAPER_LABEL_NUM = 784


def _numeric_prefix(file_name):
    """Return the integer before the first "_" in file_name, or None if there is none."""
    try:
        return int(file_name.split("_")[0])
    except ValueError:
        return None


# Get the latest results file: the one with the largest integer prefix.
# Entries without such a prefix (hidden files, notes, ...) are ignored instead
# of crashing the sort.
files_results = [
    name for name in os.listdir(RESULTS_DIR) if _numeric_prefix(name) is not None
]
if not files_results:
    raise FileNotFoundError(
        f"No result files with a numeric '<number>_' prefix found in {RESULTS_DIR}"
    )
files_results.sort(key=_numeric_prefix)

# SECURITY: pickle.load can execute arbitrary code contained in the file —
# only load result files produced by a trusted run of this project.
with open(f"{RESULTS_DIR}/{files_results[-1]}", 'rb') as f:
    evaluation_results_loaded = pickle.load(f)

evaluation_results = []

# Postprocess: add accuracies and drop the configurations excluded from the paper.
for model_name, result, config, denoiser_config in evaluation_results_loaded:
    # Add an accuracy to each of the four result entries (indices 0-3),
    # defined as 100 minus the total absolute percentage error.
    for result_idx in range(4):
        result[result_idx]["accuracy"] = 100 - result[result_idx]["total_abs_percentage_error"]

    if denoiser_config is not None:
        denoiser_params = denoiser_config.model.parameters

        # Skip denoisers whose Conv kernel differs from the one used in the paper.
        patch_size = int(denoiser_params.patch_size)
        if patch_size != PAPER_PATCH_SIZE:
            print(f"Skipping {model_name} due to Conv kernel = {patch_size}")
            continue

        # Skip denoisers whose label count differs from the one used in the paper.
        label_num = int(denoiser_params.label_num)
        if label_num != PAPER_LABEL_NUM:
            print(f"Skipping {model_name} due to Label # = {label_num} (expected {PAPER_LABEL_NUM})")
            continue

    evaluation_results.append((model_name, result, config, denoiser_config))

In [None]:

# Aggregated top results: one row per evaluated model holding its four
# accuracies followed by the descriptors used to group models afterwards.
rows = []
for model, result, unified_config, denoiser_config in evaluation_results:
    # Accuracies, in the order the result entries are stored (indices 0-3).
    row = dict(
        zip(
            ("pred_real_mu", "pred_real_lowess", "pred_mock_mu", "pred_mock_lowess"),
            (result[idx]['accuracy'] for idx in range(4)),
        )
    )

    if not unified_config:
        # Entries without a unified config are identified by their model name only.
        row.update({'type': model, "classifier": "", "Label #": "", "Conv kernel": ""})
    else:
        denoiser_params = denoiser_config.model.parameters
        row.update({
            'type': "Unified",
            'classifier': unified_config.model.cls_head,
            "Label #": int(denoiser_params.label_num),
            "Conv kernel": int(denoiser_params.patch_size),
        })

    rows.append(row)

dataframe_results = pd.DataFrame(rows)

def generate_result(name, filtered_df):
    """Build one row of the top-results table for a group of models.

    Args:
        name: Label written to the "Classifier" column.
        filtered_df: Rows of the aggregated results frame belonging to this
            group; must contain the four ``pred_*`` accuracy columns.

    Returns:
        dict mapping the table's column headers to the best (highest)
        accuracy of each metric, formatted as a percentage with two decimals.
        The "RD *mu*" entry is wrapped in markdown bold. When the group is
        empty every metric is reported as "n/a" instead of raising.
    """
    def best(column):
        # An empty group has no best value; report it explicitly rather than
        # failing with an out-of-bounds IndexError.
        if filtered_df.empty:
            return "n/a"
        # max() skips NaN, matching "first row after a descending sort".
        return f'{filtered_df[column].max():0.2f}%'

    return {
        "Classifier    ": name,
        "dense MD LOESS *mu*": best("pred_mock_lowess"),
        "RD LOESS *mu*": best("pred_real_lowess"),
        "dense MD *mu*": best("pred_mock_mu"),
        "RD *mu*": f'**{best("pred_real_mu")}**',
    }

# Boolean masks selecting each group of models reported in the table,
# listed in the order the rows appear.
is_unified = dataframe_results['type'] == "Unified"
group_selections = [
    ("Conventional", dataframe_results['type'] == "Conventional separate classifier"),
    ("Linear", is_unified & (dataframe_results['classifier'] == "linear")),
    ("Attention", is_unified & (dataframe_results['classifier'] == "attn")),
    ("Zero-shot", dataframe_results['type'] == "Zero-shot unified classifier"),
]

# One summary row per group.
results = [
    generate_result(group_label, dataframe_results.loc[group_mask])
    for group_label, group_mask in group_selections
]

dataframe_top_results = pd.DataFrame(results)

# Persist the table as markdown next to the other result tables.
os.makedirs("../metrics/result_tables", exist_ok=True)
markdown = dataframe_top_results.to_markdown(index=False)
with open("../metrics/result_tables/cls_top_results.md", "w") as f:
    f.write(markdown)

# Show the same table in the notebook.
table = pn.pane.DataFrame(dataframe_top_results, sizing_mode="stretch_both", max_height=900)
table

In [None]:
# MEASURED RESULTS

def roman_to_integer(roman_string):
    """Map a chromosome label to its 1-based position in a fixed label list.

    The recognised labels are "I", "II", "III", "IV", "V" and "X". Note that
    "X" maps to 6 (its position in the list), not to the Roman numeral value
    10 — presumably because X is the sixth chromosome; verify against the data.

    Args:
        roman_string: Label to convert.

    Returns:
        The integer 1-6 for a recognised label, otherwise None.
    """
    known_labels = ("I", "II", "III", "IV", "V", "X")
    for position, label in enumerate(known_labels, start=1):
        if roman_string == label:
            return position
    # Unrecognised labels fall through without a value.
    return None

# Names used in the paper. Values missing from a mapping are shown unchanged
# instead of leaving the column empty.
UNIFIED_HEAD_NAMES = {
    "attn": "Attention head",
    "linear": "Linear-layer head",
}
STANDALONE_MODEL_NAMES = {
    "Zero-shot unified classifier": "Zero-shot head",
    "Conventional separate classifier": "Conventional model",
}
# this mapping done for the paper, generalize in future
ALLELE_NAMES = {
    "ot266": "*vab-3(ot266)*",
    "G54": "*him-4(u924)*",
    "G61": "*mec-1(u925)*",
}
# Conv kernel size excluded from the paper.
EXCLUDED_PATCH_SIZE = 2

rows = []
for model, result, unified_config, denoiser_config in evaluation_results:
    # filter out results with Conv kernel 2 for the paper.
    # The value is cast with int() like everywhere else in this notebook, so the
    # filter also works when the config stores it as a string; it is checked
    # once per model because it does not depend on the individual result.
    if unified_config and int(denoiser_config.model.parameters.patch_size) == EXCLUDED_PATCH_SIZE:
        continue

    # Per-model columns, computed once and shared by all of the model's rows.
    if unified_config:
        cls_head = unified_config.model.cls_head
        classifier_name = UNIFIED_HEAD_NAMES.get(cls_head, cls_head)
        label_num = int(denoiser_config.model.parameters.label_num)
        conv_kernel = int(denoiser_config.model.parameters.patch_size)
    else:
        classifier_name = STANDALONE_MODEL_NAMES.get(model, model)
        label_num = ""
        conv_kernel = ""

    # Each tuple of the first result entry is read as
    # (sample id, chromosome label, measured mu, predicted mu).
    for result_tuple in result[0]['pairs_measured_calculated']:
        sample_id = result_tuple[0]
        measured_mu = result_tuple[2]
        predicted_mu = result_tuple[3]

        rows.append({
            'Chrom': roman_to_integer(result_tuple[1]),
            'RD *mu*': f"{measured_mu:,}",
            'Predicted *mu*': f"{predicted_mu:,}",
            # Absolute difference scaled by 1e6 (reported as megabases).
            '**Delta (Megabase)**': f"**{(math.fabs(measured_mu - predicted_mu) / 10**6):.2f}**",
            'Classifier            ': classifier_name,
            "Label #": label_num,
            "Convolutional kernel": conv_kernel,
            'Allele': ALLELE_NAMES.get(sample_id, sample_id),
        })
        
# Columns of the published table, in display order.
order = [
    'Classifier            ',
    # 'Label #', removed from paper
    # 'Convolutional kernel', removed from paper
    'Allele',
    'Chrom',
    'RD *mu*',
    'Predicted *mu*',
    '**Delta (Megabase)**',
]

# The delta column holds markdown-formatted strings such as "**0.12**".
# Sorting those as text is lexicographic ("**10.50**" would come before
# "**2.30**"), so sort on the parsed number instead. A stable sort keeps rows
# with equal deltas in their original order, making the output reproducible.
dataframe = (
    pd.DataFrame(rows)[order]
    .sort_values(
        by=['**Delta (Megabase)**'],
        key=lambda column: column.str.strip("*").astype(float),
        kind="stable",
    )
    .reset_index(drop=True)
)

# Persist the table as markdown next to the other result tables.
os.makedirs("../metrics/result_tables", exist_ok=True)
markdown = dataframe.to_markdown(index=False)
with open("../metrics/result_tables/genomes_results.md", "w") as f:
    f.write(markdown)

# Show the same table in the notebook.
pn.extension('tabulator', theme='dark')
table = pn.pane.DataFrame(dataframe, sizing_mode="stretch_both", max_height=1800)
table