## Individual Result Visualization
No categorization is needed at this stage (this will change if the results can be categorized automatically).

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output

# --- Interactive controls ---------------------------------------------------

# Display label shown in the dropdown -> identifier used as a path component
# when the results file is located.
disease_map = {
    "Bipolar": "DGIDB_BIPOLAR",
    "Schizophrenia": "DGIDB_SCHIZOPHRENIA",
    "Breast Cancer": "DGIDB_BREAST_CANCER",
    "FULL": "DGIDB_FULL"
}

# Chooses which disease's results are loaded; options are the labels above.
disease_selector = widgets.Dropdown(
    description='Disease:',
    options=tuple(disease_map),
)

# Chooses the result variant; the value becomes the results file name prefix.
results_type_selector = widgets.Dropdown(
    description='Result Type:',
    options=["unidirectional_multilayer", "multilayer", "regular"],
)

# Score-ratio threshold slider, currently disabled.
# ratio_slider = widgets.FloatSlider(
#     value=5.0,
#     min=1.0,
#     max=10.0,
#     step=0.5,
#     description='Score Ratio >',
# )

# Toggle intended to restrict the view to literature-linked genes.
paper_filter = widgets.Checkbox(
    description='Only genes linked in literature',
    value=False,
)

# Shared output area that the plots are rendered into.
output = widgets.Output()

def load_disease_data(disease, result_type):
    """Read the results CSV for one disease / result-type combination.

    Args:
        disease: Display label of the disease; must be a key of
            ``disease_map``.
        result_type: Result variant used as the file name prefix
            (``<result_type>_rwr_results.csv``).

    Returns:
        The ``pandas.DataFrame`` parsed from the CSV file.

    Raises:
        KeyError: If ``disease`` is not a key of ``disease_map``.
    """
    disease_dir = disease_map[disease]
    csv_path = f"../Methods/output/{disease_dir}/{result_type}_rwr_results.csv"
    return pd.read_csv(csv_path)

def _plot_top_genes(df, disease, result_type, top_n=10):
    """Render a bar plot of the ``top_n`` highest-``Score`` rows of ``df``."""
    top_genes = df.sort_values(by='Score', ascending=False).head(top_n)

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        # Assign ``hue`` explicitly: recent seaborn releases deprecate passing
        # ``palette`` without ``hue``. ``dodge=False`` keeps one full-width
        # bar per gene, matching the un-hued plot.
        sns.barplot(
            x='claim_name', y='Score', hue='claim_name', dodge=False,
            data=top_genes, palette='viridis', ax=ax,
        )
        # Older seaborn versions add a (redundant) legend when hue is set.
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

        plt.title(f'Top {top_n} Genes for {disease} ({result_type})')
        plt.xlabel('Gene Claim Name')
        plt.ylabel('Weighted Score')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()
    finally:
        # Always release the figure, even if plotting failed part-way.
        plt.close(fig)


def _plot_score_bins(df, disease, result_type, n_edges=10):
    """Render a bar chart of row counts per equal-width ``Score`` bin."""
    max_score = df['Score'].max()
    # Bin edges run from 0 to the maximum score; they are only strictly
    # increasing (as pd.cut requires) when that maximum is finite and > 0.
    if not (np.isfinite(max_score) and max_score > 0):
        print("Skipping score-bin plot: no positive scores to bin.")
        return

    edges = np.linspace(0, max_score, n_edges)
    binned = pd.cut(df['Score'], bins=edges, include_lowest=True)
    bin_counts = binned.value_counts().sort_index()

    fig, ax = plt.subplots(figsize=(12, 5))
    try:
        bin_counts.plot(kind='bar', color='skyblue', ax=ax)

        # Annotate each bar with its count, slightly above the bar top.
        for idx, count in enumerate(bin_counts):
            ax.text(
                idx, count + 1, str(count),
                ha='center', va='bottom',
                fontsize=9, fontweight='bold'
            )

        plt.ylabel('Number of Genes')
        plt.xlabel('Score Bin')
        plt.title(f'Gene Count by Score Bin for {disease} ({result_type})')
        plt.tight_layout()
        plt.show()
    finally:
        # Always release the figure, even if plotting failed part-way.
        plt.close(fig)


def update_graphs(change):
    """Redraw both plots for the currently selected disease and result type.

    Used as the widget observer callback and for the initial render. The
    previous contents of ``output`` are cleared, the matching results file
    is loaded, and two plots are drawn: the top-scoring genes and the gene
    count per score bin.

    Args:
        change: Change notification passed by the widget observer, or
            ``None`` for a direct call. It is not inspected; the current
            widget values are read instead.
    """
    with output:
        clear_output(wait=True)

        disease = disease_selector.value
        result_type = results_type_selector.value
        # TODO: paper_filter triggers this callback, but its value is not
        # applied to the data yet.

        try:
            df = load_disease_data(disease, result_type)
            if df.empty:
                print(f"No results to plot for {disease} ({result_type}).")
                return

            _plot_top_genes(df, disease, result_type)
            _plot_score_bins(df, disease, result_type)
        except Exception as e:
            # Callback boundary: report the problem inside the output area
            # instead of letting it escape into the widget event machinery.
            print(f"Error: {e}")

# Show the controls followed by the shared plot output area.
display(
    disease_selector,
    results_type_selector,
    # ratio_slider,
    paper_filter,
    output,
)

# Render once up front. Observers are attached only afterwards so that this
# first render is not followed by a second, observer-triggered one.
update_graphs(None)

# Re-render whenever the value of any active control changes.
# (ratio_slider is disabled; add it to this tuple if it is re-enabled.)
for _control in (disease_selector, results_type_selector, paper_filter):
    _control.observe(update_graphs, names='value')


Dropdown(description='Disease:', options=('Bipolar', 'Schizophrenia', 'Breast Cancer', 'FULL'), value='Bipolar…

Dropdown(description='Result Type:', options=('unidirectional_multilayer', 'multilayer', 'regular'), value='un…

Checkbox(value=False, description='Only genes linked in literature')

Output()