## Individual Result Visualization
No manual categorization of the results is needed at this stage (this will change if the results can be categorized automatically).

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
# NeurIPS-style font sizes used when labelling the plots
TITLE_SIZE, LABEL_SIZE, TICK_SIZE = 18, 15, 12

# Dropdown label -> directory name under the results folder
disease_map = dict([
    ("Bipolar", "DGIDB_BIPOLAR"),
    ("Schizophrenia", "DGIDB_SCHIZOPHRENIA"),
    ("Breast Cancer", "DGIDB_BREAST_CANCER"),
    ("FULL", "DGIDB_FULL"),
])

# Interactive controls
disease_selector = widgets.Dropdown(description='Disease:', options=disease_map.keys())

results_type_selector = widgets.Dropdown(
    description='Result Type:',
    options=["unidirectional_multilayer", "multilayer", "single_layer"],
)

paper_filter = widgets.Checkbox(description='Only genes linked in literature', value=False)

# Area the plots are rendered into
output = widgets.Output()

def load_disease_data(disease, result_type):
    """Read the RWR results CSV for one disease and result type.

    Args:
        disease: A key of `disease_map`; it is translated to the name of the
            directory that holds that disease's results.
        result_type: Prefix of the results file name
            (`<result_type>_rwr_results.csv`).

    Returns:
        The CSV contents as a pandas DataFrame.

    Raises:
        KeyError: If `disease` is not a key of `disease_map`.
    """
    disease_dir = disease_map[disease]
    return pd.read_csv(f"../Methods/output/{disease_dir}/{result_type}_rwr_results.csv")

def _plot_top_genes(df, disease, result_type, top_n=10):
    """Draw a bar chart of the `top_n` highest-scoring rows of `df`.

    Reads the 'claim_name' and 'Score' columns. The figure is closed even if
    plotting raises, so failed redraws do not accumulate open figures.
    """
    top_genes = df.sort_values(by='Score', ascending=False).head(top_n)

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        sns.barplot(x='claim_name', y='Score', data=top_genes, color="#4C72B0", ax=ax)
        ax.set_title(f'Top {top_n} Genes for {disease} ({result_type})', fontsize=TITLE_SIZE)
        ax.set_xlabel('Gene Claim Name', fontsize=LABEL_SIZE)
        ax.set_ylabel('Weighted Score', fontsize=LABEL_SIZE)
        ax.tick_params(axis='x', labelrotation=45, labelsize=TICK_SIZE)
        ax.tick_params(axis='y', labelsize=TICK_SIZE)
        fig.tight_layout()
        plt.show()
    finally:
        plt.close(fig)


def _plot_score_histogram(df, disease, result_type, n_edges=10):
    """Draw the number of genes per score bin, with the count above each bar.

    The bins are `n_edges` equally spaced edges from 0 to the maximum score
    (i.e. `n_edges - 1` bins). Nothing is drawn when the maximum score is not
    positive, because no increasing set of bin edges exists in that case.
    """
    max_score = df['Score'].max()
    if not (max_score > 0):  # also true when max_score is NaN (no scores at all)
        print("Score histogram skipped: there are no positive scores to bin.")
        return

    edges = np.linspace(0, max_score, n_edges)
    score_bins = pd.cut(df['Score'], bins=edges, include_lowest=True)
    bin_counts = score_bins.value_counts().sort_index()

    fig, ax = plt.subplots(figsize=(12, 5))
    try:
        bin_counts.plot(kind='bar', color='skyblue', ax=ax)

        # Write each bar's count just above it
        for idx, count in enumerate(bin_counts):
            ax.text(
                idx, count + 1, str(count),
                ha='center', va='bottom',
                fontsize=9, fontweight='bold'
            )

        # Same title/label sizes as the top-genes plot
        ax.set_ylabel('Number of Genes', fontsize=LABEL_SIZE)
        ax.set_xlabel('Score Bin', fontsize=LABEL_SIZE)
        ax.set_title(f'Gene Count by Score Bin for {disease} ({result_type})', fontsize=TITLE_SIZE)
        fig.tight_layout()
        plt.show()
    finally:
        plt.close(fig)


def update_graphs(change, _output=output):
    """Redraw both plots for the currently selected disease and result type.

    Used as the widget observer callback and also called directly for the
    initial render.

    Args:
        change: The value passed by the caller (a widget change notification,
            or None for the initial render). It is not inspected; the current
            selections are read from the widgets themselves.
        _output: Output widget to draw into. It is bound when this function is
            defined, so a later cell that reassigns the global name `output`
            cannot redirect these plots into its own output area.

    Any exception raised while loading or plotting is printed inside the output
    widget instead of propagating, so a bad selection does not break the UI.
    """
    # NOTE(review): paper_filter triggers this callback, but its value is not
    # read here, so ticking it only redraws the same plots.
    with _output:
        clear_output(wait=True)

        disease = disease_selector.value
        result_type = results_type_selector.value

        try:
            df = load_disease_data(disease, result_type)
            if df.empty:
                print(f"No results found for {disease} ({result_type}).")
                return

            _plot_top_genes(df, disease, result_type)
            _plot_score_histogram(df, disease, result_type)

        except Exception as e:
            print(f"Error: {e}")

# Show the controls followed by the plot area
# (a ratio_slider control is currently disabled and therefore not displayed or observed)
display(disease_selector, results_type_selector, paper_filter, output)

# Draw once for the default selections, then attach the observers
update_graphs(None)

for control in (disease_selector, results_type_selector, paper_filter):
    control.observe(update_graphs, names='value')


Dropdown(description='Disease:', options=('Bipolar', 'Schizophrenia', 'Breast Cancer', 'FULL'), value='Bipolar…

Dropdown(description='Result Type:', options=('unidirectional_multilayer', 'multilayer', 'single_layer'), valu…

Checkbox(value=False, description='Only genes linked in literature')

Output()

In [3]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
import matplotlib.pyplot as plt

# Dropdown label -> directory name under the results folder
disease_map = dict([
    ("Bipolar", "DGIDB_BIPOLAR"),
    ("Schizophrenia", "DGIDB_SCHIZOPHRENIA"),
    ("Breast Cancer", "DGIDB_BREAST_CANCER"),
    ("FULL", "DGIDB_FULL"),
])

# Controls for the two-disease comparison
disease1_selector = widgets.Dropdown(description='Disease 1:', options=disease_map.keys())
disease2_selector = widgets.Dropdown(description='Disease 2:', options=disease_map.keys())

# Number of genes to keep after ranking
top_n_selector = widgets.IntSlider(description='Top N:', value=10, min=0, max=1000, step=10)

result_type_selector = widgets.Dropdown(
    description='Result Type:',
    options=["unidirectional_multilayer", "multilayer", "single_layer"],
)

# The comparison runs only when this button is clicked
run_button = widgets.Button(description="Compare Diseases")
output = widgets.Output()

# Data loader
def load_disease_data(disease, result_type):
    """Load the RWR results table for the given disease and result type.

    `disease` must be a key of `disease_map` (otherwise a KeyError is raised);
    the mapped value is the directory name and `result_type` is the prefix of
    the CSV file name. Returns the file contents as a pandas DataFrame.
    """
    csv_name = "{}_rwr_results.csv".format(result_type)
    csv_path = "{}/{}/{}".format("../Methods/output", disease_map[disease], csv_name)
    return pd.read_csv(csv_path)

# Comparison logic
def sanitize(name):
    """Return `name` with every space and hyphen replaced by an underscore."""
    return name.translate(str.maketrans({" ": "_", "-": "_"}))

def compare_diseases(b, _output=output):
    """Plot the per-gene score ratio (disease 1 / disease 2) for the top genes.

    Loads the results of both selected diseases, joins them on 'claim_name',
    computes `Score_Ratio`, keeps the Top N rows with the highest ratio, prints
    that table and its length, and draws a bar chart.

    Args:
        b: The value passed by the button's click handler; it is not used.
        _output: Output widget to draw into. It is bound when this function is
            defined, so re-running another cell that reassigns the global name
            `output` cannot redirect this comparison into a different area.

    Any exception raised while loading or plotting is printed inside the output
    widget instead of propagating.
    """
    with _output:
        _output.clear_output()
        disease1 = disease1_selector.value
        disease2 = disease2_selector.value
        top_n = top_n_selector.value
        result_type = result_type_selector.value

        try:
            df1 = load_disease_data(disease1, result_type)
            df2 = load_disease_data(disease2, result_type)

            # Score columns are named after the (sanitized) disease labels
            col1 = f'Score_{sanitize(disease1)}'
            col2 = f'Score_{sanitize(disease2)}'
            if col1 == col2:
                # Same disease picked twice (the default selection): identical
                # column names would be suffixed by the merge and the ratio
                # lookup below would raise KeyError, so disambiguate them.
                col1, col2 = f'{col1}_1', f'{col2}_2'

            scores1 = df1[['claim_name', 'Score']].rename(columns={'Score': col1})
            scores2 = df2[['claim_name', 'Score']].rename(columns={'Score': col2})

            # Inner join: only genes present in both result sets remain
            merged = pd.merge(scores1, scores2, on='claim_name')

            if merged.empty:
                print("No overlapping claim_names found between the two diseases.")
                return

            # A zero denominator yields +/-inf, which would sort to the top and
            # cannot be drawn; treat it as undefined and drop it with NaN ratios.
            ratio = merged[col1] / merged[col2]
            merged['Score_Ratio'] = ratio.replace([float('inf'), float('-inf')], float('nan'))
            n_undefined = int(merged['Score_Ratio'].isna().sum())
            if n_undefined:
                print(f"Skipped {n_undefined} gene(s) with an undefined ratio (zero or missing score).")
                merged = merged.dropna(subset=['Score_Ratio'])

            merged = merged.sort_values(by='Score_Ratio', ascending=False).head(top_n)
            if merged.empty:
                print("Nothing to plot: no genes remain for the selected Top N.")
                return

            print(merged)
            print(len(merged))

            # Plot (sns and the *_SIZE constants are taken from the notebook's
            # global namespace; this function does not define them)
            fig, ax = plt.subplots(figsize=(12, 6))
            try:
                sns.barplot(x='claim_name', y='Score_Ratio', data=merged, color="#4C72B0", ax=ax)
                ax.set_title(f'Score Ratio ({disease1} / {disease2}) — Top {top_n} Overlapping Genes', fontsize=TITLE_SIZE)
                ax.set_xlabel('Gene Claim Name', fontsize=LABEL_SIZE)
                ax.set_ylabel('Score Ratio', fontsize=LABEL_SIZE)
                ax.tick_params(axis='x', labelrotation=45, labelsize=TICK_SIZE)
                ax.tick_params(axis='y', labelsize=TICK_SIZE)
                fig.tight_layout()
                plt.show()
            finally:
                # Close even when plotting fails so figures do not accumulate
                plt.close(fig)

        except Exception as e:
            print(f"Error: {e}")

# Run the comparison whenever the button is clicked
run_button.on_click(compare_diseases)

# Stack the controls vertically, with the output area at the bottom
display(
    widgets.VBox(
        children=[
            disease1_selector,
            disease2_selector,
            result_type_selector,
            top_n_selector,
            run_button,
            output,
        ]
    )
)


VBox(children=(Dropdown(description='Disease 1:', options=('Bipolar', 'Schizophrenia', 'Breast Cancer', 'FULL'…