In [1]:
import pandas as pd
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Load the benchmark results table from the CSV in the working directory.
# Later cells read its "Models" column and the per-dataset score columns.
df = pd.read_csv("overall_results.csv")

In [10]:
# Show the model names in their row order.
df["Models"]

0              FMCIBExtractor
1               CTFMExtractor
2          CTClipVitExtractor
3              PASTAExtractor
4            VISTA3DExtractor
5               VocoExtractor
6             SUPREMExtractor
7             MerlinExtractor
8    MedImageInsightExtractor
9          ModelsGenExtractor
Name: Models, dtype: object

In [3]:

# Per-model parameter counts, keyed by extractor name so each value stays
# attached to the right row even if the CSV row order changes (a bare
# positional list would silently mis-assign them).
# NOTE(review): units are presumably millions of parameters - confirm.
# NOTE(review): 0 for ModelsGenExtractor looks like a placeholder - confirm.
params_by_model = {
    "FMCIBExtractor": 184.48,
    "CTFMExtractor": 77.76,
    "CTClipVitExtractor": 25.89,
    "PASTAExtractor": 127.11,
    "VISTA3DExtractor": 174.95,
    "VocoExtractor": 294.86,
    "SUPREMExtractor": 19.07,
    "MerlinExtractor": 270.94,
    "MedImageInsightExtractor": 616.00,
    "ModelsGenExtractor": 0,
}
# Models missing from the mapping get NaN instead of a wrong neighbour's value.
df["params"] = df["Models"].map(params_by_model)

In [101]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Text size read by create_rank_heatmap for its cell annotations and its
# figure-wide font. create_model_comparison sets its own local font_size.
font_size = 22
def prepare_data(df):
    """Build the ranked per-model summary table used by the plotting functions.

    Parameters
    ----------
    df : pandas.DataFrame
        Benchmark results with one row per model. Must contain a ``Model``
        (or ``Models``) column plus one score column for each dataset listed
        in ``dataset_cols`` below. Any other column is dropped. The input
        frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by descending ``Average Score`` with a fresh
        0..n-1 index and these added columns: ``Average Score``, ``Std Dev``
        (pandas default sample standard deviation), ``Rank``, ``Tier`` and
        one ``<dataset>_Rank`` column per dataset (1 = best).

    Raises
    ------
    KeyError
        If the model column or any dataset column is missing.
    """
    # Rename the 'Models' column if needed
    if 'Models' in df.columns:
        df = df.rename(columns={'Models': 'Model'})

    # Extract only the model name and dataset columns
    dataset_cols = ['LUNA', 'DLCS', 'NSCLC_Radiomics', 'NSCLC_Radiogenomics', 'C4KC-KiTs', 'ColRecMet']
    necessary_cols = ['Model'] + dataset_cols

    # Keep only necessary columns (filter out unnamed columns). The explicit
    # copy makes the assignments below write to an independent frame instead
    # of a column slice, which avoids SettingWithCopyWarning.
    df = df[necessary_cols].copy()

    # Calculate average score and standard deviation for each model
    df['Average Score'] = df[dataset_cols].mean(axis=1)
    df['Std Dev'] = df[dataset_cols].std(axis=1)

    # Sort by average score (descending) and renumber rows from 0
    df = df.sort_values('Average Score', ascending=False).reset_index(drop=True)

    # Add rank and tier information
    df['Rank'] = range(1, len(df) + 1)
    df['Tier'] = pd.cut(
        df['Average Score'],
        # Bins are left-closed (right=False); the open-ended top edge keeps a
        # perfect average of exactly 1.0 in 'Top Tier' instead of NaN.
        bins=[0, 0.53, 0.63, float('inf')],
        labels=['Lower Tier', 'Middle Tier', 'Top Tier'],
        right=False
    )

    # Create dataset-specific rank columns (1 = best)
    for col in dataset_cols:
        # method='min' gives tied models the same integer rank; the default
        # 'average' method yields fractional ranks that astype(int) truncates.
        df[f"{col}_Rank"] = df[col].rank(ascending=False, method='min').astype(int)

    return df

def create_rank_heatmap(df):
    """Draw a models-by-datasets heatmap of per-dataset ranks.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``Model`` column and one ``<dataset>_Rank`` column
        for each dataset listed in ``dataset_cols`` below (the columns that
        ``prepare_data`` adds).

    Returns
    -------
    plotly.graph_objects.Figure
        Heatmap with one row per model, one column per dataset, a ``#<rank>``
        label in every cell and the colour bar hidden. Text size comes from
        the module-level ``font_size``.
    """
    # Get dataset columns
    dataset_cols = ['LUNA', 'DLCS', 'NSCLC_Radiomics', 'NSCLC_Radiogenomics', 'C4KC-KiTs', 'ColRecMet']
    rank_cols = [f"{col}_Rank" for col in dataset_cols]

    # The worst possible rank equals the number of models, so the colour range
    # follows the data instead of assuming exactly ten models. The floor of 2
    # keeps zmin < zmax for a single-row frame.
    worst_rank = max(len(df), 2)
    mid_rank = (1 + worst_rank) / 2

    # Create a heatmap using Plotly Express for ranks
    fig = px.imshow(
        df[rank_cols].values,
        labels=dict(x="Dataset", y="Model", color="Rank (1=best)"),
        x=[col.replace('_Rank', '') for col in rank_cols],
        y=df['Model'],
        zmin=1,
        zmax=worst_rank,
        color_continuous_scale='Greens_r',  # Reversed scale so darker = better
        aspect="auto",
        title=''
    )

    # Add text annotations with the ranks
    for i in range(len(df)):
        for j, col in enumerate(rank_cols):
            rank_value = df[col].iloc[i]
            fig.add_annotation(
                x=j,
                y=i,
                text=f"#{rank_value}",
                showarrow=False,
                font=dict(
                    # Good ranks sit on dark green cells, poor ranks on
                    # near-white ones; switch the text colour so the label
                    # stays legible on both.
                    color='white' if rank_value <= mid_rank else 'black',
                    size=font_size,
                    weight='normal'
                )
            )

    # Update layout with consistent font size
    fig.update_layout(
        height=1000,
        width=1200,
        template='plotly_white',
        font=dict(size=font_size)  # Global font size setting for all text elements
    )
    # Hide the colorscale
    fig.update_coloraxes(showscale=False)
    return fig


def create_model_comparison(df):
    """
    Create a model-focused comparison chart where each line represents a model

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Model``, ``Rank``, ``Tier``, ``Average Score`` and one
        score column per dataset listed in ``dataset_cols`` below (the shape
        produced by ``prepare_data``). The row index may be arbitrary.

    Returns
    -------
    plotly.graph_objects.Figure
        One line per model across the datasets, plus a black dotted line with
        the per-dataset mean over all models.
    """
    # Get dataset columns
    dataset_cols = ['LUNA', 'DLCS', 'NSCLC_Radiomics', 'NSCLC_Radiogenomics', 'C4KC-KiTs', 'ColRecMet']

    # Create a figure
    fig = go.Figure()
    font_size = 25

    # Colour each model from the sequential Blues palette according to its
    # average score (min-max normalised): higher average -> darker blue.
    # NOTE(review): the lightest Blues shades are close to white and may be
    # hard to see on the white template - consider skipping them.
    palette = px.colors.sequential.Blues
    avg_scores = df['Average Score']
    lowest = avg_scores.min()
    span = avg_scores.max() - lowest
    # When every model has the same average (or there is only one model) the
    # span is zero; fall back to the darkest shade instead of dividing by zero.
    colors = [
        palette[int((score - lowest) / span * (len(palette) - 1))] if span > 0 else palette[-1]
        for score in avg_scores
    ]

    # Add a trace for each model. Colours are paired with rows by position,
    # so this does not depend on the frame having a 0..n-1 index.
    for color, (_, row) in zip(colors, df.iterrows()):
        model_name = row['Model']
        model_data = row[dataset_cols].values

        line_style = 'solid'
        width = 2

        # Highlight top 3 models with thicker lines
        if row['Rank'] <= 3:
            width = 4

        # Add different line styles based on tier
        if row['Tier'] == 'Lower Tier':
            line_style = 'dot'
        elif row['Tier'] == 'Middle Tier':
            line_style = 'dash'

        # Add trace for this model
        fig.add_trace(go.Scatter(
            x=dataset_cols,
            y=model_data,
            mode='lines+markers',
            name=f"#{row['Rank']} {model_name}",
            line=dict(
                width=width,
                dash=line_style,
                color=color
            ),
            marker=dict(size=8),
            hovertemplate="<b>%{fullData.name}</b><br>Dataset: %{x}<br>Score: %{y:.4f}<extra></extra>"
        ))

    # Add a line for the average performance across models for each dataset
    dataset_avgs = df[dataset_cols].mean()
    fig.add_trace(go.Scatter(
        x=dataset_cols,
        y=dataset_avgs,
        mode='lines+markers',
        name='Average Performance',
        line=dict(width=4, color='black', dash='dot'),
        marker=dict(size=10, color='black', symbol='diamond'),
        hovertemplate="<b>Average Performance</b><br>Dataset: %{x}<br>Score: %{y:.4f}<extra></extra>"
    ))

    # Update layout
    fig.update_layout(
        title='',
        xaxis_title='Datasets',
        yaxis_title='Performance Score',
        # Fixed y-range; scores outside 0.4-0.9 fall outside the visible area.
        yaxis=dict(range=[0.4, 0.9]),
        # Horizontal legend anchored just above the plot area, right-aligned
        legend=dict(
            title='Models (Ranked)',
            orientation='h',
            yanchor='bottom',
            y=1.02,
            xanchor='right',
            x=1
        ),
        height=1200,
        width=1200,
        template='plotly_white',
        margin=dict(r=200),  # Extra right-hand margin
        font=dict(size=font_size)
    )
    # Add grid lines for better readability
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='rgba(0,0,0,0.1)')
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='rgba(0,0,0,0.1)')

    return fig

def main(your_dataframe):
    """Run the whole pipeline: summarise, plot, display and export.

    Parameters
    ----------
    your_dataframe : pandas.DataFrame
        Raw results table, passed straight to ``prepare_data``.

    Returns
    -------
    pandas.DataFrame
        The processed frame returned by ``prepare_data``, for further use.

    Side effects: shows both figures and writes each one to an HTML file in
    the current working directory.
    """
    processed = prepare_data(your_dataframe)

    # Output file name -> figure; insertion order is heatmap, then comparison.
    figures = {
        "model_performance_rank_heatmap.html": create_rank_heatmap(processed),
        "model_performance_by_model.html": create_model_comparison(processed),
    }

    # Display both figures first ...
    for figure in figures.values():
        figure.show()

    # ... then export each one as a standalone HTML page.
    for html_path, figure in figures.items():
        figure.write_html(html_path)

    return processed


In [102]:
# Run the pipeline on the loaded results: shows both figures, writes the two
# HTML files, and leaves the processed frame as this cell's output.
main(df)

Unnamed: 0,Model,LUNA,DLCS,NSCLC_Radiomics,NSCLC_Radiogenomics,C4KC-KiTs,ColRecMet,Average Score,Std Dev,Rank,Tier,LUNA_Rank,DLCS_Rank,NSCLC_Radiomics_Rank,NSCLC_Radiogenomics_Rank,C4KC-KiTs_Rank,ColRecMet_Rank
0,FMCIBExtractor,0.886008,0.675796,0.577038,0.587946,0.686944,0.577222,0.665159,0.119014,1,Top Tier,1,1,3,5,3,1
1,ModelsGenExtractor,0.806061,0.64532,0.577361,0.609598,0.733611,0.530185,0.650356,0.102598,2,Top Tier,2,2,2,4,1,2
2,VISTA3DExtractor,0.711121,0.607937,0.582637,0.622098,0.681667,0.487778,0.61554,0.078759,3,Middle Tier,3,3,1,1,4,5
3,SUPREMExtractor,0.645022,0.544219,0.560617,0.556027,0.718611,0.482037,0.584422,0.083846,4,Middle Tier,7,8,6,8,2,6
4,MerlinExtractor,0.637643,0.561696,0.569879,0.612946,0.6425,0.431296,0.575994,0.078445,5,Middle Tier,8,6,4,3,5,9
5,MedImageInsightExtractor,0.675719,0.584828,0.564796,0.560603,0.561944,0.49537,0.573877,0.058406,6,Middle Tier,4,5,5,7,8,4
6,PASTAExtractor,0.664839,0.556302,0.557108,0.569866,0.604167,0.464815,0.569516,0.065693,7,Middle Tier,5,7,7,6,6,7
7,CTFMExtractor,0.65431,0.592034,0.544208,0.620424,0.463333,0.452778,0.554515,0.083044,8,Middle Tier,6,4,8,2,10,8
8,CTClipVitExtractor,0.572564,0.494423,0.449703,0.510379,0.4925,0.495741,0.502552,0.039923,9,Lower Tier,9,10,10,9,9,3
9,VocoExtractor,0.493761,0.507139,0.526161,0.461384,0.563333,0.420741,0.49542,0.049865,10,Lower Tier,10,9,9,10,7,10
