In [5]:
import os
import pandas as pd

folder_path = "C:/Users/anton/Dev/ABM/to_plot/2"
keys = ["0_25", "0_5", "0_75", "1_0"]

# Keep only the CSV files found in the folder
files = [name for name in os.listdir(folder_path) if name.endswith(".csv")]

# Map every key to the DataFrame read from a CSV whose file name contains that key.
# If several file names contain the same key, the file listed last wins.
dataframes = dict(
    (key, pd.read_csv(os.path.join(folder_path, file)))
    for file in files
    for key in keys
    if key in file
)

dataframes_keys = dataframes.keys()

In [6]:
for key, df in dataframes.items():
    # Remove a previously computed 'Efficiency' column so the insert below
    # does not collide with an existing column of the same name
    if 'Efficiency' in df.columns:
        df.drop(columns='Efficiency', inplace=True)

    # Target route energy expressed as a percentage of the best route energy
    efficiency = df['Target Route Energy'].div(df['Best Route Energy']).mul(100)

    # Place 'Efficiency' at column index 3, i.e. as the fourth column
    df.insert(3, 'Efficiency', efficiency)


dataframes['0_25'].head()

Unnamed: 0,Step,Performance,Game Manager Index,Efficiency,Best Route Energy,Curriculum Step,Target Route Energy,Improvement,Gap,Action_0,Action_1,Action_2,Action_3,Action_4,Action_5,Action_6,Action_7,Action_8,Action_9,Action_10
0,2048,190644.343715,1,16.0,25,0,4,78.28,5.25,0.095703,0.09082,0.083984,0.09668,0.094238,0.084473,0.097168,0.094727,0.088379,0.089355,0.084473
1,4096,137506.053903,2,3.361345,119,0,4,-0.789916,28.75,0.085449,0.10498,0.085449,0.102539,0.081055,0.095703,0.095215,0.087891,0.086426,0.086914,0.088379
2,6144,327899.562817,2,3.361345,119,0,4,-0.789916,28.75,0.088867,0.09668,0.092773,0.09375,0.093262,0.094238,0.095215,0.091309,0.085449,0.085449,0.083008
3,8192,182067.888602,3,0.0814,4914,0,4,-0.994912,1227.5,0.089355,0.090332,0.099121,0.092285,0.086426,0.103027,0.091309,0.084961,0.096191,0.086914,0.080078
4,10240,372336.606553,3,23.529412,17,0,4,0.470588,3.25,0.101562,0.086426,0.092773,0.09668,0.091797,0.091309,0.095215,0.081543,0.084961,0.087402,0.090332


In [7]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import math

def aggregate_data(df, window):
    """
    Smooth a DataFrame with a moving average.

    Parameters:
    df (DataFrame): Data to smooth; must contain a 'Step' column, which is passed
        to ``rolling`` as the ``on`` column.
    window (int or float): Window size in rows; truncated to an integer with ``int``.

    Returns:
    DataFrame: The rolling mean of ``df`` (a window needs only one observation to
        produce a value), with every row that still contains a NaN removed.
    """
    window_size = int(window)
    roller = df.rolling(window=window_size, on='Step', min_periods=1)
    averaged = roller.mean()
    return averaged.dropna()

def _smoothed_series(df, column, window):
    """Return `column` of `df` smoothed by `aggregate_data`, aligned with the rows of `df`."""
    smoothed = aggregate_data(df, window)[column]
    # aggregate_data drops rows that contain NaN. The slider only replaces the y data
    # and leaves x untouched, so a shorter series would pair values with the wrong
    # steps. Re-aligning on the original index turns dropped rows into gaps instead.
    if len(smoothed) != len(df) and df.index.is_unique:
        smoothed = smoothed.reindex(df.index)
    return smoothed


def plot_performance_comparison(df_list, column, interesting_columns=None, model_names=None, max_aggregation=50):
    """
    Create an interactive Plotly plot comparing performance of models with steps as the x-axis,
    including a slider for continuous data aggregation and buttons to switch between different columns.

    The aggregation slider always redraws the initial `column` (smoothed with the selected
    window) and resets the y-axis title to match. The column buttons always show the raw,
    unsmoothed values of the chosen column.

    Parameters:
    df_list (list): List of DataFrames containing 'Step' and performance columns for each model
    column (str): The initial column to plot
    interesting_columns (list, optional): Columns that get a switch button. If None, every
        column of the first DataFrame except 'Step' gets a button.
    model_names (list, optional): Names of the models for the legend. If None, default names will be used.
        Missing names are filled with defaults; the passed list itself is not modified.
    max_aggregation (int, optional): Maximum aggregation window size. Default is 50.

    Returns:
    plotly.graph_objects.Figure: The interactive plot
    """

    # Work on a copy so the caller's list is never extended in place, then pad it
    # with default names until there is one name per DataFrame
    model_names = [] if model_names is None else list(model_names)
    model_names.extend(f'Model {i+1}' for i in range(len(model_names), len(df_list)))

    # Create subplot with secondary y-axis
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Add traces for each model
    for df, name in zip(df_list, model_names):
        fig.add_trace(
            go.Scatter(x=df['Step'], y=df[column], name=name, mode='lines+markers'),
            secondary_y=False
        )

    # Update layout
    fig.update_layout(
        title={
            'text': 'Performance Comparison of Models',
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        },
        xaxis_title='Steps',
        yaxis_title=column,
        legend_title='Models',
        hovermode='x unified',
        margin=dict(t=120)  # Increase top margin to make room for buttons
    )

    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeslider=dict(visible=True),
            type="linear"
        )
    )

    # Add aggregation slider: one step per window size from 1 to max_aggregation
    steps = [
        dict(
            method="update",
            args=[
                {"y": [_smoothed_series(df, column, window) for df in df_list]},
                # The slider always shows `column`; keep the axis label in sync in
                # case a column button changed it earlier
                {"yaxis.title.text": column},
            ],
            label=str(window)
        )
        for window in range(1, int(max_aggregation) + 1)
    ]
    sliders = [dict(
        active=0,
        currentvalue={"prefix": "Aggregation Window: "},
        pad={"t": 50},
        steps=steps
    )]

    fig.update_layout(
        sliders=sliders
    )

    # Add buttons to switch between columns
    columns_to_plot = df_list[0].columns.drop('Step').tolist()
    # Filter out columns that are not in interesting_columns
    if interesting_columns is not None:
        columns_to_plot = [col for col in columns_to_plot if col in interesting_columns]
    buttons = [
        dict(
            label=col,
            method="update",
            args=[{"y": [df[col] for df in df_list]},
                  {"yaxis.title.text": col}]
        )
        for col in columns_to_plot
    ]

    # Distribute buttons across 2 layers
    num_layers = 2
    buttons_per_layer = math.ceil(len(buttons) / num_layers)

    button_layers = []
    for i in range(num_layers):
        layer_buttons = buttons[i * buttons_per_layer:(i + 1) * buttons_per_layer]
        # Skip layers that would end up without any button
        if not layer_buttons:
            continue
        button_layers.append(dict(
            type="buttons",
            direction="right",
            active=-1,
            x=0.5,
            xanchor="center",
            y=1.42 - i * 0.2,  # Adjust vertical positioning with more space between layers
            buttons=layer_buttons
        ))

    fig.update_layout(updatemenus=button_layers)

    return fig

# Plot the performance comparison
# Fail early with a readable message: a key without a loaded CSV would otherwise
# surface as an opaque error deep inside the plotting function
missing_keys = [key for key in keys if key not in dataframes]
if missing_keys:
    raise KeyError(f"No CSV was loaded for key(s): {', '.join(missing_keys)}")

model_names = [f'Model {key}' for key in keys]
df_list = [dataframes[key] for key in keys]

interesting_columns = ['Performance', 'Best Route Energy',
                       'Target Route Energy', 'Efficiency', 'Improvement',
                       'Gap']
# Refer to the starting column by name rather than by its position in the list
initial_column = 'Efficiency'
fig = plot_performance_comparison(df_list, initial_column, interesting_columns=interesting_columns, model_names=model_names, max_aggregation=50)
fig.show()