In [15]:
import pandas as pd
import plotly.graph_objects as go

# Load the tardiness results from the Excel workbook.
# 'Setting' is forced to str -- presumably so numeric-looking labels such as
# '1e-3' are kept as text rather than parsed as floats (confirm against the sheet).
excel_file = 'OVERALL RESULT.xlsx'
sheet_name = 'plot_CL'
df = pd.read_excel(excel_file, sheet_name=sheet_name, dtype={'Setting': str})

# Order in which the settings should appear on the x-axis.
# Any label in the sheet that is not listed here becomes NaN in the categorical
# below and is therefore left out of the plots.
# Other label sets, left here as commented-out alternatives:
# setting_order = ['Low Exploration fraction', 'Medium Exploration fraction', 'High Exploration fraction']
# setting_order = ['1e-3', '1e-4', '1e-5']
setting_order = ['Default', 'Smaller', 'Equal']
df['Setting'] = pd.Categorical(df['Setting'], categories=setting_order)
display(df)

# Box plot of the tardiness distribution, one box per Setting
fig = go.Figure()
fig.add_trace(go.Box(
    x=df['Setting'],
    y=df['Tardiness'],
    boxpoints='outliers',
    fillcolor='white',
    marker=dict(color='blue'),
    line=dict(color='blue'),
    name='Tardiness'
))

# Mean tardiness per Setting, drawn as a red line over the boxes.
# observed=False is passed explicitly: it keeps every category of setting_order
# in the result and does not rely on pandas' deprecated implicit default.
mean_values = (
    df.groupby('Setting', observed=False)['Tardiness']
    .mean()
    .reset_index()
)
fig.add_trace(go.Scatter(
    x=mean_values['Setting'],
    y=mean_values['Tardiness'],
    mode='lines+markers',
    line=dict(color='red'),
    marker=dict(color='red', size=8),
    name='Mean Tardiness'
))

# Customize layout
fig.update_layout(
    title='Tardiness Distribution by Setting',
    yaxis_title='Tardiness (seconds)',
    template='plotly_white',
    xaxis=dict(
        title=dict(text='Setting'),
        tickfont=dict(size=14),  # Font size of the x-axis labels
        # Plotly orders a category axis by first appearance in the trace data,
        # not by the pandas category order, so pin the order explicitly.
        categoryorder='array',
        categoryarray=setting_order
    )
)

# Show the plot
fig.show()


Unnamed: 0,Setting,InstanceID,ScenarioID,Tardiness
0,Default,valid1,A,125720.000000
1,Default,valid1,B,0.000000
2,Default,valid1,C,0.000000
3,Default,valid2,A,0.000000
4,Default,valid2,B,0.000000
...,...,...,...,...
103,Equal,valid11,B,73668.333333
104,Equal,valid11,C,1460.000000
105,Equal,valid12,A,0.000000
106,Equal,valid12,B,1560.000000






In [16]:
# Win rate and median tardiness per Setting.
# Uses `df` and `setting_order` defined in the previous cell; `df` is only read
# here, never modified.

# An (InstanceID, ScenarioID) pair identifies one comparison between settings.
group_keys = ['InstanceID', 'ScenarioID']

# Median tardiness for each Setting, sorted in the order given by setting_order.
# observed=False is passed explicitly so every category stays in the result and
# the code does not rely on pandas' deprecated implicit default.
median_values = df.groupby('Setting', observed=False)['Tardiness'].median().reset_index()
median_values['Setting'] = pd.Categorical(median_values['Setting'], categories=setting_order, ordered=True)
median_values = median_values.sort_values('Setting')

# A setting "wins" a group when its tardiness equals the group's minimum.
# Ties count as a win for every tied setting, and a setting is counted at most
# once per group (hence drop_duplicates).
best_in_group = df.groupby(group_keys)['Tardiness'].transform('min')
winners = df.loc[
    df['Tardiness'].eq(best_in_group),
    group_keys + ['Setting']
].drop_duplicates()

# Wins per setting, aligned to setting_order; settings that never win get 0.
win_counts = (
    winners['Setting']
    .astype(object)
    .value_counts()
    .reindex(setting_order, fill_value=0)
)

# Total number of InstanceID and ScenarioID combinations
total_combinations = df.groupby(group_keys).ngroups

# Win rate table, already in setting_order
min_counts = pd.DataFrame({
    'Setting': pd.Categorical(setting_order, categories=setting_order, ordered=True),
    'Count': win_counts.to_numpy(),
})
min_counts['Percentage'] = (min_counts['Count'] / total_combinations) * 100

# Create the combined plot
fig = go.Figure()

# Median line, plotted against the secondary axis y2 (which overlays y, see layout)
fig.add_trace(go.Scatter(
    x=median_values['Setting'],
    y=median_values['Tardiness'],
    mode='lines+markers',
    line=dict(color='red'),
    marker=dict(color='red', size=8),
    name='Median Tardiness',
    yaxis='y2'
))

# Bars for the percentage of groups in which each setting reaches the minimum
fig.add_trace(go.Bar(
    x=min_counts['Setting'],
    y=min_counts['Percentage'],
    name='Win rate',
    marker=dict(color='white', line=dict(color='blue', width=2)),
    width=0.3  # Thinner than the default bar width
))

# Customize layout.
# Axis titles use title=dict(text=..., font=...); the older `titlefont`
# attribute is deprecated in Plotly.
fig.update_layout(
    title='Win rate and Median of tardiness by Setting',
    xaxis=dict(
        title=dict(text='Setting'),
        tickfont=dict(size=14),  # Font size of the x-axis labels
        # Pin the category order instead of relying on first appearance in the data
        categoryorder='array',
        categoryarray=setting_order
    ),
    yaxis=dict(
        title=dict(text='Percentage (%)', font=dict(color='black')),
        tickfont=dict(color='black'),
        range=[0, 100]  # Percentages: fixed 0-100 scale
    ),
    yaxis2=dict(
        title=dict(text='Tardiness (seconds)', font=dict(color='black')),
        tickfont=dict(color='black'),
        overlaying='y',
        side='right',
        # Start at 0 and leave 100 units of headroom above the largest median
        range=[0, median_values['Tardiness'].max() + 100],
        showgrid=False  # Hide the horizontal grid lines of the secondary axis
    ),
    template='plotly_white',
    font=dict(color='black'),  # Black text for the whole figure
    legend=dict(
        x=1.1,  # Placed to the right of the secondary y-axis
        y=1,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(0, 0, 0, 0)'
    )
)

# Show the plot
fig.show()



