In [2]:
import pandas as pd
import plotly.graph_objects as go

# Load the dataset.
# NOTE(review): absolute, user-specific path - consider a configurable data
# directory. The doubled backslashes inside the raw string are kept literally.
file_path = r"C:\\Users\\c.hakker\\OneDrive - VISTA college\\Senior Stuff\\Opleiding Data science\\Data\\baseline-mean-errors-80072ned.xlsx"
data = pd.read_excel(file_path)

# Normalise header whitespace before any column is looked up by name
# (non-string labels are passed through untouched).
data.columns = [col.strip() if isinstance(col, str) else col for col in data.columns]

# Identify the model from the title: titles containing "Baseline" are baseline
# rows, everything else is treated as SARIMAX. `na=False` keeps rows with a
# missing title from raising; such rows get a missing Cleaned_Branch.
is_baseline = data['sbi_title'].str.contains('Baseline', regex=False, na=False)
data['Model'] = is_baseline.map({True: 'Baseline', False: 'SARIMAX'})

# Remove the model prefix to obtain the bare branch name. `regex=False` makes
# the literal replacement independent of the pandas version's default.
data['Cleaned_Branch'] = (
    data['sbi_title']
    .str.replace("Baseline ", "", regex=False)
    .str.replace("SARIMAX ", "", regex=False)
    .str.strip()
)

# Convert 'MAE' column to numeric; unparseable entries become NaN
data['MAE'] = pd.to_numeric(data['MAE'], errors='coerce')

# Define custom colors for each branch (one palette per model)
sarimax_colors = {'C Manufacturing': '#5fb5db', 'G Trade': '#3f9abf', 'Q Healthcare': '#006789'}
baseline_colors = {'C Manufacturing': '#febf01', 'G Trade': '#deaa00', 'Q Healthcare': '#fe9001'}

# Model -> branch colour palette. The dict order (Baseline, then SARIMAX) is
# also the order in which the two bars of a row are added, i.e. stacked.
model_palettes = {'Baseline': baseline_colors, 'SARIMAX': sarimax_colors}
fallback_color = '#bbbbbb'  # used for branches without a configured colour

# Loop through years (2022 and 2023): one figure and one HTML file per year
for year in [2022, 2023]:
    # Filter data for the year
    year_data = data[data['Year'] == year]

    # Aggregate data by averaging duplicate entries for the same quarter, branch, and model
    cleaned_data = year_data.groupby(['Cleaned_Branch', 'quarter', 'Model']).agg({'MAE': 'mean'}).reset_index()

    # Pivot so each (branch, quarter) row carries one MAE column per model.
    # A model without data for that row ends up as NaN, not as a missing key.
    pivot_data = cleaned_data.pivot(index=['Cleaned_Branch', 'quarter'], columns='Model', values='MAE').reset_index()

    # Create a combined y-axis label. Rows are sorted in DESCENDING order
    # because a horizontal bar chart places the first category at the bottom;
    # this puts Q1 of the alphabetically first branch at the top.
    pivot_data['y_label'] = pivot_data['Cleaned_Branch'] + " Q" + pivot_data['quarter'].astype(str)
    pivot_data = pivot_data.sort_values(by=['Cleaned_Branch', 'quarter'], ascending=[False, False])

    # Create the Plotly figure
    fig = go.Figure()

    # Track (branch, model) pairs that already own a legend entry
    added_legends = set()

    # Add one bar per model for each branch/quarter row
    for _, row in pivot_data.iterrows():
        y_label = row['y_label']
        branch = row['Cleaned_Branch']

        for model, palette in model_palettes.items():
            # None when the model column is absent for this year, NaN when the
            # model has no value for this particular branch/quarter.
            mae = row.get(model)
            has_value = pd.notna(mae)
            legend_key = (branch, model)

            fig.add_trace(go.Bar(
                y=[y_label],
                x=[mae if has_value else None],
                name=f"{branch} {model} ({year})",
                # Grouping makes one legend click toggle every quarter of the pair
                legendgroup=f"{branch} {model}",
                # Only the first bar that really has a value gets the legend entry
                showlegend=has_value and legend_key not in added_legends,
                marker_color=palette.get(branch, fallback_color),
                orientation='h',
                # Leave the label empty instead of printing "nan"
                text=[f"{mae:.2f}" if has_value else ""],
                textposition='inside',
            ))
            if has_value:
                added_legends.add(legend_key)

    # Update the layout
    fig.update_layout(
        title=f"MAE Stacked Insights: Baseline vs SARIMAX Performance Across Quarters ({year})",
        xaxis_title=None,  # Remove x-axis text
        xaxis_showticklabels=False,  # Remove x-axis tick labels
        yaxis_title="Branch and Quarter",
        # Pin the category order to the sorted rows
        yaxis=dict(categoryorder='array', categoryarray=pivot_data['y_label'].tolist()),
        barmode='stack',
        legend_title="Model",
        template="plotly_white"
    )

    # Save the plot as an HTML file (relative to the current working directory)
    output_file = f"Stacked_MAE_Comparison_{year}.html"
    fig.write_html(output_file)

    # Show the plot for the year
    fig.show()


In [3]:
# Strip whitespace from all column names.
# Non-string labels (e.g. numeric headers) are passed through unchanged;
# `data.columns.str.strip()` would replace them with NaN or raise.
data.columns = [col.strip() if isinstance(col, str) else col for col in data.columns]

In [4]:
# Strip whitespace from column names (non-string labels are left untouched)
data.columns = [col.strip() if isinstance(col, str) else col for col in data.columns]

# Create a new column for formatted quarters, e.g. "Q1 2022"
data['Quarter Label'] = 'Q' + data['quarter'].astype(str) + ' ' + data['Year'].astype(str)

# Filter data for 2022 and 2023 and order it chronologically.
# The quarter labels are plain strings, so the x-axis follows the order in
# which they first appear; sorting makes that order independent of the row
# order in the spreadsheet. A stable sort keeps duplicate rows in place.
filtered_data = (
    data[data['Year'].isin([2022, 2023])]
    .sort_values(['Year', 'quarter'], kind='mergesort')
)

# Initialize the Plotly figure
fig = go.Figure()

# Define colors for each series, keyed by the exact `sbi_title` value
colors = {
    'C Manufacturing': '#44b7c2',
    'Baseline C Manufacturing': '#81c1c6',
    'G Trade': '#024b7a',
    'Baseline G Trade': '#37759b',
    'Q Healthcare': '#ffae49',
    'Baseline Q Healthcare': '#d3a367'
}

# Distinct series titles; rows without a title are ignored so the
# `startswith` checks below cannot fail on NaN.
branch_titles = filtered_data['sbi_title'].dropna().unique()
fallback_color = '#bbbbbb'  # used for titles missing from `colors`

# Plot the actual and predicted lines for each branch (excluding Baseline actuals)
for branch in branch_titles:
    if branch.startswith('Baseline'):
        continue

    branch_data = filtered_data[filtered_data['sbi_title'] == branch]
    line_color = colors.get(branch, fallback_color)

    # Add actual values (solid line)
    fig.add_trace(go.Scatter(
        x=branch_data['Quarter Label'],
        y=branch_data['Actual Sickleave'],
        mode='lines',
        name=f'{branch} Actual',
        line=dict(color=line_color)
    ))

    # Add predicted values (dotted line, same colour as the actuals)
    fig.add_trace(go.Scatter(
        x=branch_data['Quarter Label'],
        y=branch_data['Predicted'],
        mode='lines',
        name=f'{branch} Predicted',
        line=dict(color=line_color, dash='dot')
    ))

# Plot the Baseline predicted values with custom colors. These are added in a
# second pass so they appear after all model series in the legend.
for branch in branch_titles:
    if not branch.startswith('Baseline'):
        continue

    branch_data = filtered_data[filtered_data['sbi_title'] == branch]
    fig.add_trace(go.Scatter(
        x=branch_data['Quarter Label'],
        y=branch_data['Predicted'],
        mode='lines',
        # `branch` already starts with "Baseline", so it is not repeated here
        name=f'{branch} Predicted',
        line=dict(color=colors.get(branch, fallback_color), dash='dash')
    ))


# Horizontal y-axis caption, drawn as a paper-anchored annotation instead of
# the regular (rotated) axis title.
y_axis_caption = dict(
    text='Sick Leave (%)',
    xref='paper',
    yref='paper',
    x=-0.05,  # slightly left of the plot area
    y=1.06,  # slightly above the top of the plot area
    showarrow=False,
    textangle=0,  # keep the text horizontal
    font=dict(size=12),
)

# Grid styling shared by both axes
grid_style = dict(showgrid=True, gridcolor='lightgrey')

# Apply title, axes, size and caption in one call. The call is the last
# expression of the cell, so the returned figure is rendered as its output.
fig.update_layout(
    title='Predicted vs. Actual Sick Leave (2022 & 2023)',
    xaxis_title='Quarter',
    template='plotly_white',
    xaxis=dict(grid_style),
    yaxis=dict(grid_style, automargin=True),
    width=1200,
    height=500,
    annotations=[y_axis_caption],
)


In [5]:
import pandas as pd
import plotly.subplots as sp
import plotly.graph_objects as go

# Load the dataset.
# NOTE(review): absolute, user-specific path - consider a configurable data
# directory. The doubled backslashes inside the raw string are kept literally.
file_path = r"C:\\Users\\c.hakker\\OneDrive - VISTA college\\Senior Stuff\\Opleiding Data science\\Data\\baseline-mean-errors-80072ned.xlsx"
data = pd.read_excel(file_path)

# Normalise header whitespace before any column is looked up by name
# (non-string labels are passed through untouched).
data.columns = [col.strip() if isinstance(col, str) else col for col in data.columns]

# Identify the model from the title: titles containing "Baseline" are baseline
# rows, everything else is treated as SARIMAX. `na=False` keeps rows with a
# missing title from raising; such rows get a missing Cleaned_Branch.
is_baseline = data['sbi_title'].str.contains('Baseline', regex=False, na=False)
data['Model'] = is_baseline.map({True: 'Baseline', False: 'SARIMAX'})

# Remove the model prefix to obtain the bare branch name. `regex=False` makes
# the literal replacement independent of the pandas version's default.
data['Cleaned_Branch'] = (
    data['sbi_title']
    .str.replace("Baseline ", "", regex=False)
    .str.replace("SARIMAX ", "", regex=False)
    .str.strip()
)

# The frame was just reloaded, so MAE must be coerced to numeric again before
# it is averaged; unparseable entries become NaN.
data['MAE'] = pd.to_numeric(data['MAE'], errors='coerce')

# Filter data for 2022 and 2023
filtered_data = data[data['Year'].isin([2022, 2023])]

# Define custom colors for each branch (one palette per model)
sarimax_colors = {'C Manufacturing': '#5fb5db', 'G Trade': '#3f9abf', 'Q Healthcare': '#006789'}
baseline_colors = {'C Manufacturing': '#febf01', 'G Trade': '#deaa00', 'Q Healthcare': '#fe9001'}

# Initialize subplots with two side-by-side charts (2022 and 2023)
fig = sp.make_subplots(rows=1, cols=2, subplot_titles=("2022", "2023"))

# Track "<model> <branch>" names that already own a legend entry. The set is
# shared by both years, so each pair appears in the legend only once.
added_legends = set()

# Model -> branch colour palette. The dict order (Baseline, then SARIMAX) is
# the order in which the two bars of one x position are stacked.
model_palettes = {'Baseline': baseline_colors, 'SARIMAX': sarimax_colors}

# Loop through each year and assign its bars to the matching subplot column
for col, year in enumerate([2022, 2023], start=1):
    year_data = filtered_data[filtered_data['Year'] == year]
    # Average duplicate entries for the same branch, quarter and model
    year_data = year_data.groupby(['Cleaned_Branch', 'quarter', 'Model']).agg({'MAE': 'mean'}).reset_index()

    # Loop through each branch and quarter
    for branch in year_data['Cleaned_Branch'].unique():
        branch_data = year_data[year_data['Cleaned_Branch'] == branch]
        for quarter in range(1, 5):
            quarter_data = branch_data[branch_data['quarter'] == quarter]

            # X-axis label
            x_label = f"{branch} Q{quarter}"

            for model, palette in model_palettes.items():
                # Mean of an empty selection is NaN: no data for this model/quarter
                mae = quarter_data.loc[quarter_data['Model'] == model, 'MAE'].mean()
                has_value = pd.notna(mae)
                legend_name = f"{model} {branch}"

                # The trace is still added when the value is missing, so it is
                # built exactly as before apart from the label and legend flag.
                fig.add_trace(go.Bar(
                    x=[x_label],
                    y=[mae],
                    name=legend_name,
                    # Grouping makes one legend click toggle every bar of the pair
                    legendgroup=legend_name,
                    marker_color=palette.get(branch, '#bbbbbb'),
                    # Leave the label empty instead of printing "nan"
                    text=f"{mae:.2f}" if has_value else "",
                    textposition="outside",
                    textangle=0,  # Horizontal text
                    # Only the first bar that really has a value gets the legend entry
                    showlegend=has_value and legend_name not in added_legends
                ), row=1, col=col)

                if has_value:
                    added_legends.add(legend_name)

# Identical y-axis styling for both subplot columns: light grid, no zero line,
# no tick labels.
y_axis_style = dict(
    showgrid=True,
    gridcolor="lightgrey",
    zeroline=False,
    title=None,
    showticklabels=False,
)
for subplot_col in (1, 2):
    fig.update_yaxes(row=1, col=subplot_col, **y_axis_style)

# Figure-level layout: stacked bars on a white canvas with a boxed legend
fig.update_layout(
    title="MAE Comparison: Baseline vs SARIMAX (2022 & 2023)",
    barmode="stack",
    bargap=0.15,
    width=1200,
    height=600,
    showlegend=True,
    plot_bgcolor="white",
    paper_bgcolor="white",
    legend={"bordercolor": "black", "borderwidth": 1},
)

# Render the figure
fig.show()


In [None]:
import pandas as pd
import plotly.subplots as sp
import plotly.graph_objects as go

# Load the dataset.
# NOTE(review): absolute, user-specific path - consider a configurable data
# directory. The doubled backslashes inside the raw string are kept literally.
file_path = r"C:\\Users\\c.hakker\\OneDrive - VISTA college\\Senior Stuff\\Opleiding Data science\\Data\\baseline-mean-errors-80072ned.xlsx"
data = pd.read_excel(file_path)

# Normalise header whitespace before any column is looked up by name
# (non-string labels are passed through untouched).
data.columns = [col.strip() if isinstance(col, str) else col for col in data.columns]

# Identify the model from the title: titles containing "Baseline" are baseline
# rows, everything else is treated as SARIMAX. `na=False` keeps rows with a
# missing title from raising; such rows get a missing Cleaned_Branch.
is_baseline = data['sbi_title'].str.contains('Baseline', regex=False, na=False)
data['Model'] = is_baseline.map({True: 'Baseline', False: 'SARIMAX'})

# Remove the model prefix to obtain the bare branch name. `regex=False` makes
# the literal replacement independent of the pandas version's default.
data['Cleaned_Branch'] = (
    data['sbi_title']
    .str.replace("Baseline ", "", regex=False)
    .str.replace("SARIMAX ", "", regex=False)
    .str.strip()
)

# The frame was just reloaded, so MAE must be coerced to numeric again before
# it is averaged; unparseable entries become NaN.
data['MAE'] = pd.to_numeric(data['MAE'], errors='coerce')

# Filter data for 2022 and 2023
filtered_data = data[data['Year'].isin([2022, 2023])]

# Define custom colors for each branch (one palette per model)
sarimax_colors = {'C Manufacturing': '#5fb5db', 'G Trade': '#3f9abf', 'Q Healthcare': '#006789'}
baseline_colors = {'C Manufacturing': '#febf01', 'G Trade': '#deaa00', 'Q Healthcare': '#fe9001'}

# Initialize subplots with two side-by-side charts (2022 and 2023)
fig = sp.make_subplots(rows=1, cols=2, subplot_titles=("2022", "2023"))

# Track "<model> <branch>" names that already own a legend entry. The set is
# shared by both years, so each pair appears in the legend only once.
added_legends = set()

# Model -> branch colour palette. The dict order (Baseline, then SARIMAX) is
# the order in which the two bars of one x position are stacked.
model_palettes = {'Baseline': baseline_colors, 'SARIMAX': sarimax_colors}

# Loop through each year and assign its bars to the matching subplot column
for col, year in enumerate([2022, 2023], start=1):
    year_data = filtered_data[filtered_data['Year'] == year]
    # Average duplicate entries for the same branch, quarter and model
    year_data = year_data.groupby(['Cleaned_Branch', 'quarter', 'Model']).agg({'MAE': 'mean'}).reset_index()

    # Loop through each branch and quarter
    for branch in year_data['Cleaned_Branch'].unique():
        branch_data = year_data[year_data['Cleaned_Branch'] == branch]
        for quarter in range(1, 5):
            quarter_data = branch_data[branch_data['quarter'] == quarter]

            # X-axis label
            x_label = f"{branch} Q{quarter}"

            for model, palette in model_palettes.items():
                # Mean of an empty selection is NaN: no data for this model/quarter
                mae = quarter_data.loc[quarter_data['Model'] == model, 'MAE'].mean()
                has_value = pd.notna(mae)
                legend_name = f"{model} {branch}"

                # The trace is still added when the value is missing, so it is
                # built exactly as before apart from the label and legend flag.
                fig.add_trace(go.Bar(
                    x=[x_label],
                    y=[mae],
                    name=legend_name,
                    # Grouping makes one legend click toggle every bar of the pair
                    legendgroup=legend_name,
                    marker_color=palette.get(branch, '#bbbbbb'),
                    # Leave the label empty instead of printing "nan"
                    text=f"{mae:.2f}" if has_value else "",
                    textposition="outside",
                    textangle=0,  # Horizontal text
                    # Only the first bar that really has a value gets the legend entry
                    showlegend=has_value and legend_name not in added_legends
                ), row=1, col=col)

                if has_value:
                    added_legends.add(legend_name)

# Shared y-range for both subplots.
# The bars are stacked (Baseline + SARIMAX per branch/quarter) and each segment
# is the mean MAE of its group, so the axis has to fit the tallest STACK, not
# the largest single MAE row. MAE is coerced here as well so the computation
# does not depend on how the column was loaded, and pandas' NaN-skipping
# aggregations replace the builtin max(), which is unreliable with NaN.
mae_numeric = pd.to_numeric(filtered_data['MAE'], errors='coerce')
segment_means = (
    filtered_data
    .assign(MAE=mae_numeric)
    .groupby(['Year', 'Cleaned_Branch', 'quarter', 'Model'])['MAE']
    .mean()
)
stack_heights = segment_means.groupby(level=['Year', 'Cleaned_Branch', 'quarter']).sum()
tallest_stack = stack_heights.max()
if pd.isna(tallest_stack):
    tallest_stack = 0.0  # no usable MAE values at all
# Same absolute padding as before
shared_y_range = [0, float(tallest_stack) + 0.1]

# Update Y-axis settings to ensure consistent gridlines, a common scale and no labels
for subplot_col in (1, 2):
    fig.update_yaxes(
        showgrid=True,
        gridcolor="lightgrey",
        zeroline=False,
        title=None,
        showticklabels=False,
        range=shared_y_range,
        row=1,
        col=subplot_col
    )

# Update layout
fig.update_layout(
    title="MAE Comparison: Baseline vs SARIMAX (2022 & 2023)",
    barmode="stack",
    bargap=0.1,
    width=1200,
    height=600,
    showlegend=True,
    plot_bgcolor="white",
    paper_bgcolor="white",
    legend=dict(bordercolor="black", borderwidth=1)
)

# Show the plot
fig.show()


