# In [4]:
import pandas as pd 
import plotly.graph_objects as go
import plotly.io as pio

# Location of the Excel workbook holding the per-quarter MAE values.
file_path = r"C:\Users\c.hakker\OneDrive - VISTA college\Senior Stuff\Opleiding Data science\Data\baseline-mean-errors-80072ned.xlsx"

# Custom bar colors per branch, one palette per model.
sarimax_colors = {'C Manufacturing': '#3f9abf', 'G Trade': '#006789', 'Q Healthcare': '#5fb5db'}
baseline_colors = {'C Manufacturing': '#deaa00', 'G Trade': '#fe9001', 'Q Healthcare': '#febf01'}


def prepare_data(raw):
    """Return a copy of *raw* with 'Model', 'Branch' and numeric 'MAE' columns.

    * 'Model' is 'Baseline' when the 'sbi_title' text contains 'Baseline',
      otherwise 'SARIMAX'. Missing / non-string titles are labelled 'SARIMAX'
      instead of raising ``TypeError``.
    * 'Branch' is 'sbi_title' with the "Baseline " marker removed, so both
      models share one branch name. Non-string titles are passed through.
    * 'MAE' is coerced to numeric; unparseable values become NaN.
    """
    data = raw.copy()
    titles = data['sbi_title']
    data['Model'] = titles.map(
        lambda title: 'Baseline' if isinstance(title, str) and 'Baseline' in title else 'SARIMAX'
    )
    data['Branch'] = titles.map(
        lambda title: title.replace("Baseline ", "") if isinstance(title, str) else title
    )
    data['MAE'] = pd.to_numeric(data['MAE'], errors='coerce')
    return data


def pivot_year(data, year):
    """Build aligned Baseline and SARIMAX pivot tables for one year.

    *data* must carry the columns produced by :func:`prepare_data` plus
    'Year' and 'quarter'. Duplicate rows for the same model, branch and
    quarter are averaged.

    Returns ``(baseline_pivot, sarimax_pivot)``: quarters as index, branches
    as columns, mean MAE as values. Both frames share the same quarter index
    (the union of the quarters seen for either model), and the SARIMAX frame
    is restricted to the Baseline branches. Cells without data are NaN, so a
    quarter or branch missing for one model can no longer shift or break the
    other model's bars.
    """
    year_data = data[data['Year'] == year]
    means = year_data.groupby(['Model', 'Branch', 'quarter'], as_index=False)['MAE'].mean()

    pivots = {}
    for model in ('Baseline', 'SARIMAX'):
        model_rows = means[means['Model'] == model]
        pivots[model] = model_rows.pivot(index='quarter', columns='Branch', values='MAE')

    # Put both models on one shared quarter axis and one branch list.
    quarters = pivots['Baseline'].index.union(pivots['SARIMAX'].index)
    branches = pivots['Baseline'].columns
    baseline_pivot = pivots['Baseline'].reindex(index=quarters)
    sarimax_pivot = pivots['SARIMAX'].reindex(index=quarters, columns=branches)
    return baseline_pivot, sarimax_pivot


def build_figure(baseline_pivot, sarimax_pivot, year) -> "go.Figure":
    """Create the grouped bar chart comparing both models for *year*.

    Expects the aligned pivot tables returned by :func:`pivot_year`. Each
    branch contributes one Baseline bar series and one SARIMAX bar series.
    Branches without an entry in the color dictionaries get ``None`` as
    marker color, which leaves the choice to Plotly's default palette.
    """
    quarters = baseline_pivot.index.astype(str)
    fig = go.Figure()

    for branch in baseline_pivot.columns:
        fig.add_trace(go.Bar(
            x=quarters,
            y=baseline_pivot[branch],
            name=f'{branch} Baseline ({year})',
            marker_color=baseline_colors.get(branch)
        ))
        fig.add_trace(go.Bar(
            x=quarters,
            y=sarimax_pivot[branch],
            name=f'{branch} SARIMAX ({year})',
            marker_color=sarimax_colors.get(branch)
        ))

    fig.update_layout(
        title=f"MAE Comparison: Baseline vs SARIMAX ({year})",
        xaxis_title="Quarters",
        yaxis_title="MAE",
        barmode='group',
        legend_title="Model",
        template="plotly_white"
    )
    return fig


# The guard is true when this runs as a notebook cell or a script, so the
# workflow below still executes there; it only keeps the helpers importable
# without reading the Excel file.
if __name__ == "__main__":
    # Load the dataset and derive the model / branch / numeric MAE columns
    data = prepare_data(pd.read_excel(file_path))

    # Loop through years (2022 and 2023)
    for year in [2022, 2023]:
        baseline_pivot, sarimax_pivot = pivot_year(data, year)
        fig = build_figure(baseline_pivot, sarimax_pivot, year)

        # Save the plot as an HTML file
        output_file = f"MAE_Comparison_{year}.html"
        pio.write_html(fig, file=output_file)

        # Show the plot for the year
        fig.show()
