# In [1]:
import pandas as pd
import plotly.express as px
import cbsodata

# Fetch dataset '80072NED' through cbsodata and load it into a DataFrame.
data = pd.DataFrame(cbsodata.get_data('80072NED'))

# Pull the four-digit year and the quarter digit out of 'Perioden'.
# Labels without an "<n>e kwartaal" part (e.g. full-year periods) produce NaN,
# which is why both columns are first parsed as float.
period_text = data['Perioden'].str
data['Year'] = period_text.extract(r'(\d{4})').astype(float)
data['Quarter'] = period_text.extract(r'(\d)e kwartaal').astype(float)

# Keep only rows where both parts were found, then switch both columns to int.
data = data.dropna(subset=['Year', 'Quarter']).astype({'Year': int, 'Quarter': int})

# Restrict to the 2016-2023 window (both ends inclusive).
in_window = (data['Year'] >= 2016) & (data['Year'] <= 2023)
data = data[in_window]

# Collapse rows that share a (quarter, year) pair into their mean percentage.
# NOTE(review): this is an unweighted mean over every row with the same
# quarter and year - confirm that is the intended figure if the table carries
# other dimensions.
agg_data = (
    data.groupby(['Quarter', 'Year'])['Ziekteverzuimpercentage_1']
    .mean()
    .reset_index()
)

# Reshape wide: one row per quarter, one column per year.
pivot_data = agg_data.pivot(
    index='Quarter',
    columns='Year',
    values='Ziekteverzuimpercentage_1',
)

# Back to long form for Plotly. Quarter/year pairs that were absent from
# agg_data come back from the pivot as NaN rows.
melted_data = pd.melt(
    pivot_data.reset_index(),
    id_vars='Quarter',
    var_name='Year',
    value_name='Sick Leave Percentage',
)

# Custom palette (per the original note, chosen to match the other plot).
# Each color is listed next to the year it is meant for; `colors` is the
# ordered list of those colors.
year_palette = {
    2016: '#44b7c2',
    2017: '#ff6b08',
    2018: '#024b7a',
    2019: '#37759b',
    2020: '#ffae49',
    2021: '#01918b',
    2022: '#00575c',
    2023: '#de663e',
}
colors = list(year_palette.values())

# Build the line chart: quarters on the x-axis, one line per year.
line_options = {
    'x': 'Quarter',
    'y': 'Sick Leave Percentage',
    'color': 'Year',
    'title': 'Sick Leave Through the Years: Quarterly Trends from 2016 to 2023',
    'markers': False,  # plain lines only, no point markers
}
fig = px.line(melted_data, **line_options)

# Restyle the figure (per the original note, to match the MAE comparison plot).

# Title pinned to the top-left corner.
title_style = {
    'text': 'Sick Leave Through the Years: Quarterly Trends from 2016 to 2023',
    'x': 0.01,
    'y': 0.95,
    'xanchor': 'left',
    'yanchor': 'top',
    'font': {'family': 'Roboto', 'size': 20, 'color': 'black'},
}

# Quarter axis: explicit Q1-Q4 tick labels, no grid lines, no axis title.
quarter_axis = {
    'title': None,
    'tickmode': 'array',
    'tickvals': [1, 2, 3, 4],
    'ticktext': ['Q1', 'Q2', 'Q3', 'Q4'],
    'showgrid': False,
    'linecolor': None,
    'ticks': 'outside',
}

# Value axis: light grey grid, no (vertical) axis title.
value_axis = {
    'showgrid': True,
    'gridcolor': 'lightgrey',
    'automargin': True,
    'title': None,
}

# Horizontal caption placed in paper coordinates; it stands in for the
# y-axis title that was removed above.
axis_caption = {
    'text': 'CBS Sick Leave (%)',
    'x': -0.05,
    'y': 1.06,
    'xref': 'paper',
    'yref': 'paper',
    'showarrow': False,
    'textangle': 0,  # keep the text horizontal
    'font': {'size': 12},
}

fig.update_layout(
    template='plotly_white',
    title=title_style,
    xaxis=quarter_axis,
    yaxis=value_axis,
    width=1200,
    height=600,
    font={'family': 'Roboto', 'size': 12, 'color': 'black'},
    showlegend=False,  # the legend is hidden
    annotations=[axis_caption],
)

# Recolor every trace from the palette by position (wrapping around if there
# are more traces than colors) and give each line a width of 2.
palette_size = len(colors)
for position, line_trace in enumerate(fig.data):
    line_trace.update(line={'color': colors[position % palette_size], 'width': 2})

# Label each line with its year at the last quarter that actually has a value,
# using the same palette index as the line itself.
#
# The pivot/melt round trip leaves NaN rows for quarter/year pairs without
# data. Those rows are dropped before looking up the last point; otherwise a
# year that is missing its latest quarter(s) would have its label anchored at
# y=NaN instead of at the end of the drawn line.
observed_data = melted_data.dropna(subset=['Sick Leave Percentage'])
for i, trace in enumerate(fig.data):
    year = trace.name
    year_rows = observed_data[observed_data['Year'] == int(year)]
    if year_rows.empty:
        # No observed point for this year, so there is no line end to label.
        continue
    last_x = year_rows['Quarter'].max()
    last_y = year_rows.loc[year_rows['Quarter'] == last_x, 'Sick Leave Percentage'].iloc[0]
    fig.add_annotation(
        x=last_x + 0.1,  # Slightly offset to the right of the last point
        y=last_y,
        text=str(year),
        font=dict(color=colors[i % len(colors)], size=12),
        showarrow=False,
        xref='x',
        yref='y'
    )

# Display the finished figure, then export it to an HTML file.
output_html = "sickleave_trends_MAE_style.html"
fig.show()
fig.write_html(output_html)


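# NOTE(review): the line below is not part of the script; it looks like
# leftover cell output (the tail of a library warning) - confirm and remove.
#   sf: grouped.get_group(s if len(s) > 1 else s[0])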
