In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import cbsodata
import seaborn as sns

In [2]:
# Use Roboto as the default font family for every matplotlib figure
# (swap 'Roboto' for any font installed on this machine)
matplotlib.rc('font', family='Roboto')

In [3]:
# Download dataset '80072NED' through cbsodata and wrap the result in a DataFrame
data = pd.DataFrame(data=cbsodata.get_data('80072NED'))

In [None]:
# Preview the first few rows. A bare last expression is rendered with the
# notebook's rich table display instead of the plain-text output of print().
data.head()

In [5]:
# Parse 'Perioden' into numeric parts: the first four-digit run (the year) and
# the single digit that precedes "e kwartaal" (the quarter). Float dtype leaves
# NaN wherever a pattern does not match.
data['Year'] = data['Perioden'].str.extract(r'(\d{4})', expand=False).astype(float)
data['Quarter'] = data['Perioden'].str.extract(r'(\d)e kwartaal', expand=False).astype(float)


In [6]:
# Keep only rows where both Year and Quarter were parsed; periods without a
# quarter (e.g. full-year periods) carry NaN and are discarded
data = data.dropna(axis='index', how='any', subset=['Year', 'Quarter'])

In [7]:
# Cast both period columns from float to int in a single astype() call
data = data.astype({'Year': int, 'Quarter': int})

In [8]:
# Restrict the analysis window to 2016 through 2023 (both years included)
data = data[data['Year'].between(2016, 2023)]

In [9]:
# Collapse all rows sharing a (Quarter, Year) pair into one row holding the
# mean of 'Ziekteverzuimpercentage_1'
# NOTE(review): presumably the duplicate rows come from other dimensions of the
# dataset - confirm that an unweighted mean across them is what is intended
agg_data = (
    data
    .groupby(['Quarter', 'Year'])['Ziekteverzuimpercentage_1']
    .mean()
    .reset_index()
)

In [10]:
# Reshape to wide form: one row per quarter, one column per year
pivot_data = agg_data.pivot(
    index='Quarter',
    columns='Year',
    values='Ziekteverzuimpercentage_1',
)

In [11]:
# Order the year columns from the most recent year down to the oldest
pivot_data = pivot_data.sort_index(axis=1, ascending=False)

In [12]:
# Make seaborn's "colorblind" palette the default colour cycle for new plots
sns.set_palette(palette="colorblind")

In [13]:
# Build a "colorblind" palette holding exactly one colour per year column
colors = sns.color_palette("colorblind", n_colors=pivot_data.shape[1])

In [None]:
# Line chart: one line per year across the quarters
fig, ax = plt.subplots(figsize=(12, 6))

# Pass the palette explicitly so each line is guaranteed to share its colour
# with the year label drawn below (rather than relying on the global colour
# cycle), and skip the legend because every line is labelled directly.
pivot_data.plot(kind='line', ax=ax, color=colors, legend=False)

# Title and x-axis label
ax.set_title('Sickleave Percentage per Year & Quarter', fontdict={'fontsize': 12, 'fontweight': 'normal'})
ax.set_xlabel('Quarter', fontdict={'fontsize': 10, 'fontweight': 'normal'})

# y-axis label, rotated so it reads horizontally; labelpad keeps it clear of
# the tick labels
ax.set_ylabel('%', fontdict={'fontsize': 12, 'fontweight': 'normal'}, rotation=0, labelpad=15)

# One tick per quarter, labelled with the whole quarter number
ax.set_xticks(pivot_data.index)
ax.set_xticklabels(pivot_data.index)

# Let the lines run edge to edge (no horizontal padding inside the axes)
ax.set_xlim(pivot_data.index.min(), pivot_data.index.max())

# Label every line with its year, just right of the line's end and in the
# line's colour
for color, year in zip(colors, pivot_data.columns):
    # Use the last quarter that actually has a value: a year with a missing
    # final quarter would otherwise place its label at a NaN y-position
    observed = pivot_data[year].dropna()
    if observed.empty:
        continue
    ax.text(
        x=observed.index[-1],  # Quarter of the line's last data point
        y=observed.iloc[-1],  # Value of the line's last data point
        s=str(year),  # The year label
        va='center',  # Vertically centre the text on the line end
        ha='left',  # Text starts at the line end and extends to the right
        fontsize=10,  # Font size for the labels
        color=color,  # Same colour as the line
        bbox=dict(facecolor='white', alpha=0.6, edgecolor='none')  # Semi-transparent backdrop for legibility
    )

# Hide the top and right spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Thin grey horizontal grid lines (one per y-axis tick)
ax.grid(axis='y', color='grey', linestyle='-', linewidth=0.5)

# Tighten the layout to trim surplus margins
fig.tight_layout()

# Save the figure to disk before showing it
fig.savefig("line_plot.png", bbox_inches='tight')
plt.show()

In [None]:
# Add a horizontal y-axis title near the top-left corner of the chart.
# `fig`/`ax` here are the matplotlib figure and axes created in the plotting
# cell above; a matplotlib Figure has no Plotly-style add_annotation(), so the
# text is placed with matplotlib instead, using axes-fraction coordinates
# (the counterpart of Plotly's "paper" references).
ax.text(
    -0.03, 1.05,  # Adjust x and y values to position the text
    "Sick Leave Percentage",  # Text for y-axis title
    transform=ax.transAxes,  # Coordinates relative to the plot area
    ha="center",
    va="center",
    fontsize=12,
)

# Re-display the figure so the added title shows up in the notebook.
# Note: line_plot.png was written before this text was added and does not
# contain it.
fig

In [None]:
import pandas as pd
import cbsodata
import plotly.graph_objs as go
import plotly.io as pio
from plotly.subplots import make_subplots

# Use the white Plotly template for every figure created from here on
pio.templates.default = "plotly_white"

# Download dataset '80072NED' and prepare it in one chain:
#   1. parse Year / Quarter out of 'Perioden' (NaN where a pattern is absent),
#   2. drop rows without both parts (e.g. full-year periods),
#   3. cast both columns to int,
#   4. keep the years 2016 through 2023 (inclusive).
data = (
    pd.DataFrame(cbsodata.get_data('80072NED'))
    .assign(
        Year=lambda df: df['Perioden'].str.extract(r'(\d{4})', expand=False).astype(float),
        Quarter=lambda df: df['Perioden'].str.extract(r'(\d)e kwartaal', expand=False).astype(float),
    )
    .dropna(subset=['Year', 'Quarter'])
    .astype({'Year': int, 'Quarter': int})
    .loc[lambda df: df['Year'].between(2016, 2023)]
)

# One row per (Quarter, Year) pair holding the mean of
# 'Ziekteverzuimpercentage_1' over all rows sharing that pair
agg_data = (
    data
    .groupby(['Quarter', 'Year'])['Ziekteverzuimpercentage_1']
    .mean()
    .reset_index()
)

# Wide form: quarters as rows, years as columns
pivot_data = agg_data.pivot(
    index='Quarter',
    columns='Year',
    values='Ziekteverzuimpercentage_1',
)

# Most recent year first
pivot_data = pivot_data.sort_index(axis=1, ascending=False)

# Define a color palette manually for the lines
color_palette = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52']

# Visible y-ranges of the two panels. The data masks below reuse these bounds,
# so the plotted values and the axis limits cannot drift apart.
# NOTE(review): values strictly between the two ranges (3.0 < y < 3.5) lie
# inside the axis break and are shown in neither panel.
upper_range = [3.5, 7.0]
lower_range = [0, 3.0]

# Create two stacked subplots with a shared x-axis
fig = make_subplots(
    rows=2, cols=1,
    shared_xaxes=True,
    row_heights=[0.6, 0.4],  # Adjust heights for upper and lower plot
    vertical_spacing=0.02  # Minimize the space between plots to create the broken axis illusion
)

# Only the last quarter of each line carries a text label (the year)
last_quarter = pivot_data.index[-1]

# (subplot row, predicate selecting the values shown there, line style).
# The upper bound check is inclusive so a value sitting exactly on the lower
# edge of the upper axis (3.5) is drawn instead of leaving a gap in the line.
panels = [
    (1, lambda values: values >= upper_range[0], dict(width=3)),
    (2, lambda values: values <= lower_range[1], dict(width=3, dash='dot')),  # Dotted line for the lower plot
]

# Add one trace per year to each panel: all upper-panel traces first, then all
# lower-panel traces
for row, in_panel, line_style in panels:
    for idx, year in enumerate(pivot_data.columns):
        series = pivot_data[year]
        color = color_palette[idx % len(color_palette)]  # Cycle if there are more years than colours
        fig.add_trace(
            go.Scatter(
                x=pivot_data.index,
                y=series.where(in_panel(series)),  # Values outside this panel become NaN (not drawn)
                mode='lines+text',
                name=str(year),
                line=dict(color=color, **line_style),
                text=[f"{year}" if quarter == last_quarter else "" for quarter in pivot_data.index],
                textposition="top right",
                textfont=dict(color=color, size=12),
            ),
            row=row, col=1,
        )

# Customize the upper y-axis (range 3.5 to 7.0, ticks every 0.5 up to 6.5)
fig.update_yaxes(
    range=upper_range,
    tickvals=[3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5],
    row=1, col=1
)

# Customize the lower y-axis (range 0 to 3.0, only the zero tick is shown)
fig.update_yaxes(
    range=lower_range,
    tickvals=[0.0],
    row=2, col=1
)

# Customize the x-axis (Quarters)
fig.update_xaxes(
    tickvals=[1, 2, 3, 4],  # Show Quarters 1, 2, 3, 4
    ticktext=['1', '2', '3', '4'],
    row=2, col=1  # Only show x-axis labels on the lower plot
)

# Remove x-axis labels from the upper plot for a clean look
fig.update_xaxes(showticklabels=False, row=1, col=1)

# Set overall layout
fig.update_layout(
    title='Sick Leave Trends from 2016 to 2023: Simulated Broken Y-Axis',
    font=dict(family="Roboto", size=12),
    showlegend=False,  # Remove the legend
    hovermode="x unified",
    width=1200,  # Adjusted width
    height=600,  # Adjusted height
    margin=dict(l=50, r=20, t=70, b=50),  # Adjustable top margin
    plot_bgcolor='rgba(0,0,0,0)'  # Transparent background
)

# Show the plot
fig.show()
