# In [1]:
# Import necessary libraries
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Read the source workbook into a DataFrame.
excel_path = r"c:/Users/c.hakker/OneDrive - VISTA college/Senior Stuff/Opleiding Data science/Back up UWV/Sick leave x Employed persons.xlsx"
data = pd.read_excel(excel_path)

# The cost column's header begins with a newline character, so the key must
# include it; keep the name in one place instead of repeating the literal.
cost_column = '\nCosts of illness per year of employment'

# Cast the cost figures to integers so they are displayed consistently.
data[cost_column] = data[cost_column].astype(int)

# Keep the nine rows with the highest cost value; these are the rows that
# receive a text label on the chart.
top_9_data = data.nlargest(n=9, columns=cost_column)

# Bubble chart built with Plotly Express: employed persons on the x-axis,
# sick leave percentage on the y-axis, bubble size driven by employed persons,
# and one discrete colour per branch.
employed_col = 'Employed persons 2023 (x1000)'
sick_leave_col = 'Sick leave percentage 2023'
branch_col = 'Branche (SBI 2008)'

# Display labels that replace the raw column names on the axes.
axis_labels = {
    employed_col: 'Employed Persons (2023, x1000)',
    sick_leave_col: 'Sick Leave Percentage',
}

fig = px.scatter(
    data,
    x=employed_col,
    y=sick_leave_col,
    size=employed_col,
    color=branch_col,  # categorical colour scale
    hover_name=branch_col,
    title='Bubble Up: Unveiling the Top Sick Leave Costs in Health, Trade, and Manufacturing',
    labels=axis_labels,
    size_max=90,  # upper bound used when scaling bubble sizes
    color_discrete_sequence=px.colors.qualitative.Set2,  # any discrete palette works here
)

# Add a text label (branch name plus cost) above the bubble of every row in
# top_9_data, i.e. the nine rows with the highest cost of illness.
cost_key = '\nCosts of illness per year of employment'  # header begins with a newline
for _, row in top_9_data.iterrows():
    # Format the amount on its own so that the "," -> "." swap (dot as the
    # thousands separator) cannot alter commas inside the branch name.
    # Doing this outside the label f-string also keeps the backslash out of an
    # f-string expression, which is a SyntaxError before Python 3.12.
    # int() guards against iterrows() handing the value back as a float.
    cost_text = f"{int(row[cost_key]):,}".replace(',', '.')

    # NOTE(review): the label reads the 'Branche' column while the figure is
    # coloured by 'Branche (SBI 2008)' — confirm both columns exist in the data.
    fig.add_annotation(x=row['Employed persons 2023 (x1000)'],
                       y=row['Sick leave percentage 2023'],
                       text=f"{row['Branche']}:<br>€{cost_text}",
                       showarrow=False,
                       font=dict(size=12, color='grey', family="Roboto"),
                       align='center',
                       xshift=0,  # Adjust horizontal position
                       yshift=19,  # Shift text upwards (increase value for higher position)
                       bordercolor='rgba(0, 0, 0, 0)',  # Transparent border
                       borderwidth=1,
                       borderpad=2,
                       bgcolor='rgba(0, 0, 0, 0)',  # Transparent background
                       opacity=1)  # Adjust annotation style

# Axis styling: both axes get a thin black axis line.
x_axis_style = {
    'showline': True,
    'linewidth': 1,
    'linecolor': 'black',
    'showgrid': False,  # no vertical gridlines
}
y_axis_style = {
    'showline': True,
    'linewidth': 1,
    'linecolor': 'black',
    'showgrid': True,
    'gridcolor': 'lightgrey',  # horizontal gridlines in light grey
    'title': {'text': ''},  # blank out the default axis title
    'automargin': True,  # let Plotly size the margin automatically
}

# Apply the axis styles plus the overall figure settings.
fig.update_layout(
    showlegend=True,
    xaxis=x_axis_style,
    yaxis=y_axis_style,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot background
    width=1200,  # figure width
    height=600,  # figure height
)

# Horizontal y-axis title, drawn as a free-standing annotation positioned in
# paper coordinates (relative to the plot area rather than the data).
y_title_annotation = {
    'text': "Sick Leave Percentage",
    'xref': "paper",
    'yref': "paper",
    'x': -0.07,  # tweak x/y to move the text
    'y': 1.08,
    'showarrow': False,  # plain text, no arrow
    'font': {'size': 12},
    'align': "center",
}
fig.add_annotation(**y_title_annotation)

# Centre any trace text on its marker and draw a thin white outline around
# every bubble.
marker_outline = {'line': {'color': 'white', 'width': 1}}
fig.update_traces(textposition='middle center', marker=marker_outline)

# Export the figure to an HTML file.
fig.write_html("Sick_leave_analysis.html")

# Display the figure.
fig.show()


  sf: grouped.get_group(s if len(s) > 1 else s[0])
