In [4]:
import pandas as pd
import plotly.express as px

# Load the data from the provided Excel file
data = pd.read_excel(r"c:/Users/c.hakker/OneDrive - VISTA college/Senior Stuff/Opleiding Data science/Data/Sick leave x Employed persons - Correctie.xlsx")

# Convert 'Costs of illness per year of employment' column to integer
data['Costs of illness per year of employment'] = data['Costs of illness per year of employment'].astype(int)

# Filter data to show text only for the top 3 highest 'Costs of illness per year of employment'
top_3_data = data.nlargest(3, 'Costs of illness per year of employment')

# Display the filtered data
print(top_3_data)


# Custom color palette
custom_colors = [
    "#024b7a", "#44b7c2", "#ffae49", "#3f9abf", "#5fb5db", 
    "#01918b", "#ff912b", "#f16c51", "#15bfb6", "#ef9051"
]

# Bubble Plot using Plotly with custom discrete color scale
fig = px.scatter(
    data, 
    x='Employed persons 2023 (x1000)', 
    y='Sick leave percentage 2023', 
    size='Employed persons 2023 (x1000)', 
    color='Branche (SBI 2008)',  # Categorical color scale
    hover_name='Branche (SBI 2008)',
    title='Bubble Up: Unveiling the Top Sick Leave Costs in Health, Trade, and Manufacturing',
    labels={
        'Employed persons 2023 (x1000)': 'Employed Persons (2023, x1000)', 
        'Sick leave percentage 2023': 'Sick Leave Percentage'
    },
    size_max=90,  # Adjust size_max to control bubble size scaling
    color_discrete_sequence=custom_colors  # Use custom color palette
)

# Customize the layout to show text only for the top 3
for index, row in top_3_data.iterrows():
    fig.add_annotation(
        x=row['Employed persons 2023 (x1000)'], 
        y=row['Sick leave percentage 2023'],
        text=f"{row['Branche']}:<br>€{row['Costs of illness per year of employment']:,}".replace(',', '.'),
        showarrow=False,
        font=dict(size=12, color='Black', family="Roboto"),
        align='center',
        xshift=0,
        yshift=9,
        bordercolor='rgba(0, 0, 0, 0)',
        borderwidth=1,
        borderpad=2,
        bgcolor='rgba(0, 0, 0, 0)',
        opacity=1
    )

# Update layout 
fig.update_layout(
    showlegend=True,
    xaxis=dict(
        showline=True, 
        linewidth=1, 
        linecolor='black', 
        showgrid=False
    ),
    yaxis=dict(
        showline=True, 
        linewidth=1, 
        linecolor='black', 
        showgrid=True, 
        gridcolor='lightgrey',
        title=dict(text=''),
        automargin=True
    ),
    plot_bgcolor='rgba(0,0,0,0)', 
    width=1200,  
    height=600  
)

# Add annotation for horizontal y-axis title
fig.add_annotation(
    text="Sick Leave Percentage",  
    xref="paper", yref="paper",  
    x=-0.07, y=1.08,  
    showarrow=False,  
    font=dict(size=12),
    align="center"
)

# Update text position on the bubbles
fig.update_traces(
    textposition='middle center',
    marker=dict(line=dict(color='white', width=1))
)

# Save the plot as an HTML file
fig.write_html("C:/Users/c.hakker/Downloads/Bubble_Up.html")

# Show the plot
fig.show()


   Branche (SBI 2008)  Sick leave percentage 2023  \
16      Q Health care                         7.4   
6             G Trade                         4.6   
2     C Manufacturing                         6.1   

    Employed persons 2023 (x1000)  Loonkosten per arbeidsjaar (x1000)  \
16                           1625                                65.2   
6                            1558                                60.7   
2                             763                                78.7   

    Sick leave x employed persons  Costs of illness per year of employment  \
16                        12025.0                                   784030   
6                          7166.8                                   435024   
2                          4654.3                                   366293   

   Branche  
16       Q  
6        G  
2        C  




