In [75]:
# import libraries
import pandas as pd
import numpy as np

# data visualization
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [76]:
df_cbs = pd.read_csv('Data/ziekteverzuim totaal.csv', sep=',')
df_cbs.head()

Unnamed: 0,Group,Category,Ziekteverzuim
0,2013,Vierde kwartaal,39
1,2014,Eerste kwartaal,4
2,2014,Tweede kwartaal,37
3,2014,Derde Kwartaal,35
4,2014,Vierde kwartaal,4


In [77]:
print(df_cbs['Ziekteverzuim'].unique())

['3,9' '4' '3,7' '3,5' '4,4' '4,3' '3,8' '4,1' '4,2' '4,9' '4,7' '4,5'
 '5,2' '4,8' '4,6' '5,4' '6,3' '5' '5,6' '5,7' '5,5']


In [78]:
df_cbs['Ziekteverzuim'] = df_cbs['Ziekteverzuim'].str.replace(',', '.')

In [79]:
df_cbs['Ziekteverzuim'].replace('', float('nan'), inplace=True)


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





In [80]:
df_cbs['Ziekteverzuim'] = df_cbs['Ziekteverzuim'].astype(float)

In [81]:
print(df_cbs.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Group          41 non-null     int64  
 1   Category       41 non-null     object 
 2   Ziekteverzuim  41 non-null     float64
dtypes: float64(1), int64(1), object(1)
memory usage: 1.1+ KB
None


In [82]:
df_cbs.head()

Unnamed: 0,Group,Category,Ziekteverzuim
0,2013,Vierde kwartaal,3.9
1,2014,Eerste kwartaal,4.0
2,2014,Tweede kwartaal,3.7
3,2014,Derde Kwartaal,3.5
4,2014,Vierde kwartaal,4.0


In [83]:
# Replace 'Derde Kwartaal' with 'Derde kwartaal'....
df_cbs['Category'] = df_cbs['Category'].replace('Derde Kwartaal', 'Derde kwartaal')

In [84]:
# Sort the Group column in descending order and get the unique values
year_order = sorted(df_cbs['Group'].unique(), reverse=True)

In [85]:
# Define the order of the categories
category_order = ['Eerste kwartaal', 'Tweede kwartaal', 'Derde kwartaal', 'Vierde kwartaal'] 

In [86]:
# Bar plot with ordered x-axis
bar_plot = px.bar(df_cbs, x='Category', y='Ziekteverzuim', color='Group',
                  barmode='group', title='Bar Plot of Ziekteverzuim by Category and Year',
                  category_orders={'Category': category_order},
width=1000,  # set the width of the plot
height=500  # set the height of the plot
                  )
bar_plot.show()

In [87]:
bar_plot.write_image("bar_plot.png")

In [88]:
# Scatter plot with ordered x-axis and Group in descending order
scatter_plot = px.scatter(df_cbs, x='Category', y='Ziekteverzuim', color='Group',
                          title='Scatter Plot of Ziekteverzuim by Category and Year',
                          category_orders={'Category': category_order, 'Group': year_order},
width=800,  # set the width of the plot
height=400  # set the height of the plot
                          )
scatter_plot.show()

# Box plot with ordered x-axis and Group in descending order
box_plot = px.box(df_cbs, x='Category', y='Ziekteverzuim', color='Group',
                  title='Box Plot of Ziekteverzuim by Category and Year',
                  category_orders={'Category': category_order, 'Group': year_order},
width=800,  # set the width of the plot
height=400  # set the height of the plot
                  )
box_plot.show()

In [89]:
box_plot.write_image("box_plot.png")
scatter_plot.write_image("scatter_plot.png")

In [90]:
# Facet plot with ordered x-axis and Group in descending order
facet_plot = px.line(df_cbs, x='Category', y='Ziekteverzuim', color='Group', facet_col='Group',
                     title='Facet Plot of Ziekteverzuim by Category and Year',
                     category_orders={'Category': category_order, 'Group': year_order},
width=1000,  # set the width of the plot
height=500  # set the height of the plot
                     )
facet_plot.show()

In [91]:
facet_plot.write_image("facet_plot.png")

In [92]:
# Custom color palette
color_palette = px.colors.qualitative.Dark24

# Create the plot
design_plot = px.line(
    df_cbs, 
    x='Category', 
    y='Ziekteverzuim', 
    color='Group',
    title='Ziekteverzuim by Category and Year',
    category_orders={'Category': category_order, 'Group': year_order},
    color_discrete_sequence=color_palette
)

# Update the layout for a more polished design
design_plot.update_layout(
    plot_bgcolor='white',  # Background color of the plot
    paper_bgcolor='white',  # Background color around the plot
    title_font=dict(size=24, family='Arial, sans-serif', color='darkblue'),
    xaxis_title='Category',
    yaxis_title='Ziekteverzuim',
    xaxis=dict(
        showgrid=True,
        gridcolor='white',
        zeroline=False
    ),
    yaxis=dict(
        showgrid=True,
        gridcolor='white',
        zeroline=False
    ),
    legend=dict(
        title='Year',
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1
    ),
    font=dict(
        family='Arial, sans-serif',
        size=12,
        color='black'
    ),
width=1000,  # set the width of the plot
height=500  # set the height of the plot
)

# Show the plot
design_plot.show()

In [93]:
design_plot.write_image("design_plot.png")