In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Suppress all warnings (note: this silences every warning category, not only harmless ones)
import warnings
warnings.filterwarnings("ignore")

In [None]:
from uwv.config import CBS80072NED, CBS_OPENDATA_PROCESSED_DATA_DIR, OUTPUT_DIR

In [None]:
# Read the processed CBS table (named after CBS80072NED) from its parquet file
parquet_path = CBS_OPENDATA_PROCESSED_DATA_DIR / f"{CBS80072NED}.parquet"
cbs = pd.read_parquet(parquet_path)

In [None]:
# Drop rows whose 'period_quarter_number' is 0 (presumably annual aggregates —
# TODO confirm against the CBS table definition).
# The explicit .copy() makes `cbs` an independent frame, so the 'month' and
# 'date' columns assigned to it afterwards are not written to a slice of the
# loaded data (the pattern behind pandas' SettingWithCopyWarning, which the
# global warnings filter would otherwise hide).
cbs = cbs[cbs['period_quarter_number'] != 0].copy()

In [None]:
# Translate each quarter number into the first calendar month of that quarter,
# then assemble a first-of-quarter timestamp from year / month / day parts
quarter_to_month = {1: 1, 2: 4, 3: 7, 4: 10}
cbs['month'] = cbs['period_quarter_number'].map(quarter_to_month)
date_parts = {'year': cbs['period_year'], 'month': cbs['month'], 'day': 1}
cbs['date'] = pd.to_datetime(date_parts)

In [None]:
# Promote the new 'date' column to the index (rebinding the name rather than
# mutating the frame in place)
cbs = cbs.set_index('date')

In [None]:
# Print the first five rows as a quick sanity check
print(cbs.iloc[:5])

In [None]:
# Helper that selects one SBI code and places its rows on a quarterly grid
def process_sbi_data(sbi_code, data=None):
    """Return the rows for one SBI code on a regular quarter-start grid.

    Parameters
    ----------
    sbi_code : str
        Value to match against the 'sbi' column (e.g. 'T001081').
    data : pandas.DataFrame, optional
        Frame to select from. It needs a 'sbi' column and a DatetimeIndex.
        When omitted, the notebook-level ``cbs`` frame is used (looked up at
        call time), which is the original behaviour.

    Returns
    -------
    pandas.DataFrame
        The matching rows reindexed to quarter-start ('QS') frequency.
        Quarters without an observation show up as NaN rows.
    """
    source = cbs if data is None else data

    # Select on the 'sbi' column, which holds the code passed in
    filtered_data = source[source['sbi'] == sbi_code]

    # Reindex onto a quarter-start grid so gaps become explicit NaN rows
    filtered_data = filtered_data.resample('QS').asfreq()

    # Display the resampled data to verify
    print(filtered_data.head())

    return filtered_data

In [None]:
# Example run for SBI code T001081 ("A-U Alle economische activiteiten")
sbi_code = 'T001081'
sbi_data = process_sbi_data(sbi_code=sbi_code)

In [None]:
# SBI code to analyse — example: A-U Alle economische activiteiten
sbi_code = 'T001081'

# Keep only the rows whose 'sbi' column equals the chosen code
filtered_cbs = cbs.loc[cbs['sbi'].eq(sbi_code)]

In [None]:
# Restrict the data to 2016 through 2023 (label slicing includes both ends).
# The .copy() detaches the result from the frame it was sliced from, so the
# smoothing columns assigned to `filtered_cbs` afterwards are written to an
# independent frame rather than to a slice (the cause of pandas'
# SettingWithCopyWarning, which the global warnings filter would hide).
filtered_cbs = filtered_cbs.loc['2016-01-01':'2023-12-31'].copy()

In [None]:
from statsmodels.tsa.holtwinters import SimpleExpSmoothing

In [None]:
# Smoothing factor derived from the span: alpha = 2 / (span + 1)
span = 4
alpha = 2 / (1 + span)

In [None]:
# Exponentially weighted mean of the sick-leave series; adjust=False selects
# the recursive form y_t = (1 - alpha) * y_{t-1} + alpha * x_t
filtered_cbs['EWMA12'] = (
    filtered_cbs['sick_leave_percentage']
    .ewm(alpha=alpha, adjust=False)
    .mean()
)

In [None]:
# Simple exponential smoothing model over the sick-leave series (not yet fitted)
model = SimpleExpSmoothing(endog=filtered_cbs['sick_leave_percentage'])

In [None]:
# Fit with the smoothing level pinned to `alpha`; optimized=False stops
# statsmodels from estimating the parameter itself
fitted_model = model.fit(
    smoothing_level=alpha,
    optimized=False,
)

In [None]:
# Store the fitted values moved one step earlier in time (shift of -1)
filtered_cbs['SES12'] = fitted_model.fittedvalues.shift(periods=-1)

In [None]:
# Double Exponential Smoothing

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [None]:
# Exponential smoothing with an additive trend; the fitted values are moved
# one step earlier in time before being stored
filtered_cbs['DESadd12'] = (
    ExponentialSmoothing(filtered_cbs['sick_leave_percentage'], trend='add')
    .fit()
    .fittedvalues
    .shift(-1)
)

In [None]:
# Exponential smoothing with a multiplicative trend; the fitted values are
# moved one step earlier in time before being stored
filtered_cbs['DES_mul_12'] = (
    ExponentialSmoothing(filtered_cbs['sick_leave_percentage'], trend='mul')
    .fit()
    .fittedvalues
    .shift(-1)
)

In [None]:
# Plot the observed series against the SES and DES fits for the first 24 rows
(
    filtered_cbs[['sick_leave_percentage', 'SES12', 'DESadd12', 'DES_mul_12']]
    .head(24)
    .plot(figsize=(12, 6))
)

In [None]:
# Triple Exponential Smoothing

In [None]:
# Exponential smoothing with multiplicative trend and multiplicative
# seasonality over a 4-period season; the fitted values are stored unshifted
filtered_cbs['TES_mul_12'] = (
    ExponentialSmoothing(
        filtered_cbs['sick_leave_percentage'],
        trend='mul',
        seasonal='mul',
        seasonal_periods=4,
    )
    .fit()
    .fittedvalues
)

In [None]:
# Draw the observed series together with every smoothed variant
smoothing_columns = ['sick_leave_percentage', 'SES12', 'DESadd12', 'DES_mul_12', 'TES_mul_12']
ax = filtered_cbs[smoothing_columns].plot(figsize=(12, 6))
ax.set_title('Sick Leave Percentage with Exponential Smoothing (2016-2023)')
plt.show()

In [None]:
# Draw only the observed series with the SES and TES fits
filtered_cbs[['sick_leave_percentage', 'SES12', 'TES_mul_12']].plot(
    figsize=(12, 6),
    title='Sick Leave Percentage with Exponential Smoothing (2016-2023)',
)
plt.show()