# 2025 Working Days Predictions Using Pretrained Models

This notebook loads pretrained models from a pickle file and uses them to predict the full set of columns for 2025 working days, matching the structure of `2025_working_days_predictions.csv`. No retraining is performed.

In [2]:
import pandas as pd
import numpy as np
import pickle
from datetime import datetime

# Load pretrained models and parameters.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only run this cell against a 'predictions_full.pkl' that came from a trusted
# training run -- never against a file of unknown origin.
with open('predictions_full.pkl', 'rb') as f:
    all_models = pickle.load(f)

# Fail fast, with a single readable message, if the artefact is incomplete
# instead of surfacing a bare KeyError for whichever key is read first.
REQUIRED_MODEL_KEYS = (
    'employee_models',
    'task_time_models_2025',
    'PROPHET_PARAMS',
    'DEFAULT_PROPHET_PARAMS',
    'OPTIMIZED_HUBER_PARAMS',
    'DEFAULT_HUBER_PARAMS',
    'REGRESSION_WEIGHT_THRESHOLD',
    'REGRESSION_WEIGHT_HIGH',
    'REGRESSION_WEIGHT_LOW',
)
missing_model_keys = [key for key in REQUIRED_MODEL_KEYS if key not in all_models]
if missing_model_keys:
    raise KeyError(f"predictions_full.pkl is missing required entries: {missing_model_keys}")

# Per-section models, keyed by section id (they are looked up by section_id below).
employee_models = all_models['employee_models']
task_time_models_2025 = all_models['task_time_models_2025']

# Hyper-parameters stored alongside the models. They are not needed for
# inference in the cells shown here (no retraining is performed).
PROPHET_PARAMS = all_models['PROPHET_PARAMS']
DEFAULT_PROPHET_PARAMS = all_models['DEFAULT_PROPHET_PARAMS']
OPTIMIZED_HUBER_PARAMS = all_models['OPTIMIZED_HUBER_PARAMS']
DEFAULT_HUBER_PARAMS = all_models['DEFAULT_HUBER_PARAMS']

# Blend settings for the employee prediction: the regression weight is
# REGRESSION_WEIGHT_HIGH when a section's training R^2 exceeds the threshold,
# otherwise REGRESSION_WEIGHT_LOW.
REGRESSION_WEIGHT_THRESHOLD = all_models['REGRESSION_WEIGHT_THRESHOLD']
REGRESSION_WEIGHT_HIGH = all_models['REGRESSION_WEIGHT_HIGH']
REGRESSION_WEIGHT_LOW = all_models['REGRESSION_WEIGHT_LOW']

# Load evaluation input
test_df = pd.read_csv('task2_test_inputs.csv')

In [3]:
# Calendar feature engineering for the evaluation rows.

# Holiday calendar, one row per year from 2021 to 2025 (same list as combine_fcode_2).
# NOTE(review): '2022-10-09' appears twice in the 2022 row. This is harmless for
# the membership test below, but confirm that no other date was mistyped.
holiday_data = [
    '2021-01-14','2021-01-28','2021-02-04','2021-02-26','2021-03-11','2021-03-28','2021-04-02','2021-04-12','2021-04-13','2021-04-14','2021-04-26','2021-05-01','2021-05-14','2021-05-24','2021-05-25','2021-05-26','2021-05-27','2021-06-24','2021-07-21','2021-07-23','2021-08-22','2021-09-20','2021-10-19','2021-10-20','2021-11-04','2021-11-18','2021-12-18','2021-12-25',
    '2022-01-14','2022-01-17','2022-02-04','2022-02-16','2022-03-01','2022-03-17','2022-04-11','2022-04-12','2022-04-13','2022-04-14','2022-04-15','2022-04-16','2022-05-01','2022-05-02','2022-05-03','2022-05-15','2022-05-16','2022-06-13','2022-06-14','2022-06-17','2022-06-24','2022-07-01','2022-07-08','2022-07-10','2022-07-13','2022-07-15','2022-07-22','2022-07-29','2022-08-11','2022-09-10','2022-09-19','2022-10-09','2022-10-09','2022-10-10','2022-10-24','2022-11-07','2022-12-07','2022-12-25','2022-12-26',
    '2023-01-06','2023-01-15','2023-02-04','2023-02-05','2023-02-18','2023-03-06','2023-04-05','2023-04-07','2023-04-13','2023-04-14','2023-04-22','2023-05-01','2023-05-05','2023-05-06','2023-06-03','2023-06-29','2023-07-03','2023-08-01','2023-08-30','2023-09-28','2023-09-29','2023-10-28','2023-11-12','2023-11-26','2023-12-25','2023-12-26',
    '2024-01-15','2024-01-25','2024-02-04','2024-02-23','2024-03-08','2024-03-24','2024-03-29','2024-04-11','2024-04-12','2024-04-13','2024-04-15','2024-04-23','2024-05-01','2024-05-23','2024-05-24','2024-06-17','2024-06-21','2024-07-20','2024-08-19','2024-09-16','2024-09-17','2024-09-23','2024-10-17','2024-10-31','2024-11-15','2024-12-14','2024-12-25',
    '2025-01-13','2025-01-14','2025-02-04','2025-02-12','2025-02-26','2025-03-13','2025-03-31','2025-04-12','2025-04-13','2025-04-14','2025-04-15','2025-04-18','2025-05-01','2025-05-12','2025-05-13','2025-06-07','2025-06-10','2025-07-10','2025-08-08','2025-09-05','2025-09-07','2025-10-06','2025-10-20','2025-11-05','2025-12-04','2025-12-25'
]
holiday_dates = pd.to_datetime(holiday_data)

# Parse the date column once, then derive the calendar parts from its .dt accessor.
test_df['date'] = pd.to_datetime(test_df['date'])
date_parts = test_df['date'].dt
for calendar_feature in ('year', 'month', 'dayofweek', 'quarter'):
    test_df[calendar_feature] = getattr(date_parts, calendar_feature)

# dayofweek runs Monday=0 .. Sunday=6, so Saturday and Sunday are the values >= 5.
test_df['is_weekend'] = (test_df['dayofweek'] >= 5).astype(int)

# A row is a holiday when its date appears in the calendar above.
test_df['is_holiday'] = test_df['date'].isin(holiday_dates).astype(int)

# Working day = neither a holiday nor a weekend.
is_day_off = test_df['is_holiday'].ne(0) | test_df['is_weekend'].ne(0)
test_df['is_working_day'] = (~is_day_off).astype(int)

In [4]:
# Prepare output DataFrame with required columns
output_cols = [
    'date', 'section_id', 'employees_on_duty', 'total_task_time_minutes', 'is_holiday', 'is_weekend',
    'year', 'month', 'dayofweek', 'quarter', 'is_working_day',
    'predicted_employee_count', 'predicted_task_time_minutes',
    'employee_lower_bound', 'employee_upper_bound', 'task_time_lower_bound', 'task_time_upper_bound'
]

# (prediction, lower bound, upper bound) used when a section has no pretrained model.
FALLBACK_TASK_TIME = (60.0, 36.0, 1450.0)
FALLBACK_EMPLOYEES = (1, 1, 4)

# Multipliers applied to model_info['max_employees'] to cap the point
# prediction and the upper confidence bound respectively.
EMPLOYEE_PRED_HEADROOM = 1.3
EMPLOYEE_UPPER_HEADROOM = 1.5

# Actual values are not known for the prediction period; the output keeps the
# columns with placeholder values.
PLACEHOLDER_EMPLOYEES_ON_DUTY = 0
PLACEHOLDER_TOTAL_TASK_TIME = 0.0


def _predict_task_time(section, date, is_weekend, is_holiday):
    """Return ``(prediction, lower, upper)`` task-time minutes for one row.

    Uses the section's model from ``task_time_models_2025`` when one exists,
    passing ``is_weekend`` and ``is_holiday`` alongside ``ds``; otherwise
    returns ``FALLBACK_TASK_TIME``.

    All three values are clamped at zero. The original code clamped only the
    point forecast, so a negative ``yhat_lower`` leaked into the output and a
    negative ``yhat_upper`` could end up below the clamped prediction.
    """
    if section not in task_time_models_2025:
        return FALLBACK_TASK_TIME

    future_df = pd.DataFrame({'ds': [date], 'is_weekend': [is_weekend], 'is_holiday': [is_holiday]})
    forecast = task_time_models_2025[section].predict(future_df)
    predicted = max(0.0, float(forecast['yhat'].iloc[0]))
    lower = max(0.0, float(forecast['yhat_lower'].iloc[0]))
    upper = max(0.0, float(forecast['yhat_upper'].iloc[0]))
    return predicted, lower, upper


def _employees_for(task_minutes, avg_task_per_employee):
    """Head-count implied by ``task_minutes`` at the section's average load.

    Returns 0 when the average is not positive, so every caller shares the
    same division-by-zero guard.
    """
    if avg_task_per_employee > 0:
        return task_minutes / avg_task_per_employee
    return 0


def _predict_employees(section, predicted_task_time, task_time_lower, task_time_upper):
    """Return ``(prediction, lower, upper)`` employee counts for one row.

    The point prediction blends the section's regression model with the
    efficiency estimate (task time / average task time per employee). The
    regression weight is ``REGRESSION_WEIGHT_HIGH`` when the section's
    ``training_r2`` exceeds ``REGRESSION_WEIGHT_THRESHOLD`` and
    ``REGRESSION_WEIGHT_LOW`` otherwise. Sections without an entry in
    ``employee_models`` get ``FALLBACK_EMPLOYEES``.
    """
    if section not in employee_models:
        return FALLBACK_EMPLOYEES

    model_info = employee_models[section]
    avg_task = model_info['avg_task_per_employee']

    reg_pred = model_info['regression_model'].predict(np.array([[predicted_task_time]]))[0]
    eff_pred = _employees_for(predicted_task_time, avg_task)
    if model_info['training_r2'] > REGRESSION_WEIGHT_THRESHOLD:
        weight_reg = REGRESSION_WEIGHT_HIGH
    else:
        weight_reg = REGRESSION_WEIGHT_LOW
    blended = weight_reg * reg_pred + (1 - weight_reg) * eff_pred

    # Never predict fewer than one employee or fewer than the section minimum.
    floor = max(1, model_info['min_employees'])
    pred_cap = int(model_info['max_employees'] * EMPLOYEE_PRED_HEADROOM)
    upper_cap = int(model_info['max_employees'] * EMPLOYEE_UPPER_HEADROOM)

    final_pred = max(1, int(np.round(blended)))
    final_pred = int(np.clip(final_pred, floor, pred_cap))

    # Confidence bounds come from the task-time bounds. They go through the
    # guarded helper because the original divided by avg_task_per_employee
    # here without the ``> 0`` check used for the efficiency estimate.
    raw_lower = int(np.round(_employees_for(task_time_lower, avg_task)))
    raw_upper = int(np.round(_employees_for(task_time_upper, avg_task)))
    employees_lower = int(np.clip(raw_lower, floor, final_pred))
    employees_upper = int(np.clip(raw_upper, final_pred, upper_cap))
    return final_pred, employees_lower, employees_upper


# Columns read from each input row, in the order they appear in the output.
feature_cols = [
    'date', 'section_id', 'is_holiday', 'is_weekend',
    'year', 'month', 'dayofweek', 'quarter', 'is_working_day'
]

# NOTE: the task-time model is called once per row. If this cell becomes too
# slow, predicting once per section is the obvious next step.
results = []
for row in test_df[feature_cols].itertuples(index=False):
    predicted_task_time, task_time_lower, task_time_upper = _predict_task_time(
        row.section_id, row.date, row.is_weekend, row.is_holiday
    )
    final_pred, employees_lower, employees_upper = _predict_employees(
        row.section_id, predicted_task_time, task_time_lower, task_time_upper
    )
    results.append([
        row.date, row.section_id, PLACEHOLDER_EMPLOYEES_ON_DUTY, PLACEHOLDER_TOTAL_TASK_TIME,
        row.is_holiday, row.is_weekend,
        row.year, row.month, row.dayofweek, row.quarter, row.is_working_day,
        final_pred, predicted_task_time, employees_lower, employees_upper, task_time_lower, task_time_upper
    ])

output_df = pd.DataFrame(results, columns=output_cols)
output_df.head(10)

Unnamed: 0,date,section_id,employees_on_duty,total_task_time_minutes,is_holiday,is_weekend,year,month,dayofweek,quarter,is_working_day,predicted_employee_count,predicted_task_time_minutes,employee_lower_bound,employee_upper_bound,task_time_lower_bound,task_time_upper_bound
0,2025-01-01,SEC-001,0,0.0,0,0,2025,1,2,1,1,2,769.001413,1,4,69.533268,1491.366584
1,2025-01-01,SEC-002,0,0.0,0,0,2025,1,2,1,1,4,1435.331359,2,6,656.076534,2236.901306
2,2025-01-01,SEC-003,0,0.0,0,0,2025,1,2,1,1,2,849.557019,1,4,212.756956,1503.33307
3,2025-01-01,SEC-004,0,0.0,0,0,2025,1,2,1,1,3,964.579725,1,5,238.87783,1651.962817
4,2025-01-01,SEC-005,0,0.0,0,0,2025,1,2,1,1,6,2510.344552,2,11,754.452696,4486.955635
5,2025-01-01,SEC-006,0,0.0,0,0,2025,1,2,1,1,5,1880.501315,2,8,614.340445,3133.496695
6,2025-01-02,SEC-001,0,0.0,0,0,2025,1,3,1,1,2,754.151817,1,4,42.149423,1461.149755
7,2025-01-02,SEC-002,0,0.0,0,0,2025,1,3,1,1,4,1411.870581,2,6,594.007206,2206.577317
8,2025-01-02,SEC-003,0,0.0,0,0,2025,1,3,1,1,2,851.242216,1,4,218.7613,1460.604293
9,2025-01-02,SEC-004,0,0.0,0,0,2025,1,3,1,1,3,981.220253,1,5,287.81666,1760.256273
