# Employee Prediction Using Pretrained Models (Pickle)

This notebook loads pretrained models from a pickle file and uses them to predict required employees for the provided evaluation dataset (`task2_test_inputs.csv`). No retraining is performed.

In [1]:
import pandas as pd
import numpy as np
import pickle
from datetime import datetime

# Load pretrained models and parameters.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only run this cell against a 'predictions_full.pkl' from a trusted source.
with open('predictions_full.pkl', 'rb') as f:
    all_models = pickle.load(f)

# Fail fast with a single message naming every missing entry, instead of a
# bare KeyError on whichever lookup happens to come first.
_REQUIRED_MODEL_KEYS = (
    'employee_models',
    'task_time_models_2025',
    'PROPHET_PARAMS',
    'DEFAULT_PROPHET_PARAMS',
    'OPTIMIZED_HUBER_PARAMS',
    'DEFAULT_HUBER_PARAMS',
    'REGRESSION_WEIGHT_THRESHOLD',
    'REGRESSION_WEIGHT_HIGH',
    'REGRESSION_WEIGHT_LOW',
)
_missing_model_keys = [key for key in _REQUIRED_MODEL_KEYS if key not in all_models]
if _missing_model_keys:
    raise KeyError(
        f"'predictions_full.pkl' is missing expected entries: {_missing_model_keys}"
    )

# Per-section models, looked up by section_id in the prediction cell.
employee_models = all_models['employee_models']
task_time_models_2025 = all_models['task_time_models_2025']

# Hyper-parameters stored alongside the models. They are not needed for
# inference in the cells shown here but stay exposed under their original names.
PROPHET_PARAMS = all_models['PROPHET_PARAMS']
DEFAULT_PROPHET_PARAMS = all_models['DEFAULT_PROPHET_PARAMS']
OPTIMIZED_HUBER_PARAMS = all_models['OPTIMIZED_HUBER_PARAMS']
DEFAULT_HUBER_PARAMS = all_models['DEFAULT_HUBER_PARAMS']

# Blending rule: the regression prediction gets the HIGH weight when the
# section's training R^2 exceeds the threshold, otherwise the LOW weight.
REGRESSION_WEIGHT_THRESHOLD = all_models['REGRESSION_WEIGHT_THRESHOLD']
REGRESSION_WEIGHT_HIGH = all_models['REGRESSION_WEIGHT_HIGH']
REGRESSION_WEIGHT_LOW = all_models['REGRESSION_WEIGHT_LOW']

# Load evaluation input
test_df = pd.read_csv('task2_test_inputs.csv')

In [2]:
# Feature engineering for test data

# Holiday calendar (from combine_fcode_2), one line per year 2021-2025.
holiday_data = [
    # 2021-2025 holidays (same as combine_fcode_2)
    '2021-01-14','2021-01-28','2021-02-04','2021-02-26','2021-03-11','2021-03-28','2021-04-02','2021-04-12','2021-04-13','2021-04-14','2021-04-26','2021-05-01','2021-05-14','2021-05-24','2021-05-25','2021-05-26','2021-05-27','2021-06-24','2021-07-21','2021-07-23','2021-08-22','2021-09-20','2021-10-19','2021-10-20','2021-11-04','2021-11-18','2021-12-18','2021-12-25',
    '2022-01-14','2022-01-17','2022-02-04','2022-02-16','2022-03-01','2022-03-17','2022-04-11','2022-04-12','2022-04-13','2022-04-14','2022-04-15','2022-04-16','2022-05-01','2022-05-02','2022-05-03','2022-05-15','2022-05-16','2022-06-13','2022-06-14','2022-06-17','2022-06-24','2022-07-01','2022-07-08','2022-07-10','2022-07-13','2022-07-15','2022-07-22','2022-07-29','2022-08-11','2022-09-10','2022-09-19','2022-10-09','2022-10-09','2022-10-10','2022-10-24','2022-11-07','2022-12-07','2022-12-25','2022-12-26',
    '2023-01-06','2023-01-15','2023-02-04','2023-02-05','2023-02-18','2023-03-06','2023-04-05','2023-04-07','2023-04-13','2023-04-14','2023-04-22','2023-05-01','2023-05-05','2023-05-06','2023-06-03','2023-06-29','2023-07-03','2023-08-01','2023-08-30','2023-09-28','2023-09-29','2023-10-28','2023-11-12','2023-11-26','2023-12-25','2023-12-26',
    '2024-01-15','2024-01-25','2024-02-04','2024-02-23','2024-03-08','2024-03-24','2024-03-29','2024-04-11','2024-04-12','2024-04-13','2024-04-15','2024-04-23','2024-05-01','2024-05-23','2024-05-24','2024-06-17','2024-06-21','2024-07-20','2024-08-19','2024-09-16','2024-09-17','2024-09-23','2024-10-17','2024-10-31','2024-11-15','2024-12-14','2024-12-25',
    '2025-01-13','2025-01-14','2025-02-04','2025-02-12','2025-02-26','2025-03-13','2025-03-31','2025-04-12','2025-04-13','2025-04-14','2025-04-15','2025-04-18','2025-05-01','2025-05-12','2025-05-13','2025-06-07','2025-06-10','2025-07-10','2025-08-08','2025-09-05','2025-09-07','2025-10-06','2025-10-20','2025-11-05','2025-12-04','2025-12-25'
]
holiday_dates = pd.to_datetime(holiday_data)

# Parse the date column once, then derive the calendar parts from it.
test_df['date'] = pd.to_datetime(test_df['date'])
date_parts = test_df['date'].dt
for part_name in ('year', 'month', 'dayofweek', 'quarter'):
    test_df[part_name] = getattr(date_parts, part_name)

# pandas numbers Monday as 0, so 5 and 6 are Saturday and Sunday.
test_df['is_weekend'] = (test_df['dayofweek'] >= 5).astype(int)
test_df['is_holiday'] = test_df['date'].isin(holiday_dates).astype(int)

# A working day is any day that is neither a weekend day nor a listed holiday.
is_day_off = (test_df['is_weekend'] == 1) | (test_df['is_holiday'] == 1)
test_df['is_working_day'] = (~is_day_off).astype(int)

In [3]:
# Fallback values used when a section has no pretrained model.
DEFAULT_TASK_TIME = 60  # hard-coded constant; it is NOT derived from training averages
MIN_EMPLOYEES = 1       # floor applied to every working-day prediction


def _predict_task_time(section, date, is_weekend, is_holiday):
    """Forecast the task time for one section on one date.

    Uses the section's pretrained Prophet model from ``task_time_models_2025``
    when one exists, clamping negative forecasts to zero. Sections without a
    model get ``DEFAULT_TASK_TIME``.
    """
    if section not in task_time_models_2025:
        return DEFAULT_TASK_TIME
    # The single-row frame carries the flag columns next to 'ds', exactly as
    # before (presumably they are extra regressors of the model -- confirm
    # against the training notebook).
    future_df = pd.DataFrame({'ds': [date], 'is_weekend': [is_weekend], 'is_holiday': [is_holiday]})
    forecast = task_time_models_2025[section].predict(future_df)
    return max(0, forecast['yhat'].iloc[0])


def _predict_employees(section, task_time):
    """Turn a task-time forecast into a whole number of required employees.

    Blends the section's regression prediction with an efficiency estimate
    (``task_time / avg_task_per_employee``). The regression weight is
    ``REGRESSION_WEIGHT_HIGH`` when the section's training R^2 exceeds
    ``REGRESSION_WEIGHT_THRESHOLD`` and ``REGRESSION_WEIGHT_LOW`` otherwise.
    The result is rounded and floored at ``MIN_EMPLOYEES``; sections without a
    model also get ``MIN_EMPLOYEES``.
    """
    if section not in employee_models:
        return MIN_EMPLOYEES
    model_info = employee_models[section]
    reg_pred = model_info['regression_model'].predict(np.array([[task_time]]))[0]
    avg_task_per_employee = model_info['avg_task_per_employee']
    # A non-positive average makes the efficiency estimate meaningless; it then
    # contributes 0 to the blend (unchanged from the original logic).
    eff_pred = task_time / avg_task_per_employee if avg_task_per_employee > 0 else 0
    if model_info['training_r2'] > REGRESSION_WEIGHT_THRESHOLD:
        weight_reg = REGRESSION_WEIGHT_HIGH
    else:
        weight_reg = REGRESSION_WEIGHT_LOW
    blended = weight_reg * reg_pred + (1 - weight_reg) * eff_pred
    # Casting NaN/inf to int is undefined; fall back to the floor explicitly
    # instead of relying on whatever value the cast happens to produce.
    if not np.isfinite(blended):
        return MIN_EMPLOYEES
    return max(MIN_EMPLOYEES, int(np.round(blended)))


# Predict for each row, collecting results in order and assigning them once
# (avoids iterrows and a per-row .loc write).
required_employees = []
for section, date, is_weekend, is_holiday in zip(
    test_df['section_id'], test_df['date'], test_df['is_weekend'], test_df['is_holiday']
):
    # If weekend or holiday, set employees to 0
    if is_weekend == 1 or is_holiday == 1:
        required_employees.append(0)
        continue
    task_time = _predict_task_time(section, date, is_weekend, is_holiday)
    required_employees.append(_predict_employees(section, task_time))

# Output keeps only the requested columns, in the requested order. The array
# is assigned positionally, so it lines up with test_df row by row.
output_df = test_df[['row_id']].copy()
output_df['true_required_employees'] = np.asarray(required_employees, dtype='int64')
output_df.head(10)

Unnamed: 0,row_id,true_required_employees
0,f944201f75875b49278c1328d019123f5e7d6f67,2
1,3f7b4944e3128a8dc35c448db69452b7e0e7fb6d,4
2,d1d8c20b85c9d7dc6279811e1cbc411230d5c867,2
3,61c05b152a5ddea1005a630cf4f062d68f9cb333,3
4,a40f2ccf6a5a8a68c9e02bfaf89045c56db3e5ae,6
5,23738dca3dcfaa9fa43905c4ee40849d25154604,5
6,cf95a3d507560c0c1c12b4362f994480f3af8eb1,2
7,99332e7824cd87585e2edc022daa70ab1ba0287a,4
8,ff8e449af42069a8e80e924daef37b2f4c1588ae,2
9,f7b15dc612ea5c45bbd82b175868196e3d4a49c7,3


In [4]:
# Write the predictions to disk (no index column) and confirm the destination.
predictions_csv = 'task2_predicted_employees.csv'
output_df.to_csv(predictions_csv, index=False)
print(f'✅ Predictions exported to {predictions_csv}')

✅ Predictions exported to task2_predicted_employees.csv
