In [None]:
"""
05 Inference
Use the trained model to predict attrition on new/unseen data.
"""

In [None]:
from src.inference import predict_attrition
from src.feature_engineering import engineer_features
import pandas as pd
from IPython.display import display, Markdown

In [None]:
display(Markdown("""
# Inference
This notebook uses the trained attrition model to make predictions on new or unseen employee data.
"""))

In [None]:
display(Markdown("""
## Load New/Unseen Data
We load new or held-out employee data for inference. Replace with actual new data as needed.
"""))

In [None]:
# Source of the records to score. It currently points at employee_data_cleaned.csv
# as a stand-in; change this one constant to score genuinely new data.
# NOTE(review): if this file is also what the model was trained on, the
# predictions below are not out-of-sample — confirm it is a held-out split.
INFER_DATA_PATH = 'data/employee_data_cleaned.csv'
infer_df = pd.read_csv(INFER_DATA_PATH)

In [None]:
display(Markdown("""
## Prepare Input Data
We remove columns not needed for prediction (e.g., Attrition, EmployeeId).
"""))

In [None]:
# Strip the Attrition and EmployeeId columns before scoring.
# errors='ignore' makes the drop a no-op for whichever of them is absent.
infer_input = infer_df.drop(columns=['Attrition', 'EmployeeId'], errors='ignore')

In [None]:
display(Markdown("""
## Engineer Features for Inference
We apply the same feature engineering steps as in training to ensure consistency.
"""))

In [None]:
# Run the project's feature-engineering step (src.feature_engineering) on the
# prepared inputs; the result is what the later cells normalise and score.
# NOTE(review): presumably this is where derived columns such as AgeGroup come
# from, since AgeGroup is treated as categorical further down — confirm in the module.
df_infer_fe = engineer_features(infer_input)

In [None]:
display(Markdown("""
## Preprocess Categorical Columns
We preprocess categorical variables to match the format used during model training.
"""))

In [None]:
# Normalise categorical labels as in training (per the original note; the
# training code is not shown here): cast to str, then replace spaces with "_"
# and "&" with "_and_".
# regex=False pins literal matching, so the result does not depend on the
# pandas version's default for `regex` (it changed in pandas 2.0).
# assign() rebinds df_infer_fe to a new frame instead of writing column by
# column into the object returned by engineer_features, so re-running this
# cell never edits (or warns about) a shared or sliced frame.
# NOTE: astype(str) renders missing values as the literal text 'nan'.
categorical_cols = ['BusinessTravel', 'Department', 'EducationField',
                    'Gender', 'JobRole', 'MaritalStatus', 'Over18', 'OverTime', 'AgeGroup']
df_infer_fe = df_infer_fe.assign(**{
    col: (
        df_infer_fe[col]
        .astype(str)
        .str.replace(' ', '_', regex=False)
        .str.replace('&', '_and_', regex=False)
    )
    for col in categorical_cols
    if col in df_infer_fe.columns  # tolerate columns missing from the input
})

In [None]:
display(Markdown("""
## Predict Attrition
We use the trained model to predict attrition probabilities and classes for each employee.
"""))

In [None]:
# Score the prepared frame with the project's predict_attrition helper
# (src.inference) and preview the first rows of what it returns.
# NOTE(review): the notebook text says this yields predicted classes and
# probabilities per employee; the exact columns are defined in src.inference.
predictions = predict_attrition(df_infer_fe)
predictions.head() 