## 07_Attrition_Prediction_Inference.ipynb

**Purpose:**  
Run inference using the trained attrition model on processed HRMS data.

**Input:**  
- `data/processed/HRMS_cleaned.csv`  
- `notebooks/models/attrition_model.pkl`, `notebooks/models/preprocessor.pkl`

**Output:**  
- `data/processed/HRMS_with_attrition_predictions.csv`

**Notes:**  
- Loads pre-trained attrition model and preprocessor.  
- Appends the attrition probability, a binary risk label (probability ≥ 0.5), and a Low/Medium/High risk level to the HRMS data.

In [1]:
import os
from pathlib import Path

import joblib
import numpy as np
import pandas as pd


In [2]:
# Project root is configurable so the notebook can run outside the original
# author's machine: set the T_IQ_HR_ROOT environment variable to override.
# The default preserves the original hard-coded location.
PROJECT_ROOT = Path(
    os.environ.get("T_IQ_HR_ROOT", r"C:\Users\abanu\Documents\t_iq_hr")
)

# Both directories are derived from the single root above instead of
# repeating the absolute path.
DATA_PATH = PROJECT_ROOT / "data" / "processed"
MODEL_PATH = PROJECT_ROOT / "notebooks" / "models"


In [3]:
# Read the cleaned HRMS extract from the processed-data directory.
hrms_df = pd.read_csv(DATA_PATH.joinpath("HRMS_cleaned.csv"))

# Quick sanity check: (rows, columns) followed by a preview of the first rows.
print(hrms_df.shape)
hrms_df.head()


(10000, 11)


Unnamed: 0,employee_id,name,department,job_role,location,current_salary,satisfaction_score,engagement_score,num_skills,years_at_company,trainings_count
0,EMP000001,Vikram Singh,HR,Data Scientist,"New York, USA",4544478,0.78,0.8,7,12,0
1,EMP000002,Karan Patel,Marketing,Data Scientist,"Chennai, India",5180268,0.71,0.93,8,7,4
2,EMP000003,Vikram Malhotra,Marketing,Senior Software Engineer,"Chennai, India",2589268,0.81,0.56,6,3,3
3,EMP000004,Siddharth Khan,HR,ML Engineer,"Bengaluru, India",1321856,0.43,0.95,7,15,3
4,EMP000005,Priya Nair,Legal,ML Engineer,Remote,4371479,0.41,0.7,4,7,2


In [4]:
def standardize_emp_id(series, prefix="EMP", pad=4):
    """Normalize employee identifiers to ``<prefix><zero-padded number>``.

    For each value, the first run of digits is read as an integer (so any
    leading zeros are discarded) and rendered again, left-padded with zeros
    to at least ``pad`` characters and prefixed with ``prefix``.

    Args:
        series: pandas Series of raw identifiers (any dtype; values are
            converted to strings first).
        prefix: Text placed in front of the padded number.
        pad: Minimum width of the numeric part; longer numbers are kept whole.

    Returns:
        A Series aligned with ``series`` holding the formatted identifiers;
        values that contain no digits become missing.
    """
    def _format_id(number):
        # Missing numbers (no digits found) stay missing.
        if pd.isna(number):
            return None
        return f"{prefix}{str(number).zfill(pad)}"

    digit_runs = series.astype(str).str.extract(r'(\d+)')[0]
    # Go through float first so missing matches survive the cast to the
    # nullable integer dtype.
    numeric_ids = digit_runs.astype(float).astype('Int64')
    return numeric_ids.apply(_format_id)


In [5]:
# Rename the key column to EmployeeID and reformat its values with
# standardize_emp_id. rename()/assign() return a new frame, so the loaded
# data is rebound rather than mutated with inplace=True; re-running the cell
# yields the same result.
hrms_df = (
    hrms_df
    .rename(columns={'employee_id': 'EmployeeID'})
    .assign(EmployeeID=lambda frame: standardize_emp_id(frame['EmployeeID']))
)

hrms_df[['EmployeeID']].head()


Unnamed: 0,EmployeeID
0,EMP0001
1,EMP0002
2,EMP0003
3,EMP0004
4,EMP0005


In [6]:
# Load the persisted model and preprocessor from the models directory.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code —
# only load artifacts that come from a trusted source.
attrition_model, preprocessor = (
    joblib.load(MODEL_PATH / artifact_file)
    for artifact_file in ("attrition_model.pkl", "preprocessor.pkl")
)

print("✅ Model & preprocessor loaded")


✅ Model & preprocessor loaded


In [7]:
# Feature frame: every HRMS column except the identifier. The prediction
# columns written to hrms_df by the scoring cells are dropped as well, so
# re-running this cell after scoring does not feed earlier predictions back
# in as features; errors='ignore' makes those names no-ops on a fresh run.
X = hrms_df.drop(
    columns=[
        'EmployeeID',
        'attrition_probability',
        'attrition_risk',
        'attrition_risk_level',
    ],
    errors='ignore',
)
# NOTE(review): X_transformed is not referenced by any later cell shown in
# this notebook (scoring calls predict_proba on X) — confirm whether this
# transform is still needed.
X_transformed = preprocessor.transform(X)


In [9]:
# Feature frame passed to the model: every HRMS column except the identifier.
# The prediction columns this notebook adds to hrms_df are dropped too, so the
# cell stays correct when re-run after scoring; errors='ignore' makes those
# names no-ops on a fresh run.
X = hrms_df.drop(
    columns=[
        'EmployeeID',
        'attrition_probability',
        'attrition_risk',
        'attrition_risk_level',
    ],
    errors='ignore',
)


In [10]:
# Score every employee. Column 1 of predict_proba is taken as the attrition
# probability — assumes index 1 is the "leaves" class; verify against
# attrition_model.classes_.
class_probabilities = attrition_model.predict_proba(X)
attrition_prob = class_probabilities[:, 1]

# Store the raw probability and a binary flag (1 when probability >= 0.5).
hrms_df['attrition_probability'] = attrition_prob
is_at_risk = attrition_prob >= 0.5
hrms_df['attrition_risk'] = is_at_risk.astype(int)


In [11]:
# Display the loaded model object (its repr) for inspection.
attrition_model


In [12]:
def risk_bucket(p, high=0.75, medium=0.4):
    """Map an attrition probability to a coarse risk label.

    Args:
        p: Probability to classify.
        high: Inclusive lower bound of the 'High' bucket.
        medium: Inclusive lower bound of the 'Medium' bucket.

    Returns:
        'High' when ``p >= high``, 'Medium' when ``medium <= p < high``,
        otherwise 'Low'. NaN compares False against both bounds and
        therefore yields 'Low'.
    """
    if p >= high:
        return 'High'
    if p >= medium:
        return 'Medium'
    return 'Low'

# Label each employee with the Low/Medium/High bucket of their probability.
hrms_df['attrition_risk_level'] = hrms_df['attrition_probability'].map(risk_bucket)


In [13]:
# Write the HRMS data plus prediction columns next to the input file,
# without the DataFrame index.
predictions_csv = DATA_PATH / "HRMS_with_attrition_predictions.csv"
hrms_df.to_csv(predictions_csv, index=False)

print("✅ Notebook-07 completed successfully")


✅ Notebook-07 completed successfully
