# 03 - Feature Engineering (V3)

- Create quantile bins (training hours, leadership score, projects per (years at company + 1), tenure, age)
- Create an interaction term (performance × leadership) and a `log1p` transform of the non-negative projects-per-year ratio
- Drop identifiers; document selection
- Save to `../v3_data/employee_promotion_features.csv`


In [None]:
from pathlib import Path
import numpy as np
import pandas as pd

INP = Path('../v3_data/employee_promotion_clean.csv')
OUT = Path('../v3_data/employee_promotion_features.csv')


def _quantile_bin(values: pd.Series, labels: list) -> pd.Series:
    """Cut *values* into ``len(labels)`` quantile bins named by *labels*.

    ``pd.qcut`` raises ``ValueError`` when heavily tied data yields duplicate
    quantile edges. In that case the values are replaced by their
    first-occurrence ranks, which are unique, so the requested number of
    bins (and therefore the label list) is preserved. The trade-off is that
    tied values may be split across adjacent bins, in row order.
    """
    n_bins = len(labels)
    try:
        return pd.qcut(values, q=n_bins, labels=labels)
    except ValueError:
        # Duplicate bin edges: bin the ranks instead of the raw values.
        # If this also fails (e.g. too few rows), the error propagates.
        return pd.qcut(values.rank(method='first'), q=n_bins, labels=labels)


def build_features(raw: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *raw* with the engineered feature columns added.

    Adds ``Training_Level``, ``Leadership_Level``, ``Project_Level``,
    ``Tenure_Level``, ``Age_Group``, ``Perf_x_Leader`` and (when every
    projects-per-year ratio is non-negative) ``Projects_per_Years_log``.
    ``Employee_ID`` is removed if present. The input frame is not modified.

    Raises:
        KeyError: if a required source column is missing.
        ValueError: if a column cannot be split into the requested bins.
    """
    df = raw.copy()

    # Bins and engineered features
    df['Training_Level'] = _quantile_bin(
        df['Training_Hours'],
        ['Very Low', 'Low', 'Moderate', 'High', 'Very High'],
    )
    df['Leadership_Level'] = _quantile_bin(
        df['Leadership_Score'],
        ['Low', 'Medium', 'High', 'Very High'],
    )

    # +1 in the denominator avoids division by zero for zero-year tenure.
    df['Projects_per_Years'] = df['Projects_Handled'] / (df['Years_at_Company'] + 1)
    df['Project_Level'] = _quantile_bin(
        df['Projects_per_Years'],
        ['Low', 'Moderate', 'High', 'Very High'],
    )

    df['Tenure_Level'] = _quantile_bin(
        df['Years_at_Company'],
        ['New', 'Mid', 'Senior', 'Veteran'],
    )
    df['Age_Group'] = _quantile_bin(
        df['Age'],
        ['Young', 'Early Mid', 'Late Mid', 'Senior'],
    )

    # Interaction example
    df['Perf_x_Leader'] = df['Performance_Score'] * df['Leadership_Score']

    # Log transform, applied only when every ratio is non-negative.
    if (df['Projects_per_Years'] >= 0).all():
        df['Projects_per_Years_log'] = np.log1p(df['Projects_per_Years'])
    else:
        import warnings

        # The raw ratio is dropped below, so make the missing feature visible.
        warnings.warn(
            'Projects_per_Years has negative or missing values; '
            'Projects_per_Years_log was not created.',
            stacklevel=2,
        )

    # Drop the identifier (if present) and the intermediate ratio column.
    return df.drop(columns=['Employee_ID', 'Projects_per_Years'], errors='ignore')


# The guard is true when run as a notebook cell or script, so the file I/O
# below still happens there; importing the module only defines the helpers.
if __name__ == '__main__':
    OUT.parent.mkdir(parents=True, exist_ok=True)

    df = build_features(pd.read_csv(INP))

    # Save
    df.to_csv(OUT, index=False)
    print('Saved:', OUT, 'Shape:', df.shape)
