# 02 — Feature Engineering
Define feature types and build the preprocessing pipeline.

In [None]:
import sys
# Put the parent directory first on the import search path.
# NOTE(review): presumably so the `src` package below resolves when the
# notebook is run from a subdirectory of the project — confirm the layout.
sys.path.insert(0, '..')

import pandas as pd
import warnings
# Silences every warning for the rest of the session, including
# deprecation and convergence warnings from libraries.
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
# Project helpers plus the column-name collections printed in section 2.
from src.preprocessing import load_and_clean, build_preprocessor, num_cols, nominal_cols, ordinal_cols

## 1. Load & Clean Data

In [None]:
# Load the dataset through the project helper.
# NOTE(review): the cleaning steps live in src.preprocessing and are not
# visible here; the result is used below as a DataFrame with a 'Risk' column.
df = load_and_clean()
# Quick sanity check of the shape.
print(df.shape)
# Last expression of the cell: the notebook renders the first rows.
df.head()

## 2. Feature Definitions

In [None]:
# Show which columns belong to each feature group; the groups themselves
# are defined in src.preprocessing.
for _label, _columns in (
    ('Numerical features :', num_cols),
    ('Nominal features   :', nominal_cols),
    ('Ordinal features   :', ordinal_cols),
):
    print(_label, _columns)

## 3. Target Encoding
The `Risk` target is mapped to integers for binary classification: `good` → 1, `bad` → 0.

In [None]:
# Separate the predictors from the target column.
X = df.drop(columns='Risk')

# Encode the target as integers: 'good' -> 1, 'bad' -> 0.
# Series.map yields NaN for any label missing from the mapping (different
# casing, stray whitespace, missing values), which would silently turn y
# into a float column containing NaN. Fail loudly here instead.
y = df['Risk'].map({'good': 1, 'bad': 0})
if y.isna().any():
    unexpected = sorted(df.loc[y.isna(), 'Risk'].astype(str).unique())
    raise ValueError(f"Unexpected values in 'Risk': {unexpected}")
# No-op for clean data; guarantees an integer target for the classifier.
y = y.astype('int64')

print('Class distribution:')
print(y.value_counts())

## 4. Train / Test Split

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions, and the fixed seed makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
print(f'Train: {X_train.shape}  |  Test: {X_test.shape}')

## 5. Preprocessing Pipeline
- **StandardScaler** on numerical columns
- **OneHotEncoder** (drop first) on nominal columns
- **OrdinalEncoder** on Job (skill level 0–3)

In [None]:
# Build the (still unfitted) preprocessing object with the project helper.
# NOTE(review): the markdown above describes it as StandardScaler +
# OneHotEncoder + OrdinalEncoder; the actual construction lives in
# src.preprocessing — confirm there.
preprocessor = build_preprocessor()
# Last expression of the cell: the notebook renders the object.
preprocessor

In [None]:
# numpy is not used in this cell; NOTE(review): presumably needed by later
# cells — confirm before removing.
import numpy as np
# Fit the preprocessor on the training split only and transform it in the
# same call, so nothing is learned from the held-out test rows.
X_train_transformed = preprocessor.fit_transform(X_train)
# Column count after transformation may differ from X_train's.
print('Transformed train shape:', X_train_transformed.shape)