# Multi Layer Perceptron

This is the Notebook used for the Implementation, Training and Testing of a numpy-based Multi Layer Perceptron classifier.

## Importing Data - Preparation

Separating data into training, validation and testing. This process uses the Scikit-Learn [train_test_split](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html) class.

In [3]:
import pandas as pd

df = pd.read_csv('../data/loan_approval/loan_data_refined.csv')

df.head()

Unnamed: 0.1,Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,0,22.0,female,Master,11.183699,0,RENT,10.463103,PERSONAL,16.02,0.49,3.0,561,No,1
1,1,21.0,female,High School,9.41589,0,OWN,6.907755,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,2,25.0,female,High School,9.428512,3,MORTGAGE,8.612503,MEDICAL,12.87,0.44,3.0,635,No,1
3,3,23.0,female,Bachelor,11.28669,0,RENT,10.463103,MEDICAL,15.23,0.44,2.0,675,No,1
4,4,24.0,male,Master,11.099453,1,RENT,10.463103,MEDICAL,14.27,0.53,4.0,586,No,1


In [4]:
df.describe()

Unnamed: 0.1,Unnamed: 0,person_age,person_income,person_emp_exp,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,loan_status
count,44924.0,44924.0,44924.0,44924.0,44924.0,44924.0,44924.0,44924.0,44924.0,44924.0
mean,22485.167616,27.693193,11.121073,5.338527,8.940718,11.006575,0.139758,5.840397,632.498108,0.222353
std,12993.058492,5.728904,0.553768,5.740106,0.710852,2.978941,0.087184,3.818092,50.353701,0.415832
min,0.0,20.0,8.987197,0.0,6.214608,5.42,0.0,2.0,390.0,0.0
25%,11234.75,24.0,10.761492,1.0,8.517193,8.59,0.07,3.0,601.0,0.0
50%,22466.5,26.0,11.113089,4.0,8.987197,11.01,0.12,4.0,639.0,0.0
75%,33759.25,30.0,11.469151,8.0,9.412097,13.0,0.19,8.0,670.0,0.0
max,44999.0,62.0,13.945418,40.0,10.463103,20.0,0.66,30.0,772.0,1.0


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, FunctionTransformer, LabelEncoder
from sklearn.impute import SimpleImputer

TARGET = "loan_status"

X = df.drop(columns=[TARGET]).copy()
y = df[TARGET].copy()

X_train_df, X_test_df, y_train_s, y_test_s = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

for _df in (X_train_df, X_test_df):
    obj_cols = _df.select_dtypes(include="object").columns
    _df[obj_cols] = _df[obj_cols].replace("", np.nan)

def coerce_bool_like(dfx: pd.DataFrame) -> pd.DataFrame:
    out = dfx.copy()
    for c in out.columns:
        if out[c].dtype == "bool":
            continue
        if out[c].dtype == "object":
            lower = out[c].str.strip().str.lower()
            if lower.dropna().isin({"yes","no","true","false","y","n","t","f","0","1"}).all():
                out[c] = lower.isin({"yes","true","y","t","1"})
    return out

X_train_df = coerce_bool_like(X_train_df)
X_test_df  = coerce_bool_like(X_test_df)

num_cols  = X_train_df.select_dtypes(include=["number"]).columns.tolist()
bool_cols = X_train_df.select_dtypes(include=["bool"]).columns.tolist()
cat_cols  = [c for c in X_train_df.columns if c not in num_cols + bool_cols]

num_pipe = Pipeline([
    ("imp", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

bool_pipe = Pipeline([
    ("to_float", FunctionTransformer(lambda X: X.astype(float))),
    ("imp", SimpleImputer(strategy="most_frequent")),
    ("to_pm1", FunctionTransformer(lambda X: np.where(X > 0.5, 1.0, -1.0))),
])

cat_pipe = Pipeline([
    ("imp", SimpleImputer(strategy="most_frequent")),
    ("ohe", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
    ("to_pm1", FunctionTransformer(lambda X: 2.0 * X - 1.0)),
])

preproc = ColumnTransformer(
    transformers=[
        ("num",  num_pipe,  num_cols),
        ("bool", bool_pipe, bool_cols),
        ("cat",  cat_pipe,  cat_cols),
    ],
    remainder="drop"
)

X_train = preproc.fit_transform(X_train_df)
X_test  = preproc.transform(X_test_df)

X_train = X_train.astype(np.float64, copy=False)
X_test  = X_test.astype(np.float64, copy=False)

le = LabelEncoder()
y_train = le.fit_transform(y_train_s.astype(str))
y_test  = le.transform(y_test_s.astype(str))

K = len(le.classes_)
input_dim = X_train.shape[1]

print("Shapes:", X_train.shape, X_test.shape)
print("Value range (train):", float(X_train.min()), "to", float(X_train.max()))
print("Bool cols mapped to ±1:", bool_cols)


Shapes: (35939, 26) (8985, 26)
Value range (train): -4.813166360666915 to 6.345548280596645
Bool cols mapped to ±1: ['previous_loan_defaults_on_file']


In [26]:
K = len(np.unique(y))                 # number of classes

In [27]:
# sanity checks before training
assert X_train.ndim == 2 and X_train.dtype.kind in "fc", "X_train must be float matrix"
assert y_train.ndim == 1 and np.issubdtype(y_train.dtype, np.integer), "y_train must be int labels"
assert y_train.min() == 0 and y_train.max() == K-1, "labels must be in [0..K-1]"


In [28]:
import numpy as np
from sklearn.metrics import accuracy_score


def glorot_uniform(fan_in, fan_out):
    limit = np.sqrt(6.0 / (fan_in + fan_out))
    return np.random.uniform(-limit, limit, size=(fan_out, fan_in))

def init_vector_glorot(fan_out):
    return np.zeros(fan_out)

def softmax(z):
    z = z - np.max(z)
    e = np.exp(z)
    return e / np.sum(e)

def one_hot(y_i, K):
    v = np.zeros(K, dtype=float)
    v[y_i] = 1.0
    return v

tanh = np.tanh
tanhp = lambda a: (1.0 - a**2) 


class HiddenLayer:
    def __init__(self, fan_in, fan_out):
        self.W = glorot_uniform(fan_in, fan_out)  # shape (fan_out, fan_in)
        self.b = init_vector_glorot(fan_out)      # shape (fan_out,)
        self.z = None
        self.a = None
        self.prev_a = None

    def forward(self, x):
        self.prev_a = x
        self.z = self.W @ x + self.b
        self.a = tanh(self.z)
        return self.a


def forward_probs(x, layers, W2, b2):
    a = x
    for layer in layers:
        a = layer.forward(a)
    z2 = W2 @ a + b2
    p = softmax(z2)
    return p, a


K = len(np.unique(y))                 # classes
input_dim = X_train.shape[1]
H = 16
NLayers = 3

HiddenLayers = []
HiddenLayers.append(HiddenLayer(input_dim, H))
for _ in range(NLayers - 1):
    HiddenLayers.append(HiddenLayer(H, H))

W2 = glorot_uniform(H, K)   # shape (K, H)
b2 = np.zeros(K)

eta = 0.01     
epochs = 100
clip_value = 5.0    # gradient clip (optional, but helps stability)

N = len(X_train)
indices = np.arange(N)

for epoch in range(epochs):

    np.random.shuffle(indices)

    for idx in indices:
        x_i = X_train[idx]          # shape (D,)
        y_i = int(y_train[idx])     # scalar int

        # forward
        p, a_last = forward_probs(x_i, HiddenLayers, W2, b2)

        y_one = one_hot(y_i, K)
        delta_out = p - y_one                               # (K,)

        W2_old = W2.copy()

        # grads output
        dW2 = np.outer(delta_out, a_last)                   # (K, H)
        db2 = delta_out                                     # (K,)

        next_delta = delta_out            # current delta at output
        next_W = W2_old                   # use cached W2 for first step

        for layer in reversed(HiddenLayers):

            g = next_W.T @ next_delta                       # (H,)
            delta = g * tanhp(layer.a)                      # (H,)

            dW = np.outer(delta, layer.prev_a)              # (H, in_dim)
            db = delta                                      # (H,)

            W_old = layer.W.copy()

            if np.linalg.norm(dW) > clip_value:
                dW = dW * (clip_value / (np.linalg.norm(dW) + 1e-12))
            if np.linalg.norm(db) > clip_value:
                db = db * (clip_value / (np.linalg.norm(db) + 1e-12))

            layer.W -= eta * dW
            layer.b -= eta * db

            next_delta = delta
            next_W = W_old

        if np.linalg.norm(dW2) > clip_value:
            dW2 = dW2 * (clip_value / (np.linalg.norm(dW2) + 1e-12))
        if np.linalg.norm(db2) > clip_value:
            db2 = db2 * (clip_value / (np.linalg.norm(db2) + 1e-12))

        W2 -= eta * dW2
        b2 -= eta * db2

    probs = np.array([forward_probs(x_i, HiddenLayers, W2, b2)[0] for x_i in X_train])
    y_pred = np.argmax(probs, axis=1)
    print(f"Epoch {epoch:03d} | train acc: {accuracy_score(y_train, y_pred):.4f}")




print("Final W2:", W2, "Final b2:", b2)
for i, layer in enumerate(HiddenLayers, 1):
    print(f"Layer {i} weights:\n{layer.W}\nLayer {i} bias:\n{layer.b}")



Epoch 000 | train acc: 0.9122
Epoch 001 | train acc: 0.9153
Epoch 002 | train acc: 0.9140
Epoch 003 | train acc: 0.9230
Epoch 004 | train acc: 0.9223
Epoch 005 | train acc: 0.9299
Epoch 006 | train acc: 0.9316
Epoch 007 | train acc: 0.9353
Epoch 008 | train acc: 0.9358
Epoch 009 | train acc: 0.9369
Epoch 010 | train acc: 0.9370
Epoch 011 | train acc: 0.9372
Epoch 012 | train acc: 0.9361
Epoch 013 | train acc: 0.9350
Epoch 014 | train acc: 0.9359
Epoch 015 | train acc: 0.9367
Epoch 016 | train acc: 0.9348
Epoch 017 | train acc: 0.9344
Epoch 018 | train acc: 0.9336
Epoch 019 | train acc: 0.9373
Epoch 020 | train acc: 0.9366
Epoch 021 | train acc: 0.9326
Epoch 022 | train acc: 0.9321
Epoch 023 | train acc: 0.9371
Epoch 024 | train acc: 0.9351
Epoch 025 | train acc: 0.9376
Epoch 026 | train acc: 0.9373
Epoch 027 | train acc: 0.9376
Epoch 028 | train acc: 0.9363
Epoch 029 | train acc: 0.9368
Epoch 030 | train acc: 0.9367
Epoch 031 | train acc: 0.9366
Epoch 032 | train acc: 0.9370
Epoch 033 

In [29]:

probs = np.array([forward_probs(x_i, HiddenLayers, W2, b2)[0] for x_i in X_test])  # (N, K)
y_pred = np.argmax(probs, axis=1)
print("Testing Accuracy:", accuracy_score(y_test, y_pred))

Testing Accuracy: 0.9364496382860322
