My first neural network:

* Firstly splitting and preprocessing the data

* Creating the neural network from scratch

Notes:

* Gained ~95% accuracy with training loop
* ~75% accuracy wiht testing set

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# function to retrieve data, specificallly for simple classification
def retrieve_data(filename):
    data = pd.read_csv(filename)
    
    dataset = data.values
    X = dataset[:, :-1]
    y = dataset[:, -1]
    
    return X, y

X, y = retrieve_data('GermanCredit.csv')

#columns that are categorical, numerical, and binary (minus the output column)
categoricals = np.array([0,2,3,5,6,8,9,11,13,14,16])
numericals =np.array([1,4,12,15,17])
binaries = np.array([18,19])


#splitting test and train data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_train = y_train.reshape(-1, 1)

# data pipelining and transformers:
# i.e. OneHotEncoding categorical and binary vars and standardizing numerical vars 

# one hot encoder
pre = ColumnTransformer(
    transformers =[
        ('cat', OneHotEncoder(sparse_output=True), categoricals),
        ('num', StandardScaler(), numericals),
        ('bin', OneHotEncoder(), binaries)
    ]
)

pipeline = Pipeline([
    ('preprocessor', pre)
])

#using pipeline on both training and testing datasets
X_train_trans = pipeline.fit_transform(X_train)
X_test_trans = pipeline.transform(X_test)

In [2]:
# testing shapes and amt of unique values in the categorical columns
print("X_train shape:", X_train.shape)
print("Transformed X_train shape:", X_train_trans.shape)
print(f"Number of features in transformed X_train: {X_train_trans.shape[1]}")
print("Categorical columns unique values:")
for col in categoricals:
    print(f"Column {col} unique values: {len(np.unique(X[:, col]))}")
print(y_train.shape)

batch_size = X.shape
print("Batch size:", batch_size)

batch_sizey = y.shape
print("Batch size:", batch_sizey)


X_train shape: (800, 20)
Transformed X_train shape: (800, 59)
Number of features in transformed X_train: 59
Categorical columns unique values:
Column 0 unique values: 4
Column 2 unique values: 5
Column 3 unique values: 10
Column 5 unique values: 5
Column 6 unique values: 5
Column 8 unique values: 4
Column 9 unique values: 3
Column 11 unique values: 4
Column 13 unique values: 3
Column 14 unique values: 3
Column 16 unique values: 4
(800, 1)
Batch size: (1000, 20)
Batch size: (1000,)


In [3]:
#starter neural network

#initializing biases and weights based off the dimensions of the input and output data
class neural():
    def __init__(self, inputs_dim, outputs_dim, hidden_dim1, hidden_dim2):
        
        # setting hidden weights for hidden layer
        self.hidden_weights1 = self.glorot_weights(inputs_dim, hidden_dim1)
        self.hidden_bias1= np.zeros((1, hidden_dim1), dtype=np.float64)

        self.hidden_weights2 = self.glorot_weights(hidden_dim1, hidden_dim2)
        self.hidden_bias2 = np.zeros((1, hidden_dim2), dtype=np.float64)

        # output layer weights and bias
        self.output_weights = self.glorot_weights(hidden_dim2, outputs_dim)
        self.output_bias = np.zeros((1, outputs_dim), dtype=np.float64)
        
    # weight initialization from the xavier glorot paper
    def glorot_weights(self, input_dim, output_dim):
        limit = np.sqrt(6 / (input_dim + output_dim))
        
        # using random uniform values to prevent gradients from exploding and maintaining stability
        return np.random.uniform(-limit, limit, (output_dim, input_dim)).astype(np.float64)

    # activation function to introduce nonlinearity
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    #help computing gradient, gives directions to weight and bias update based on value given (x)
    def relu(self, x):
        return np.maximum(0, x)

    def relu_derivative(self, x):
        return (x > 0).astype(float)
    
    # feed forward function
    def forward(self, inputs, training=True):
        self.inputs = inputs

        #layer 1 
        self.layer_1 = self.relu(np.dot(inputs, self.hidden_weights1.T) + self.hidden_bias1)
        
        if training:
            self.layer_1 = self.dropout(self.layer_1)
        
        # layer 2
        self.layer_2 = self.relu(np.dot(self.layer_1, self.hidden_weights2.T) + self.hidden_bias2)

        # layer 3 (output layer)
        self.layer_3 = self.sigmoid(np.dot(self.layer_2, self.output_weights.T) + self.output_bias)
        return self.layer_3
    
    # loss function
    def bce_loss(self, y, y_hat):
        epsilon = 1e-8  # to avoid log(0)
        return -np.mean(y * np.log(y_hat + epsilon) + (1 - y) * np.log(1 - y_hat + epsilon))

    def dropout(self, layer, rate= 0.25):
        mask = (np.random.rand(*layer.shape) > rate).astype(np.float64)
        return layer * mask / (1 - rate)
    
    # backpropagation, computing gradients and error, updates both + weights/biases
    def backward(self, y, y_hat, learning_rate=0.0001):
        
        #error = y_hat - y
        #output_gradient = error * (y_hat * (1 - y_hat))
        
        output_gradient = y_hat - y
        self.output_weights -= learning_rate * np.dot(output_gradient.T, self.layer_2)
        self.output_bias -= learning_rate * np.sum(output_gradient, axis=0, keepdims=True)

        # gradient for hidden layer 2
        hidden_error2 = np.dot(output_gradient, self.output_weights)  # Fixed here
        hidden_gradient2 = hidden_error2 * self.relu_derivative(self.layer_2)
        
        # update hidden layer 2 weights and bias
        self.hidden_weights2 -= learning_rate * np.dot(hidden_gradient2.T, self.layer_1)
        self.hidden_bias2 -= learning_rate * np.sum(hidden_gradient2, axis=0, keepdims=True)

        # compute gradient for hidden layer 1
        hidden_error1 = np.dot(hidden_gradient2, self.hidden_weights2.T)
        hidden_gradient1 = hidden_error1 * self.relu_derivative(self.layer_1)
        
        # update hidden layer 1 weights and bias
        self.hidden_weights1 -= learning_rate * np.dot(hidden_gradient1.T, self.inputs)
        self.hidden_bias1 -= learning_rate * np.sum(hidden_gradient1, axis=0, keepdims=True)

    

In [4]:
# training loop
def train(X, y):
    
    X = np.array(X, dtype=np.float64)
    y = np.array(y, dtype=np.float64)
    
    if len(y.shape) == 1:
        y = y.reshape(-1, 1)
    
    input_dim = X.shape[1]
    hidden_dim1 = 16
    hidden_dim2 = 16
    output_dim = y.shape[1]

    # def's instance of neural
    n = neural(input_dim, output_dim, hidden_dim1, hidden_dim2)
    epochs = 50001
    
    # training using batches
    batch_size = 64

    for epoch in range(epochs):
        for start in range(0, X.shape[0], batch_size):
            end = start + batch_size
            X_batch = X[start:end]
            y_batch = y[start:end]
            predictions = n.forward(X_batch)
            loss = n.bce_loss(y_batch, predictions)
            n.backward(y_batch, predictions)
        if epoch % 10000 == 0:
            print(f"epoch {epoch}/{epochs -1}, loss: {loss}")
            

    # training normally
    '''
    for epoch in range(epochs):
        predictions = n.forward(X)
        loss = n.bce_loss(y, predictions)
        n.backward(y, predictions)
    
        if epoch % 10000 == 0:
            print(f"epoch {epoch}/{epochs -1}, loss: {loss}")
    '''
    return n


In [5]:
# training the model
model = train(X_train_trans, y_train)

epoch 0/50000, loss: 0.6790791504799314
epoch 10000/50000, loss: 0.25826133440179255
epoch 20000/50000, loss: 0.11747269470526343
epoch 30000/50000, loss: 0.040812720526524356
epoch 40000/50000, loss: 0.1279028170818199
epoch 50000/50000, loss: 0.08054158309288809


In [6]:
# evaluating it
def evaluate(model, X, y):

    y = np.array(y, dtype=np.float64)

    if len(y.shape) == 1:
        y = y.reshape(-1, 1)

    predictions = model.forward(X, training=False)
    loss = model.bce_loss(y, predictions)

    predicted_labels = (predictions > 0.5).astype(int)
    accuracy = np.mean(predicted_labels == y)
    
    print(f"Evaluation Loss: {loss}")
    print(f"Evaluation Accuracy: {accuracy*100}%")


In [7]:
print(evaluate(model, X_test_trans, y_test))

Evaluation Loss: 2.3096013530009762
Evaluation Accuracy: 72.0%
None
