<a href="https://colab.research.google.com/github/carlos-alves-one/-NeuroCredit/blob/master/artificial_neural_networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Goldsmiths University of London
### Author.....: Carlos Manuel de Oliveira Alves
### Student...: cdeol003
### Created...: 27/03/2023
### FYP..........: NeuroCredit

## I. Import Libraries and Packages

In [138]:
# Importing the library numpy for mathematical operations
import numpy as np

# Importing the pandas library to read the data
import pandas as pd

# Importing the StandardScaler module to scale the data
from sklearn.preprocessing import StandardScaler

# Importing module to split the data into training and test sets
from sklearn.model_selection import train_test_split

## II. Activation function (Sigmoid)

In [139]:
# Defining the sigmoid function
def sigmoid(x):

  # Applying the sigmoid function on x and returning the value
  return 1 / (1 + np.exp(-x))

## III. Derivative of the activation function

In [140]:
# Defining the sigmoid derivative function
def sigmoid_derivative(x):

  # Applying the sigmoid derivative function on x and returning the value
  return x * (1 - x)

## IV. Mean squared error loss function

In [141]:
# Defining the mean squared error loss function
def mse_loss(y_true, y_pred):

  # Calculating the mean squared error loss and returning the value
  return np.mean((y_true - y_pred)**2)

## V. Performance metrics

### a. Accuracy function for classification

In [142]:
# Defining the accuracy function for classification
def accuracy(y_true, y_pred):

  # Calculating the accuracy and returning the value
  return np.mean(y_true == y_pred)

### b. Precision function for classification

In [143]:
# Defining the precision function for classification
def precision(y_true, y_pred):

  # Calculating the true positives and storing the value in tp
  tp = np.sum((y_true == 1) & (y_pred == 1))

  # Calculating the false positives and storing the value in fp
  fp = np.sum((y_true == 0) & (y_pred == 1))
    
  # Calculating the precision and returning the value
  return tp / (tp + fp)

### c. Recall function for classification

In [144]:
# Defining the recall function for classification
def recall(y_true, y_pred):

  # Calculating the true positives and storing the value in tp
  tp = np.sum((y_true == 1) & (y_pred == 1))

  # Calculating the false negatives and storing the value in fn
  fn = np.sum((y_true == 1) & (y_pred == 0))

  # Return the recall value
  return tp / (tp + fn)

### d. F1 score function for classification

In [145]:
# Defining the f1 score function for classification
def f1_score(y_true, y_pred):

  # Calculating the precision and storing the value in prec
  prec = precision(y_true, y_pred)

  # Calculating the recall and storing the value in rec
  rec = recall(y_true, y_pred)

  # Calculating the f1 score and returning the value
  return 2 * (prec * rec) / (prec + rec)

## VI. Neural network architecture

In [146]:
# Defining the number of input nodes
input_nodes = 7

# Defining the number of hidden nodes
hidden_nodes = 10

# Defining the number of output nodes
output_nodes = 1

## VII. Weights and biases initialization

In [147]:
# Setting the random seed
np.random.seed(0)

# Initializing the weights of the input layer 
weights_input_hidden = np.random.rand(input_nodes, hidden_nodes)

# Initializing the weights of the hidden layer
weights_hidden_output = np.random.rand(hidden_nodes, output_nodes)

# Initializing the biases of the hidden layer
bias_hidden = np.random.rand(hidden_nodes)

# Initializing the biases of the output layer
bias_output = np.random.randn(1, output_nodes)

## VIII. Learning rate

In [148]:
# Defining the learning rate
lr = 0.1

## IX. Train the neural network

In [149]:
# Defining the train function
def train(X_train, y_train, epochs):

  # Defining the global variables
  global weights_input_hidden, weights_hidden_output, bias_hidden, bias_output

  # Iterating over the number of epochs
  for epoch in range(epochs):

    #============== Forward pass ==============#

    # Calculating the input to the hidden layer
    hidden_layer_input = np.dot(X_train, weights_input_hidden) + bias_hidden


    # Calculating the output of the hidden layer
    hidden_layer_output = sigmoid(hidden_layer_input)

    # Calculating the input to the output layer
    output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + bias_output

    # Calculating the output of the output layer
    output_layer_output = sigmoid(output_layer_input)

    #============== Calculate the error (mean squared error) ==============#

    # Calculating the error in the output layer
    output_error = y_train - output_layer_output

    # Calculating the mean squared error
    mse = mse_loss(y_train, output_layer_output)

    # Printing the mean squared error
    print(f">> Epoch {epoch + 1}, MSE: {mse * 100:.2f} %")

    #==================== Backpropagation ====================#

    # Calculating the gradient of the output layer
    output_delta = output_error * sigmoid_derivative(output_layer_output)
    
    # Calculating the error in the hidden layer
    hidden_error = np.dot(output_delta, weights_hidden_output.T)

    # Calculating the gradient of the hidden layer
    hidden_delta = hidden_error * sigmoid_derivative(hidden_layer_output)

    #=============== Update weights and biases ===============#

    # Updating the weights of the hidden layer
    weights_hidden_output += np.dot(hidden_layer_output.T, output_delta) * lr

    # Updating the weights of the input layer
    weights_input_hidden += np.dot(X_train.T, hidden_delta) * lr

    # Updating the biases of the output layer
    bias_output += np.sum(output_delta, axis=0) * lr

    # Updating the biases of the hidden layer
    bias_hidden += np.sum(hidden_delta, axis=0) * lr


## X. Predict the output

In [150]:
# Defining the predict function
def predict(X_test):

  # Calculating the input to the hidden layer
  hidden_layer_input = np.dot(X_test, weights_input_hidden) + bias_hidden

  # Calculating the output of the hidden layer
  hidden_layer_output = sigmoid(hidden_layer_input)

  # Calculating the input to the output layer
  output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + bias_output

  # Calculating the output of the output layer
  output_layer_output = sigmoid(output_layer_input)

  # Returning the output of the output layer
  return np.round(output_layer_output)
  

## XI. Loading the dataset and pre-processing

### a. Prepare the dataset

In [151]:
# Reading the data with credit loans data
data = pd.read_csv('data.csv')

# Get the list of columns with int64 data type
int_columns = data.select_dtypes(include=['int64']).columns

# Convert the int64 columns to float64 columns
data[int_columns] = data[int_columns].astype('float64')

# Create a new dataframe with the columns that are highly correlated with the income
data_new = data[['income',
                'assets_value',
                'debt_to_income_ratio',
                'length_of_credit_history',
                'number_of_credit_accounts',
                'number_of_credit_accounts_opened_last_12_months',
                'saving_account_balance',
                'approval_status']]

# Print the first 5 rows of the new dataframe
data_new.head().T

Unnamed: 0,0,1,2,3,4
income,46319.0,15480.0,21614.0,25874.0,20389.0
assets_value,14680.0,46713.0,13026.0,27908.0,44309.0
debt_to_income_ratio,41.0,82.0,68.0,34.0,75.0
length_of_credit_history,24.0,0.0,99.0,55.0,30.0
number_of_credit_accounts,3.0,3.0,4.0,4.0,0.0
number_of_credit_accounts_opened_last_12_months,4.0,2.0,2.0,2.0,5.0
saving_account_balance,10207.0,16666.0,10413.0,16645.0,16366.0
approval_status,Rejected,Rejected,Rejected,Rejected,Rejected


### b. Select features and target

In [152]:
# Map the approval_status column to numerical values (0 for "Rejected" and 1 for "Approved")
data_new['approval_status'] = data_new['approval_status'].map({"Rejected": 0, "Approved": 1})

# Store the features in the X variable and the target in the y variable
X = data_new.drop('approval_status', axis=1)
y = data_new['approval_status']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_new['approval_status'] = data_new['approval_status'].map({"Rejected": 0, "Approved": 1})


### c. Standardize the features

In [153]:
# Create an instance of the StandardScaler
scaler = StandardScaler()

# Fit the scaler to the features and transform the features
X_scaled = scaler.fit_transform(X)

### d. Split the dataset into train and test sets

In [154]:
# Create the X_train, X_test, y_train, y_test variables by splitting the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Reshape y_train and y_test arrays to be 1-dimensional
y_train = y_train.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)

## XII. Train the neural network 

In [155]:
# Defining the number of epochs
epochs = 1000

# Training the neural network
train(X_train, y_train, epochs)

>> Epoch 1, MSE: 82.44 %
>> Epoch 2, MSE: 10.52 %
>> Epoch 3, MSE: 10.34 %
>> Epoch 4, MSE: 10.14 %
>> Epoch 5, MSE: 10.05 %
>> Epoch 6, MSE: 10.00 %
>> Epoch 7, MSE: 10.00 %
>> Epoch 8, MSE: 10.20 %
>> Epoch 9, MSE: 9.92 %
>> Epoch 10, MSE: 9.98 %
>> Epoch 11, MSE: 10.10 %
>> Epoch 12, MSE: 10.65 %
>> Epoch 13, MSE: 10.44 %
>> Epoch 14, MSE: 10.05 %
>> Epoch 15, MSE: 10.01 %
>> Epoch 16, MSE: 10.48 %
>> Epoch 17, MSE: 10.13 %
>> Epoch 18, MSE: 9.89 %
>> Epoch 19, MSE: 10.10 %
>> Epoch 20, MSE: 9.90 %
>> Epoch 21, MSE: 10.19 %
>> Epoch 22, MSE: 9.82 %
>> Epoch 23, MSE: 9.83 %
>> Epoch 24, MSE: 9.89 %
>> Epoch 25, MSE: 10.26 %
>> Epoch 26, MSE: 9.81 %
>> Epoch 27, MSE: 9.87 %
>> Epoch 28, MSE: 10.21 %
>> Epoch 29, MSE: 9.78 %
>> Epoch 30, MSE: 9.79 %
>> Epoch 31, MSE: 9.83 %
>> Epoch 32, MSE: 9.99 %
>> Epoch 33, MSE: 10.59 %
>> Epoch 34, MSE: 10.33 %
>> Epoch 35, MSE: 9.88 %
>> Epoch 36, MSE: 9.97 %
>> Epoch 37, MSE: 10.52 %
>> Epoch 38, MSE: 10.21 %
>> Epoch 39, MSE: 9.78 %
>> Epoch 40

## XIII. Make predictions

In [156]:
# Making predictions on the test set
y_pred = predict(X_test)