In [73]:
# Importing the necessary libraries
import numpy as np
import pandas as pd
import warnings    
warnings.filterwarnings('ignore')

In [74]:
# Reading the dataset
df = pd.read_csv('Bank_Personal_Loan_Modelling.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ID                  5000 non-null   int64  
 1   Age                 5000 non-null   int64  
 2   Experience          5000 non-null   int64  
 3   Income              5000 non-null   int64  
 4   ZIP Code            5000 non-null   int64  
 5   Family              5000 non-null   int64  
 6   CCAvg               5000 non-null   float64
 7   Education           5000 non-null   int64  
 8   Mortgage            5000 non-null   int64  
 9   Personal Loan       5000 non-null   int64  
 10  Securities Account  5000 non-null   int64  
 11  CD Account          5000 non-null   int64  
 12  Online              5000 non-null   int64  
 13  CreditCard          5000 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 547.0 KB


##### Data preprocessing

In [75]:
# Dropping the unnecessary columns
df.drop(columns = ['ZIP Code'],inplace = True)
df.drop_duplicates(inplace = True)

In [76]:
# Shifting the target variable to the last column 
columns = ['ID','Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education',
       'Mortgage','Securities Account', 'CD Account',
       'Online', 'CreditCard', 'Personal Loan']
df = df[columns]

In [77]:
# Splitting the independent & dependent features
x = df.iloc[:,:-1].values
y = df.iloc[:,-1].values

In [78]:
# Performing the train test split
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.30,random_state = 45)

##### Neural Network

In [79]:
learning_rate = 0.05

In [80]:
def sigmoid(x):
    return (1/(1+np.exp(-1*x)))

In [81]:
def derivative_sigmoid(x):
    return sigmoid(x) *(1-sigmoid(x))

In [82]:
# Initializing the weights randomly
input_weights = np.random.rand(1,12) # The input layer consists of 12 neurons so we take 12 random weights
hidden_weights = np.random.rand(1,4) # The hidden layer consists of 6 neurons so we take 6 random weights

In [83]:
def model(new_input_weight,new_hidden_weight,x_train,y_train,learning_rate):

    # Forward Propagation

    input_layer1 = input_weights*x_train # Multiplying the input data with weights(cross product)
    input_layer2 = input_layer1.reshape(input_layer1.shape[0],4,3).sum(axis = 2) # Converting the dimension for hidden layer
    input_layer3 = sigmoid(input_layer2) # Passing through the activation function
    hidden_layer1 = hidden_weights*input_layer3  # Multiplying the data from activation fn with weights(cross product)
    hidden_layer2 = hidden_layer1.reshape(hidden_layer1.shape[0],1,4).sum(axis = 2) # Converting the dimension for output layer
    output_layer1 = sigmoid(hidden_layer2) # Passing through the activation function

    # Backward Propagation

    error = output_layer1 - y_train # Error in prediction
    delta_hidden = -1 * derivative_sigmoid(hidden_layer1) * input_layer3
    delta_hidden = (delta_hidden.sum(axis = 0)/output_layer1.shape[0]).reshape(1,4)
    new_hidden_weight = hidden_weights + (learning_rate * delta_hidden)
    new_weight = (np.ones((4,3)) * (new_hidden_weight.reshape(4,1))).reshape(1,12)
    delta_input = ((-1 * (new_weight) * (derivative_sigmoid(input_layer1)) * x_train).sum(axis=0) / (output_layer1.shape[0])).reshape(1,12)
    new_input_weight = new_input_weight + (learning_rate * delta_input)
    out = {"input_weight" : new_input_weight,
            "hidden_weight" : new_hidden_weight}
    return out

In [84]:
from sklearn.metrics import accuracy_score
def predict(weights,x_test,y_test):

    input_weight = weights['input_weight']
    hidden_weight = weights['hidden_weight']
    input_layer = input_weight * x_test # Multiplying the input data with weights(cross product)
    input_layer = input_layer.reshape(input_layer.shape[0],4,3).sum(axis=2)
    input_layer = sigmoid(input_layer)  # Passing through the activation function
    hidden_layer = hidden_weight * input_layer # Multiplying the data from activation fn with weights(cross product)
    hidden_layer = hidden_layer.reshape(hidden_layer.shape[0],1,4).sum(axis=2)
    output_layer = np.floor(sigmoid(hidden_layer))  # Passing through the activation function
    
    return accuracy_score(output_layer,y_test)*100

In [85]:
weights = model(input_weights,hidden_weights,x_train,y_train,learning_rate)

In [86]:
output = predict(weights,x_test,y_test)
print("Accuracy score: ",round(output,2))

Accuracy score:  89.2


##### Weight Optimization using Ant Colony 

In [87]:
def ConvertToMatrix(solution):
    weight = {'input_weight':(solution.reshape(4,4)[:3]).reshape(1,12),
             'hidden_weight': solution.reshape(4,4)[-1]}
    return weight

In [88]:
def fitness_function(population,x,y):
    # Here we consider the accuracy of the model with a particular set of weights to be the fitness function
    fitness_score = []
    for solution in population: # We take each set of weights and compute the accuracy 
        sol = ConvertToMatrix(solution)
        fitness_score.append(predict(sol,x,y)) # Returns the accuracy score that set of weights
    return fitness_score

In [89]:
# Generating the population
population_size = 10
population = []
for path in range(population_size):
    # Initializing the population with random weights
    weights = np.random.rand(1,16)
    population.append(weights)

# Calculating the accuracy for each path in the population
accuracy = np.array(fitness_function(population,x_test,y_test)) * 0.01
population = np.array(population)
column = sorted(list(set(list(population.reshape(160,)))))

# Cost matrix
cost_matrix = np.zeros((160,160))
#Pheromone matrix
pheromone_matrix = np.ones((160,160))

In [90]:
parameter = np.random.rand() #rho parameter

In [91]:
def metrics(weights,x_test,y_test):
    report = []
    for individual in weights:
        weight = ConvertToMatrix(individual)
        accuracy = fitness_function([individual],x_test,y_test)[0]
        report.append((weight,accuracy))
    return report 

In [92]:
# Populating the cost matrix
for weight in range(10):
    i = population[weight] 
    for j in range(0,15):
        cost_matrix[column.index(i[0][j])][column.index(i[0][j+1])] = 1 / accuracy[weight]

In [93]:
# Generating a path
def path_cost(cost_matrix,pheromone_matrix,initial_node,neighbours,column):
    probabilities =[] # Contains the probability of taking a path
    cost = 0
    initial_node = column.index(initial_node)
    for neighbour in neighbours:
        weight = column.index(neighbour)
        cost += (pheromone_matrix[initial_node][weight] * cost_matrix[initial_node][weight]) 
    for neighbour in neighbours:
        probabilities.append((pheromone_matrix[initial_node][weight] * cost_matrix[initial_node][weight])/cost)
    temp  = np.random.rand()
    for probability in range(len(probabilities)):
        if probabilities[probability] >= temp:
            return neighbours[probability]
    return neighbours[0]

In [94]:
# To find the neighbours of a node
def neighbours(population,initial_node):
    neighbour = []
    for j in population:
        for i in range(16):
            temp = j[0][i]
            if(temp == initial_node):
                if(i!=15 and j[0][i+1] not in neighbour):
                    neighbour.append(j[0][i+1])
                if(i!=0 and j[0][i-1] not in neighbour):
                    neighbour.append(j[0][i-1])
    return neighbour

In [95]:
def modify_pheromone_level(pheromone_matrix,initial_node,next_node,column):
    #If a path is chosen, then the pheromone level in that path is increased by 1
    pheromone_matrix[column.index(initial_node)][column.index(next_node)] += 1
    return pheromone_matrix

In [96]:
def decay_rate(parameter,cost_matrix,pheromone_matrix):
    # Decay rate for the pheromones
    for i in range(pheromone_matrix.shape[0]):
        for j in range(pheromone_matrix.shape[1]): 
            pheromone_matrix[i][j] = (1-parameter) * pheromone_matrix[i][j] + cost_matrix[i][j] 
    return pheromone_matrix

In [97]:
def sorting_criteria(metrics):
    return metrics[1]

In [98]:
def Ant_Colony(population_size,population,column,cost_matrix,pheromone_matrix,parameter,x,y,epoch):
    list_of_paths = [] # Contains the list of all paths
    for i in range(epoch):
        # Getting the start node from the list of weights
        initial_node = np.random.randint(0,len(column)) 
        path = [] # Contains the path
        path.append(column[initial_node])
        initial_node = column[initial_node]
        for i in range(15):
            # Gives the list of neighbours close to the initial node
            neighbour = neighbours(population,initial_node) 
            # The node to which the ant moves from initial node
            next_node = path_cost(cost_matrix,pheromone_matrix,initial_node,neighbour,column) 
            # Increasing the pheromone level in the path chosen by the ant
            pheromone  = modify_pheromone_level(pheromone_matrix,initial_node,next_node,column)
            # Decaying of pheromone level
            pheromone = decay_rate(parameter,cost_matrix,pheromone_matrix) 
            # Adding the next node to the current path
            path.append(next_node) 
            # Updating the start node as the next node
            initial_node = next_node 
        path  = np.array(path)
        list_of_paths.append(path) # Adding the path to the list containing all the paths
    list_of_paths = list(np.array(list_of_paths).reshape(len(list_of_paths),1,16))
    return metrics(list_of_paths,x,y) # Returns the accuracy associated with those videos

In [99]:
metrics = Ant_Colony(population_size,population,column,cost_matrix,pheromone_matrix,parameter,x_test,y_test,100)
metrics = sorted(metrics,key = sorting_criteria,reverse=True)[0]
print(metrics)

({'input_weight': array([[0.6979515 , 0.36220984, 0.58777186, 0.3179276 , 0.0247092 ,
        0.57352694, 0.35620501, 0.71465707, 0.34905706, 0.56866994,
        0.34905706, 0.56866994]]), 'hidden_weight': array([0.34905706, 0.56866994, 0.34905706, 0.56866994])}, 89.2)
