In [1]:
# Importing the necessary libraries
import numpy as np
import pandas as pd
import warnings    
warnings.filterwarnings('ignore')

In [2]:
# Reading the dataset
df = pd.read_csv('Bank_Personal_Loan_Modelling.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ID                  5000 non-null   int64  
 1   Age                 5000 non-null   int64  
 2   Experience          5000 non-null   int64  
 3   Income              5000 non-null   int64  
 4   ZIP Code            5000 non-null   int64  
 5   Family              5000 non-null   int64  
 6   CCAvg               5000 non-null   float64
 7   Education           5000 non-null   int64  
 8   Mortgage            5000 non-null   int64  
 9   Personal Loan       5000 non-null   int64  
 10  Securities Account  5000 non-null   int64  
 11  CD Account          5000 non-null   int64  
 12  Online              5000 non-null   int64  
 13  CreditCard          5000 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 547.0 KB


##### Data preprocessing

In [3]:
# Dropping the unnecessary columns
df.drop(columns = ['ZIP Code'],inplace = True)
df.drop_duplicates(inplace = True)

In [4]:
# Shifting the target variable to the last column 
columns = ['ID','Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education',
       'Mortgage','Securities Account', 'CD Account',
       'Online', 'CreditCard', 'Personal Loan']
df = df[columns]

In [5]:
# Splitting the independent & dependent features
x = df.iloc[:,:-1].values
y = df.iloc[:,-1].values

In [6]:
# Performing the train test split
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.30,random_state = 45)

##### Neural Network

In [7]:
learning_rate = 0.05

In [8]:
def sigmoid(x):
    return (1/(1+np.exp(-1*x)))

In [9]:
def derivative_sigmoid(x):
    return sigmoid(x) *(1-sigmoid(x))

In [10]:
# Initializing the weights randomly
input_weights = np.random.rand(1,12) # The input layer consists of 12 neurons so we take 12 random weights
hidden_weights = np.random.rand(1,4) # The hidden layer consists of 6 neurons so we take 6 random weights

In [11]:
def model(new_input_weight,new_hidden_weight,x_train,y_train,learning_rate):

    # Forward Propagation

    input_layer1 = input_weights*x_train # Multiplying the input data with weights(cross product)
    input_layer2 = input_layer1.reshape(input_layer1.shape[0],4,3).sum(axis = 2) # Converting the dimension for hidden layer
    input_layer3 = sigmoid(input_layer2) # Passing through the activation function
    hidden_layer1 = hidden_weights*input_layer3  # Multiplying the data from activation fn with weights(cross product)
    hidden_layer2 = hidden_layer1.reshape(hidden_layer1.shape[0],1,4).sum(axis = 2) # Converting the dimension for output layer
    output_layer1 = sigmoid(hidden_layer2) # Passing through the activation function

    # Backward Propagation

    error = output_layer1 - y_train # Error in prediction
    delta_hidden = -1 * derivative_sigmoid(hidden_layer1) * input_layer3
    delta_hidden = (delta_hidden.sum(axis = 0)/output_layer1.shape[0]).reshape(1,4)
    new_hidden_weight = hidden_weights + (learning_rate * delta_hidden)
    new_weight = (np.ones((4,3)) * (new_hidden_weight.reshape(4,1))).reshape(1,12)
    delta_input = ((-1 * (new_weight) * (derivative_sigmoid(input_layer1)) * x_train).sum(axis=0) / (output_layer1.shape[0])).reshape(1,12)
    new_input_weight = new_input_weight + (learning_rate * delta_input)
    out = {"input_weight" : new_input_weight,
            "hidden_weight" : new_hidden_weight}
    return out

In [12]:
from sklearn.metrics import accuracy_score
def predict(weights,x_test,y_test):

    input_weight = weights['input_weight']
    hidden_weight = weights['hidden_weight']
    input_layer = input_weight * x_test # Multiplying the input data with weights(cross product)
    input_layer = input_layer.reshape(input_layer.shape[0],4,3).sum(axis=2)
    input_layer = sigmoid(input_layer)  # Passing through the activation function
    hidden_layer = hidden_weight * input_layer # Multiplying the data from activation fn with weights(cross product)
    hidden_layer = hidden_layer.reshape(hidden_layer.shape[0],1,4).sum(axis=2)
    output_layer = np.floor(sigmoid(hidden_layer))  # Passing through the activation function
    
    return accuracy_score(output_layer,y_test)*100

In [13]:
weights = model(input_weights,hidden_weights,x_train,y_train,learning_rate)

In [14]:
output = predict(weights,x_test,y_test)
print("Accuracy score: ",round(output,2))

Accuracy score:  89.2


##### Weight Optimization using Particle Swarm 

In [15]:
# Generating the population
swarm_size = 100
swarm = []
velocity = [] # Contains the velocity of the particle
for particle in range(swarm_size):
    # Initializing the population with random weights & velocity
    input_weight = np.random.rand(1,12)
    hidden_weight = np.random.rand(1,4)
    particle_velocity = np.random.rand(1,16)
    # Combining the weights
    weight = np.append(input_weight,hidden_weight)
    weight = weight.reshape(1,16)
    swarm.append(weight)
    velocity.append(particle_velocity)

In [16]:
def ConvertToMatrix(solution):
    weight = {'input_weight':(solution.reshape(4,4)[:3]).reshape(1,12),
             'hidden_weight': solution.reshape(4,4)[-1]}
    return weight

In [17]:
def fitness_function(population,x,y):
    # Here we consider the accuracy of the model with a particular set of weights to be the fitness function
    fitness_score = []
    for solution in population: # We take each set of weights and compute the accuracy 
        sol = ConvertToMatrix(solution)
        fitness_score.append(predict(sol,x,y)) # Returns the accuracy score that set of weights
    return fitness_score

In [18]:
# The dataframe stores the weights(position), velocity & personal best of the particle(accuracy) 
swarm_df = pd.DataFrame()
swarm_df['weights'] = list(swarm)
swarm_df['velocity'] = velocity
swarm_df['personal_best'] = fitness_function(list(swarm_df['weights'].values),x,y)

In [19]:
def Particle_Swarm(swarm_df,swarm_size,x,y,epoch): 
    
    # Initializing the parameters
    global_best = global_best = np.array(swarm_df['personal_best']).max()
    inertia = np.random.rand()
    c1 = np.random.rand()
    r1 = np.random.rand()
    c2 = np.random.rand()
    r2 = np.random.rand()
    
    for i in range(epoch):
        
        # Getting the initial position(weight),velocity,personal best(accuracy) of each particle in the swarm
        weights = swarm_df['weights'].values
        velocity = swarm_df['velocity'].values
        personal_best = swarm_df['personal_best'].values.reshape(swarm_size,1)
        
        #Updated position & velocity after moving forward
        velocity = (inertia * velocity) + c1*r1*(personal_best*0.01 - weights) + c2*r2*(global_best*0.01 - weights)
        weights = weights + velocity
        
        # Updating the dataframe with new position & velocity
        swarm_df['weights'] = weights
        swarm_df['velocity'] = velocity
        swarm_df['personal_best'] = fitness_function(list(swarm_df['weights'].values),x,y)

    # Sorting based on accuracy(high to low)
    swarm_df.sort_values(by = 'personal_best',ascending = False)
    
    # Finding the optimal position & the accuracy
    best_weight = ConvertToMatrix(swarm_df['weights'][0])
    accuracy = swarm_df['personal_best'][0]
    
    return best_weight,round(accuracy)

In [20]:
Particle_Swarm(swarm_df,swarm_size,x_train,y_train,100)

({'input_weight': array([[0.90828973, 0.90828973, 0.90828973, 0.90828973, 0.90828973,
          0.90828973, 0.90828973, 0.90828973, 0.90828973, 0.90828973,
          0.90828973, 0.90828973]]),
  'hidden_weight': array([0.90828973, 0.90828973, 0.90828973, 0.90828973])},
 91)