In [1]:
# Importing the necessary libraries
import numpy as np
import pandas as pd
import warnings    
warnings.filterwarnings('ignore')

In [2]:
# Reading the dataset
df = pd.read_csv('Bank_Personal_Loan_Modelling.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ID                  5000 non-null   int64  
 1   Age                 5000 non-null   int64  
 2   Experience          5000 non-null   int64  
 3   Income              5000 non-null   int64  
 4   ZIP Code            5000 non-null   int64  
 5   Family              5000 non-null   int64  
 6   CCAvg               5000 non-null   float64
 7   Education           5000 non-null   int64  
 8   Mortgage            5000 non-null   int64  
 9   Personal Loan       5000 non-null   int64  
 10  Securities Account  5000 non-null   int64  
 11  CD Account          5000 non-null   int64  
 12  Online              5000 non-null   int64  
 13  CreditCard          5000 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 547.0 KB


##### Data preprocessing

In [3]:
# Dropping the unnecessary columns
df.drop(columns = ['ZIP Code'],inplace = True)
df.drop_duplicates(inplace = True)

In [4]:
# Shifting the target variable to the last column 
columns = ['ID','Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education',
       'Mortgage','Securities Account', 'CD Account',
       'Online', 'CreditCard', 'Personal Loan']
df = df[columns]

In [5]:
# Splitting the independent & dependent features
x = df.iloc[:,:-1].values
y = df.iloc[:,-1].values

In [6]:
# Performing the train test split
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.30,random_state = 45)

##### Neural Network

In [7]:
learning_rate = 0.05

In [8]:
def sigmoid(x):
    return (1/(1+np.exp(-1*x)))

In [9]:
def derivative_sigmoid(x):
    return sigmoid(x) *(1-sigmoid(x))

In [10]:
# Initializing the weights randomly
input_weights = np.random.rand(1,12) # The input layer consists of 12 neurons so we take 12 random weights
hidden_weights = np.random.rand(1,4) # The hidden layer consists of 6 neurons so we take 6 random weights

In [11]:
def model(new_input_weight,new_hidden_weight,x_train,y_train,learning_rate):

    # Forward Propagation

    input_layer1 = input_weights*x_train # Multiplying the input data with weights(cross product)
    input_layer2 = input_layer1.reshape(input_layer1.shape[0],4,3).sum(axis = 2) # Converting the dimension for hidden layer
    input_layer3 = sigmoid(input_layer2) # Passing through the activation function
    hidden_layer1 = hidden_weights*input_layer3  # Multiplying the data from activation fn with weights(cross product)
    hidden_layer2 = hidden_layer1.reshape(hidden_layer1.shape[0],1,4).sum(axis = 2) # Converting the dimension for output layer
    output_layer1 = sigmoid(hidden_layer2) # Passing through the activation function

    # Backward Propagation

    error = output_layer1 - y_train # Error in prediction
    delta_hidden = -1 * derivative_sigmoid(hidden_layer1) * input_layer3
    delta_hidden = (delta_hidden.sum(axis = 0)/output_layer1.shape[0]).reshape(1,4)
    new_hidden_weight = hidden_weights + (learning_rate * delta_hidden)
    new_weight = (np.ones((4,3)) * (new_hidden_weight.reshape(4,1))).reshape(1,12)
    delta_input = ((-1 * (new_weight) * (derivative_sigmoid(input_layer1)) * x_train).sum(axis=0) / (output_layer1.shape[0])).reshape(1,12)
    new_input_weight = new_input_weight + (learning_rate * delta_input)
    out = {"input_weight" : new_input_weight,
            "hidden_weight" : new_hidden_weight}
    return out

In [12]:
from sklearn.metrics import accuracy_score
def predict(weights,x_test,y_test):

    input_weight = weights['input_weight']
    hidden_weight = weights['hidden_weight']
    input_layer = input_weight * x_test # Multiplying the input data with weights(cross product)
    input_layer = input_layer.reshape(input_layer.shape[0],4,3).sum(axis=2)
    input_layer = sigmoid(input_layer)  # Passing through the activation function
    hidden_layer = hidden_weight * input_layer # Multiplying the data from activation fn with weights(cross product)
    hidden_layer = hidden_layer.reshape(hidden_layer.shape[0],1,4).sum(axis=2)
    output_layer = np.floor(sigmoid(hidden_layer))  # Passing through the activation function
    
    return accuracy_score(output_layer,y_test)*100

In [13]:
weights = model(input_weights,hidden_weights,x_train,y_train,learning_rate)

In [14]:
output = predict(weights,x_test,y_test)
print("Accuracy score: ",round(output,2))

Accuracy score:  89.2


##### Weight Optimization using Cultural Algorithm

In [15]:
# Generating the population
population_size = 50
population = []
for solution in range(population_size):
    # Initializing the population with random weights
    input_weight = np.random.rand(1,12)
    hidden_weight = np.random.rand(1,4)
    # Combining the weights
    weight = np.append(input_weight,hidden_weight)
    weight = weight.reshape(1,16)
    population.append(weight)

In [16]:
# The dataframe stores the weights & the corresponding accuracy
pop_df = pd.DataFrame()
pop_df['weights'] = list(population)

In [17]:
def ConvertToMatrix(solution):
    weight = {'input_weight':(solution.reshape(4,4)[:3]).reshape(1,12),
             'hidden_weight': solution.reshape(4,4)[-1]}
    return weight

In [18]:
def parent_selection(population_size,no_of_parents = 2):
    # Tournament selection
    # We select 2 parents at random from the population
    parents = [np.random.randint(0,population_size) for i in range(no_of_parents)]
    return parents # Returns the indices of the population as a list

In [19]:
def fitness_function(population,x,y):
    # Here we consider the accuracy of the model with a particular set of weights to be the fitness function
    fitness_score = []
    for solution in population: # We take each set of weights and compute the accuracy 
        sol = ConvertToMatrix(solution)
        fitness_score.append(predict(sol,x,y)) # Returns the accuracy score that set of weights
    return fitness_score

In [20]:
# We create a belief system such that we consider only those weights(solutions) that give atleast 85% accuracy
def influence(pop_df):
    culture_df = pd.DataFrame(columns = ['weights','fitness_value'])
    culture_df = pop_df[pop_df['fitness_value'] >= 85] 
    return culture_df,len(culture_df)

In [21]:
# We take 2 additional parents that follow the belief system(atleast 85% accuracy) to maintain the culture
def acceptance(population_size,no_of_parents = 2):
    # Tournament selection
    # We select 2 parents at random from the culture
    parents = [np.random.randint(0,population_size) for i in range(no_of_parents)]
    return parents # Returns the indices of the population as a list

In [22]:
def crossover(parent1,parent2,parent3,parent4,culture_df,weight_df):
    #Performing single point crossover
    # Takes 4 parents - 2 randomly picked & 2 that follow the belief system
    
    #Generating the split point
    split_point = np.random.randint(0,16) # Since there are 16 weights in a solution
    
    # Performing single point crossover
    child1 = np.append(weight_df['weights'][parent1][:split_point],weight_df['weights'][parent2][split_point:])
    child1 = child1.reshape(1,16)
    
    child2 = np.append(weight_df['weights'][parent1][split_point:],weight_df['weights'][parent2][:split_point])
    child2 = child2.reshape(1,16)
    
    child3 = np.append(culture_df['weights'][parent3][:split_point],culture_df['weights'][parent4][split_point:])
    child3 = child3.reshape(1,16)
    
    child4 = np.append(culture_df['weights'][parent3][split_point:],culture_df['weights'][parent4][:split_point])
    child4 = child4.reshape(1,16)
    
    return child1,child2,child3,child4

In [23]:
def mutation(child1,child2,child3,child4):
    # Performing swap mutation
    # Generating the indices to swap the weights
    indices = [np.random.randint(0,16) for i in range(2)]
    
    # Swapping the weights at those indices for all the children
    child1[0][indices[0]],child1[0][indices[1]] = child1[0][indices[1]],child1[0][indices[0]]
    
    child2[0][indices[0]],child2[0][indices[1]] = child2[0][indices[1]],child2[0][indices[0]]
    
    child3[0][indices[0]],child3[0][indices[1]] = child3[0][indices[1]],child3[0][indices[0]]
    
    child4[0][indices[0]],child4[0][indices[1]] = child4[0][indices[1]],child4[0][indices[0]]
    
    return child1,child2,child3,child4

In [24]:
def replace_population(weight_df,children,sol1,sol2,sol3,sol4,x,y):
    
    # Replacing the solutions having least accuracy with the newly produced offspring
    
    #Generating the accuracy of the new solutions(offsprings)
    fitness_score = fitness_function(children,x,y)
    
    # Replacing the old solutions in the dataframe with new offsprings
    weight_df.loc[sol1,['weights','fitness_value']] = [children[0],fitness_score[0]]
    weight_df.loc[sol2,['weights','fitness_value']] = [children[1],fitness_score[1]]
    weight_df.loc[sol3,['weights','fitness_value']] = [children[2],fitness_score[2]]
    weight_df.loc[sol4,['weights','fitness_value']] = [children[3],fitness_score[3]]

In [25]:
def Cultural_Algorithm(weight_df,x,y,epoch):
    for i in range(epoch):
        #Fitness score calculation
        weight_df['fitness_value'] = fitness_function(list(weight_df['weights'].values),x,y)
        
        # Parent selection by random process
        parents = parent_selection(population_size,2)
        parent1,parent2 = parents[0],parents[1]
        
        # Parent selection from the culture
        culture_population_df,culture_size = influence(weight_df)
        culture_parents = acceptance(culture_size,2)
        parent3,parent4 = culture_parents[0],culture_parents[1]
        
        # Crossover operation
        child1,child2,child3,child4 = crossover(parent1,parent2,parent3,parent4,culture_population_df,weight_df)
        
        # Mutation operation
        child1,child2,child3,child4 = mutation(child1,child2,child3,child4)
        children = np.stack((child1,child2,child3,child4))
        
        # Sorting the dataframe containing weights & fitness value based on fitness value(accuracy)
        weight_df.sort_values(by = 'fitness_value',ascending = False)
        
        # Generating the indices of the solutions with least accuracy
        least_acc1 = len(weight_df)-1
        least_acc2 = len(weight_df)-2
        least_acc3 = len(weight_df)-3
        least_acc4 = len(weight_df)-4
        
        # Replacing the solutions with least accuracy in the population
        replace_population(weight_df,children,least_acc1,least_acc2,least_acc3,least_acc4,x,y)
        
        # Sorting the dataframe after the addition of new solutions
        weight_df.sort_values(by = 'fitness_value',ascending = False)
        
        # The weights with highest accuracy
        best_weight = ConvertToMatrix(weight_df['weights'][0]) 
        accuracy = weight_df['fitness_value'][0]
        
        return best_weight,round(accuracy)

In [26]:
Cultural_Algorithm(pop_df,x_test,y_test,100)

({'input_weight': array([[0.002746  , 0.22805055, 0.24721236, 0.68547584, 0.86076404,
          0.58348449, 0.52097235, 0.07803278, 0.36716149, 0.71663317,
          0.66139079, 0.53869698]]),
  'hidden_weight': array([0.63023787, 0.96537321, 0.16046596, 0.70231835])},
 89)