In [14]:
import random
import numpy as np
import pandas as pd
import copy
import time
from sklearn.preprocessing import OneHotEncoder

In [15]:
class Network(object):

    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
        
        self.bias_nitem = sum(sizes[1:])
        self.weight_nitem = sum([self.weights[i].size for i in range(self.num_layers-2)])

    def feedforward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = self.sigmoid(np.dot(w,a)+b)
        return a

    def sigmoid(self, z):
        return 1.0/(1.0+np.exp(-z))

    def score(self, X, y):

        total_score=0
        for i in range(X.shape[0]):
            predicted = self.feedforward(X[i].reshape(-1,1))
            actual = y[i].reshape(-1,1)
            total_score += np.sum(np.power(predicted-actual,2)/2)  # mean-squared error
        return total_score

    def accuracy(self, X, y):


        accuracy = 0
        for i in range(X.shape[0]):
            output = self.feedforward(X[i].reshape(-1,1))
            accuracy += int(np.argmax(output) == np.argmax(y[i]))
        return accuracy / X.shape[0] * 100

    def __str__(self):
        s = "\nBias:\n\n" + str(self.biases)
        s += "\nWeights:\n\n" + str(self.weights)
        s += "\n\n"
        return s

In [16]:
class NNGeneticAlgo:

    def __init__(self, n_pops, net_size, mutation_rate, crossover_rate, retain_rate, X, y):

        self.n_pops = n_pops
        self.net_size = net_size
        self.nets = [Network(self.net_size) for i in range(self.n_pops)]
        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate
        self.retain_rate = retain_rate
        self.X = X[:]
        self.y = y[:]
    
    def get_random_point(self, type):


        nn = self.nets[0]
        layer_index, point_index = random.randint(0, nn.num_layers-2), 0
        if type == 'weight':
            row = random.randint(0,nn.weights[layer_index].shape[0]-1)
            col = random.randint(0,nn.weights[layer_index].shape[1]-1)
            point_index = (row, col)
        elif type == 'bias':
            point_index = random.randint(0,nn.biases[layer_index].size-1)
        return (layer_index, point_index)

    def get_all_scores(self):
        return [net.score(self.X, self.y) for net in self.nets]

    def get_all_accuracy(self):
        return [net.accuracy(self.X, self.y) for net in self.nets]

    def crossover(self, father, mother):
        nn = copy.deepcopy(father)

        for _ in range(self.nets[0].bias_nitem):
            layer, point = self.get_random_point('bias')
            if random.uniform(0,1) < self.crossover_rate:
                nn.biases[layer][point] = mother.biases[layer][point]

        for _ in range(self.nets[0].weight_nitem):
            layer, point = self.get_random_point('weight')
            if random.uniform(0,1) < self.crossover_rate:
                nn.weights[layer][point] = mother.weights[layer][point]
        
        return nn
        
    def mutation(self, child):

        nn = copy.deepcopy(child)

        for _ in range(self.nets[0].bias_nitem):
            layer, point = self.get_random_point('bias')
            if random.uniform(0,1) < self.mutation_rate:
                nn.biases[layer][point] += random.uniform(-0.5, 0.5)

        for _ in range(self.nets[0].weight_nitem):
            layer, point = self.get_random_point('weight')
            if random.uniform(0,1) < self.mutation_rate:
                nn.weights[layer][point[0], point[1]] += random.uniform(-0.5, 0.5)

        return nn

    def evolve(self):

        score_list = list(zip(self.nets, self.get_all_scores()))

        score_list.sort(key=lambda x: x[1])

        score_list = [obj[0] for obj in score_list]

        retain_num = int(self.n_pops*self.retain_rate)
        score_list_top = score_list[:retain_num]

        retain_non_best = int((self.n_pops-retain_num) * self.retain_rate)
        for _ in range(random.randint(0, retain_non_best)):
            score_list_top.append(random.choice(score_list[retain_num:]))

        while len(score_list_top) < self.n_pops:

            father = random.choice(score_list_top)
            mother = random.choice(score_list_top)

            if father != mother:
                new_child = self.crossover(father, mother)
                new_child = self.mutation(new_child)
                score_list_top.append(new_child)
        
        self.nets = score_list_top


In [17]:
def main():

    df2 = pd.read_csv('Bank_Personal_Loan_Modelling.csv')
    df_norm = (df2-df2.min())/(df2.max()-df2.min())
    df_norm = df_norm.drop(['ID','ZIP Code','Online','CCAvg','Mortgage','Experience'],axis = 1)
    X = df_norm.iloc[:, :-1].values
    y = df_norm.iloc[:, -1].values

    y = y.reshape(-1, 1)
    enc = OneHotEncoder()
    enc.fit(y)
    y = enc.transform(y).toarray()

    N_POPS = 32
    NET_SIZE = [7,6,5,2]
    MUTATION_RATE = 0.3
    CROSSOVER_RATE = 0.4
    RETAIN_RATE = 0.4

    nnga = NNGeneticAlgo(N_POPS, NET_SIZE, MUTATION_RATE, CROSSOVER_RATE, RETAIN_RATE, X, y)

    start_time = time.time()
    
    for i in range(20):

        if i % 1 == 0:
            print("Current iteration : {}".format(i+1))
            print("Time taken by far : %.1f seconds" % (time.time() - start_time))
            print("Current top member's network accuracy: %.2f%%\n" % nnga.get_all_accuracy()[0])

        nnga.evolve()

if __name__ == "__main__":
    main()

Current iteration : 1
Time taken by far : 0.0 seconds
Current top member's network accuracy: 70.60%

Current iteration : 2
Time taken by far : 5.1 seconds
Current top member's network accuracy: 70.60%

Current iteration : 3
Time taken by far : 10.6 seconds
Current top member's network accuracy: 70.60%

Current iteration : 4
Time taken by far : 15.6 seconds
Current top member's network accuracy: 70.60%

Current iteration : 5
Time taken by far : 21.0 seconds
Current top member's network accuracy: 70.60%

Current iteration : 6
Time taken by far : 26.3 seconds
Current top member's network accuracy: 70.60%

Current iteration : 7
Time taken by far : 31.4 seconds
Current top member's network accuracy: 70.60%

Current iteration : 8
Time taken by far : 36.4 seconds
Current top member's network accuracy: 70.60%

Current iteration : 9
Time taken by far : 41.6 seconds
Current top member's network accuracy: 70.60%

Current iteration : 10
Time taken by far : 46.6 seconds
Current top member's network

In [18]:
df2 = pd.read_csv('Bank_Personal_Loan_Modelling.csv')
df_norm = (df2-df2.min())/(df2.max()-df2.min())
df_norm = df_norm.drop(['ID','ZIP Code','Online','CCAvg'],axis = 1)

In [19]:
df_norm.head()

Unnamed: 0,Age,Experience,Income,Family,Education,Mortgage,Personal Loan,Securities Account,CD Account,CreditCard
0,0.045455,0.086957,0.189815,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.5,0.478261,0.12037,0.666667,0.0,0.0,0.0,1.0,0.0,0.0
2,0.363636,0.391304,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.272727,0.26087,0.425926,0.0,0.5,0.0,0.0,0.0,0.0,0.0
4,0.272727,0.23913,0.171296,1.0,0.5,0.0,0.0,0.0,0.0,1.0
