# Genetic algorithm-based heuristic for feature selection in credit risk assessment

First, we import the libraries that are needed throughout this notebook:

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import cross_val_score
from sklearn.datasets import make_classification
from numpy.random import rand as random
from sklearn.model_selection import KFold
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import adam
from keras.optimizers import SGD
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Input

Using TensorFlow backend.


## Configuration

In [16]:
# Settings of the genetic search. The keys are exactly the keyword arguments
# of geneticAlgorithm, so the dict can be unpacked into the call with `**`.
genetic_config = {
    'maxFeatureNum': 12,   # presumably the upper bound on selected features -- TODO confirm
    'minFeatureNum': 5,    # presumably the lower bound on selected features -- TODO confirm
    'popSize': 50,         # number of chromosomes in the population
    'trueProb': .5,        # probability used when drawing each initial gene
    'cProb': .9,           # crossover probability
    'mProb': .01,          # per-gene mutation probability
    'generationNum': 50,   # number of generations to evolve
    'tPressure': .1,       # tournament size as a fraction of the population size
    'genNum': 12           # number of genes (length of the feature mask)
}

# Settings of the neural network used as fitness function. They are drawn at
# random once, when this cell runs.
nn_config = {
    'lr': np.random.uniform(0.3, 1.0),
    # The value is passed to Keras as `epochs`, which must be an integer, so
    # the uniform draw is truncated to an int in [300, 599].
    'trainCycles': int(np.random.uniform(300, 600)),
    'm': np.random.uniform(0.2, 0.7)
}


## Implementation of the neural network architecture

In [41]:
class Chromosome :
    """One individual of the genetic algorithm: a boolean mask over feature columns.

    Attributes :
       chrom : 1-D boolean numpy array, one gene per feature column
       fitness : fitness of the current mask, or None while it has not been computed
    """
    # Cache shared by all instances: bit-string of a mask -> fitness computed for it.
    # NOTE: the key is the mask only, so the cache must be cleared by hand
    # (Chromosome.chromosomes.clear()) when the data or the network settings change.
    chromosomes = {}

    def __init__(self,genNum,trueProb) :
        """Draw a random mask of `genNum` genes, each True with probability `trueProb`."""
        # random() is uniform on [0, 1), hence `< trueProb` is True with probability trueProb
        # (the previous `>` test produced True with probability 1 - trueProb).
        self.chrom = np.array([random() < trueProb for _ in range(genNum)], dtype=bool)
        self.fitness = None

    def __iter__(self) :
        """Iterate over the genes of the mask."""
        return iter(self.chrom)

    def mutate(self,prob) :
        """Flip every gene independently with probability `prob`."""
        flips = np.random.random(self.chrom.shape) < prob
        self.chrom = np.logical_xor(self.chrom, flips)
        # The mask may have changed, so a previously stored fitness is no longer valid.
        self.fitness = None

    @staticmethod
    def keras_model(input_dim,hiddenNum=40,lr=.1,m=.5) :
        """Return a zero-argument builder for a one-hidden-layer sigmoid network.

        Args :
           input_dim : number of input units
           hiddenNum : number of hidden units
           lr : learning rate of the SGD optimizer
           m : momentum of the SGD optimizer
        Returns :
           a callable that creates and compiles a NEW model on every call, so
           each cross-validation fold starts from freshly initialised weights
        """
        def build() :
            model = Sequential()
            model.add(Dense(hiddenNum, input_dim=input_dim, kernel_initializer='normal', activation='sigmoid'))
            model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
            # loss could be "mse" too
            model.compile(loss='binary_crossentropy',metrics=['accuracy','binary_accuracy'],
                          optimizer=SGD(lr=lr, momentum=m))
            return model
        return build

    def calculateFitness(self,X,Y,hiddenNum=40,trainCycles=450,lr=.1,m=.5) :
        """Score the mask by cross-validating a network on the selected columns.

        Args :
           X : training data, 2-D with one column per gene
           Y : training label
           hiddenNum : number of hidden units of hidden layer
           trainCycles : number of training cycles (epochs)
           lr : learning rate of neural network
           m: momentum of neural network
        Returns :
           'float' best score among the 10 cross-validation folds; 0.0 when the
           mask selects no feature. The value is also stored in self.fitness.
        Raises :
           ValueError : when X does not have exactly one column per gene
        """
        string_arr = ''.join('1' if gene else '0' for gene in self.chrom)
        cache = Chromosome.chromosomes
        if string_arr not in cache :
            mask = np.asarray(self.chrom, dtype=bool)
            data = np.asarray(X)
            if data.ndim != 2 or data.shape[1] != mask.size :
                raise ValueError('X must be 2-D with %d columns (one per gene), got shape %s'
                                 % (mask.size, data.shape))
            if not mask.any() :
                # A network without inputs cannot be built; rank the empty mask last.
                cache[string_arr] = 0.0
            else :
                selected = data[:, mask]  # keep only the columns switched on by the mask
                classifier = KerasClassifier(build_fn=Chromosome.keras_model(selected.shape[1],hiddenNum,lr,m),
                                     epochs=int(trainCycles),batch_size=len(selected),verbose=0)
                # NOTE(review): the best fold is an optimistic estimate; the mean over
                # folds would be the usual choice -- kept as max to preserve the original intent.
                cache[string_arr] = float(max(cross_val_score(classifier, selected, Y, cv=10,verbose=0)))
        self.fitness = cache[string_arr]
        return self.fitness

In [42]:
class Population :
    """A collection of chromosomes together with the genetic operators acting on it.

    Attributes :
       pop : list of individuals
       tSize : tournament size, popSize * tPressure but never below 1
       bestFitness : best fitness found by the last calculateFitness call
    """
    def __init__(self,popSize,genNum,trueProb=.5,tPressure=.1,X=None,Y=None) :
        """Create `popSize` random chromosomes of `genNum` genes each.

        Args :
           popSize : number of individuals
           genNum : number of genes per individual
           trueProb : probability handed to every new chromosome
           tPressure : fraction of the population taking part in a tournament
           X, Y : optional data and labels, only stored on the instance
        """
        self.popSize = popSize
        self.genNum = genNum
        self.trueProb = trueProb
        self.tPressure = tPressure
        # int() truncates, so small populations would get an empty tournament without max().
        self.tSize = max(1, int(popSize*tPressure))
        self.X = X
        self.Y = Y
        self.bestFitness = None
        # A plain list: np.append returns a copy and never grew the array in place.
        self.pop = [Chromosome(genNum,trueProb) for _ in range(popSize)]

    @staticmethod
    def _fitnessKey(indiv) :
        """Sort key that ranks individuals without a computed fitness last."""
        return float('-inf') if indiv.fitness is None else indiv.fitness

    def __iter__(self) :
        """Iterate over the individuals."""
        return iter(self.pop)

    def mutate(self,prob) :
        """Mutate every individual with per-gene probability `prob`."""
        for chrom in self.pop :
            chrom.mutate(prob)

    def crossover(self,prob) :
        """Breed a new population of 2 * (len(pop) // 4) children.

        Each pair of children comes from two tournament winners. With
        probability `prob` the parents exchange their tails at a random cut
        point (single-point crossover); otherwise the children are plain
        copies of the parents.
        """
        pairs = len(self.pop)//4
        newPop = Population(pairs*2,self.genNum,self.trueProb,self.tPressure,self.X,self.Y)
        for i in range(pairs) :
            first = np.asarray(self.tournament(self.tSize).chrom)
            second = np.asarray(self.tournament(self.tSize).chrom)
            # Copies, so that children never share memory with their parents.
            childA, childB = first.copy(), second.copy()
            if len(first) > 1 and np.random.random() < prob :
                # A cut in 1..len-1 guarantees that both parents contribute genes.
                point = np.random.randint(1, len(first))
                childA = np.concatenate((first[:point], second[point:]))
                childB = np.concatenate((second[:point], first[point:]))
            for child, genes in zip(newPop.pop[i*2:i*2+2], (childA, childB)) :
                child.chrom = genes
                child.fitness = None
        return newPop

    def tournament(self,k) :
        """Return the fittest of `k` distinct, randomly drawn individuals.

        `k` is clamped to the range 1..len(pop).
        Raises :
           ValueError : when the population is empty
        """
        if len(self.pop) == 0 :
            raise ValueError('cannot run a tournament on an empty population')
        k = min(max(int(k), 1), len(self.pop))
        # Draw indices rather than objects so numpy never has to convert the individuals.
        picks = np.random.choice(len(self.pop),k,replace=False)
        return max((self.pop[int(i)] for i in picks),key=Population._fitnessKey)

    def selection(self,newIndiv) :
        """Replace the least fit individuals by the individuals of `newIndiv`."""
        ranked = sorted(self.pop,key=Population._fitnessKey)  # ascending: worst first
        incoming = list(newIndiv)[:len(ranked)]
        ranked[:len(incoming)] = incoming
        self.pop = ranked

    def calculateFitness(self,X,Y,hiddenNum=40,trainCycles=450,lr=.1,m=.5) :
        """Evaluate every individual and return the best (largest) fitness.

        The fitness is stored on each individual and the best one in
        self.bestFitness. Returns None for an empty population.
        """
        best = None
        for chrom in self.pop :
            chrom.fitness = chrom.calculateFitness(X,Y,hiddenNum,trainCycles,lr,m)
            if best is None or chrom.fitness > best :
                best = chrom.fitness
        self.bestFitness = best
        return best

In [14]:
def geneticAlgorithm(X,Y,generationNum,popSize,genNum,trueProb,tPressure,cProb,mProb,trainCycles,lr,m,maxFeatureNum,minFeatureNum) :
    """Evolve feature masks and return the best fitness that was observed.

    Args :
       X : training data, 2-D
       Y : training label
       generationNum : number of generations to run
       popSize : number of individuals in the population
       genNum : number of genes per individual
       trueProb : probability used when drawing the initial genes
       tPressure : tournament size as a fraction of the population
       cProb : crossover probability
       mProb : per-gene mutation probability
       trainCycles, lr, m : training cycles, learning rate and momentum of the network
       maxFeatureNum, minFeatureNum : accepted so the configuration dict can be
           unpacked into the call; they are not used by the search yet
    Returns :
       the best fitness seen in the initial population or any offspring population
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    # The hidden-layer size was written as `featureNum + classNumber`, two names
    # that are defined nowhere. NOTE(review): read here as "number of input
    # columns + number of distinct labels" -- confirm against the intended design.
    hiddenNum = X.shape[1] + len(np.unique(Y))
    # The data is handed to calculateFitness explicitly, so the population
    # itself only needs its four structural settings.
    population = Population(popSize,genNum,trueProb,tPressure)
    bestFitness = population.calculateFitness(X,Y,hiddenNum,trainCycles,lr,m)
    for _ in range(generationNum) :
        newPop = population.crossover(cProb)
        newPop.mutate(mProb)
        fitness = newPop.calculateFitness(X,Y,hiddenNum,trainCycles,lr,m)
        # An offspring population without individuals has no fitness to compare.
        if fitness is not None and (bestFitness is None or fitness > bestFitness) :
            bestFitness = fitness
        population.selection(newPop.pop)
    return bestFitness

In [None]:
# Load the inputs and the labels; both sheets are read without a header row.
x_train = pd.read_excel('GermanCreditInput.xls',header=None)
y_train = pd.read_excel('GermanCreditOutputClass1columnknn.xls',header=None)
# ravel() flattens the label frame into the 1-D vector scikit-learn expects.
# NOTE(review): assumes the label sheet holds a single column -- confirm.
best_accuracy = geneticAlgorithm(np.array(x_train),np.array(y_train).ravel(),**genetic_config,**nn_config)
best_accuracy