In [1]:
# Seed value
seed_value= 0

# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Set the `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)

# 3. Set the `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)

# 4. Set the `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.random.set_seed(seed_value)

In [2]:
import pandas as pd
import operator
import json
import copy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras import optimizers
from deap import base
from deap import creator
from deap import gp
from deap import tools

In [3]:
file_train='train.csv'
file_test='test.csv'

In [4]:
# read training data
train_df = pd.read_csv(file_train,index_col='PassengerId')

In [5]:
train_df.isnull().sum()

Survived      0
Pclass        0
Name          0
Sex           0
Age         177
SibSp         0
Parch         0
Ticket        0
Fare          0
Cabin       687
Embarked      2
dtype: int64

In [6]:
def prep_data(df):
    # Drop unwanted features
    df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)
    
    # Fill missing data: Age and Fare with the mean, Embarked with most frequent value
    df[['Age']] = df[['Age']].fillna(value=df[['Age']].mean())
    df[['Fare']] = df[['Fare']].fillna(value=df[['Fare']].mean())
    df[['Embarked']] = df[['Embarked']].fillna(value=df['Embarked'].value_counts().idxmax())
    
    # Convert categorical  features into numeric
    df['Sex'] = df['Sex'].map( {'female': 1, 'male': 0} ).astype(int)
      
    # Convert Embarked to one-hot
    enbarked_one_hot = pd.get_dummies(df['Embarked'], prefix='Embarked')
    df = df.drop('Embarked', axis=1)
    df = df.join(enbarked_one_hot)

    return df



In [7]:
train_df = prep_data(train_df)
train_df.isnull().sum()

Survived      0
Pclass        0
Sex           0
Age           0
SibSp         0
Parch         0
Fare          0
Embarked_C    0
Embarked_Q    0
Embarked_S    0
dtype: int64

In [8]:
X_train, X_test, y_train, y_test = train_test_split(train_df, train_df['Survived'], test_size=0.3, random_state=123, stratify = train_df['Survived'])
# X contains all columns except 'Survived'  
X_train = X_train.drop(['Survived'], axis=1).values.astype(float)
X_test = X_test.drop(['Survived'], axis=1).values.astype(float)

# It is almost always a good idea to perform some scaling of input values when using neural network models (jb).

scale = MinMaxScaler()
X_train = scale.fit_transform(X_train)
X_test = scale.fit_transform(X_test)

In [9]:
def evaluate_model(individual):
    try:
        adam = optimizers.Adam(lr = 0.001, amsgrad = False)
        inputs = Input(shape=(X_train.shape[1],), name='inputs')
        last_layer = inputs

        for layer in range(individual[0]):
            dense_layer = Dense(individual[layer + 2], activation = "relu")(last_layer)
            last_layer = dense_layer

        outputs = Dense(1, activation='sigmoid')(last_layer)
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(loss = 'binary_crossentropy', optimizer = adam, metrics =['accuracy'])
        model_fit = model.fit(np.asarray(X_train), y=np.asarray(y_train), epochs=individual[1], validation_split=0.2, verbose=0)
        score, acc = model.evaluate(X_train, y=np.asarray(y_train), )
        print('Train score:', score)
        print('Train accuracy:', acc)
        score, acc = model.evaluate([X_test], y=np.asarray(y_test), )
        print('Test score:', score)
        print('Test accuracy:', acc)
        return float(acc),
    except:
        return 0,

In [10]:
#model_acc = evaluate_model([7, 15, 128, 64, 32, 16, 8, 4, 2])

In [11]:
def create_individual(container):
    hid_layers = random.randint(0, 100)
    epochs = random.randint(0, 40)
    result = [hid_layers, epochs]
    
    for layer in range(hid_layers):
        neurons = random.randint(0, 1000)
        result.append(neurons)
        
    return container(result)

In [12]:
creator.create("FitnessMax", base.Fitness, weights=(1.0, 1.0))
creator.create("Individual", list, fitness=creator.FitnessMax)
# Individual will be a list of hyperparameters for a FF NN
# individual[0] : number of hidden layers
# individual[1] : epochs
# individual[x + 2] : number of neurons in the layer

In [13]:
toolbox = base.Toolbox()
# Attribute generator 
toolbox.register("attr_int", random.randint, 0, 100)
# Structure initializers
toolbox.register("individual", create_individual, creator.Individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)


In [14]:
toolbox.register("evaluate", evaluate_model)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

In [15]:
def main():
    pop = toolbox.population(n=10)
    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit
        #print(float(ind.fitness.values[0]))
        
    best_individual_fitness = 0
    # Begin the evolution
    #for g in range(1): 
    g = 0
    while best_individual_fitness < 0.9:
        print("-- Generation %i --" % g)
        
        # Select the next generation individuals
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))

        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < 0.5:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < 0.2:
                toolbox.mutate(mutant)
                del mutant.fitness.values        
    
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
        
        # Replace population
        pop[:] = offspring
    
        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]
        
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        
        print("  Min %s" % min(fits))
        print("  Max %s" % max(fits))
        print("  Avg %s" % mean)
        print("  Std %s" % std)
        best_ind = tools.selBest(pop, 1)[0]
        print("Best individual is %s, %s" % (best_ind, best_ind.fitness.values))
        best_individual_fitness = float(best_ind.fitness.values[0])
        g += 1
        
    print("-- End of (successful) evolution --")
    
    best_ind = tools.selBest(pop, 1)[0]
    print("Best individual is %s, %s" % (best_ind, best_ind.fitness.values))

In [16]:
main()



Train score: 0.6658145334709131
Train accuracy: 0.6163724
Test score: 0.6661436673420579
Test accuracy: 0.61567163
-- Generation 0 --
  Min 0.6156716346740723
  Max 0.6156716346740723
  Avg 0.6156716346740723
  Std 0.0
Best individual is [49, 26, 41, 265, 988, 523, 497, 414, 940, 802, 849, 310, 991, 488, 366, 597, 913, 929, 223, 516, 142, 288, 143, 773, 97, 633, 818, 256, 931, 545, 722, 829, 616, 923, 150, 317, 101, 747, 75, 920, 870, 700, 338, 483, 573, 103, 362, 444, 323, 625, 655], (0.6156716346740723,)
0.6156716346740723
-- End of (successful) evolution --
Best individual is [49, 26, 41, 265, 988, 523, 497, 414, 940, 802, 849, 310, 991, 488, 366, 597, 913, 929, 223, 516, 142, 288, 143, 773, 97, 633, 818, 256, 931, 545, 722, 829, 616, 923, 150, 317, 101, 747, 75, 920, 870, 700, 338, 483, 573, 103, 362, 444, 323, 625, 655], (0.6156716346740723,)
