In [50]:
import random
import operator

import numpy as np
import matplotlib.pyplot as plt

from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [51]:
# Strongly-typed primitive set for the classifier program.
# Inputs: four float arguments followed by eight bool arguments; output: bool.
pset = gp.PrimitiveSetTyped("main", [float] * 4 + [bool] * 8, bool)

# Every primitive is binary and closed over a single type (T, T) -> T.
# NOTE(review): nothing registered here maps float -> bool, so the float
# arguments and float primitives look unreachable from a bool-rooted tree --
# confirm whether a comparison primitive was intended.
for primitive, operand_type in (
    (operator.xor, bool),
    (operator.and_, bool),
    (operator.or_, bool),
    (operator.mul, float),
    (operator.add, float),
    (operator.pow, float),
    (np.subtract, float),
):
    pset.addPrimitive(primitive, [operand_type, operand_type], operand_type)

In [52]:
# Sample one expression with min and max depth both set to 12 and wrap it in
# a PrimitiveTree (a scratch example; later visible cells do not use it).
expr = gp.genFull(pset, 12, 12)
tree = gp.PrimitiveTree(expr)

In [53]:
# Single-objective fitness with weight -1.0, i.e. the value is minimised.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))

# An individual is a GP tree carrying a FitnessMin and a reference to `pset`.
creator.create(
    "Individual",
    gp.PrimitiveTree,
    fitness=creator.FitnessMin,
    pset=pset,
)

In [54]:
# Age  SibSp  Parch      Fare  female  male  C  Q  S  1  2  3
def evalSymbReg(tree, pset):
    """Score a GP individual by its number of misclassified training rows.

    The tree is compiled into a callable and applied to every row of the
    module-level feature frame ``df1`` (first 12 columns, in the order
    Age, SibSp, Parch, Fare, female, male, C, Q, S, 1, 2, 3). Each prediction
    is compared with the matching label in the module-level series ``y1``.

    Parameters
    ----------
    tree : the individual (expression tree) to evaluate.
    pset : the primitive set used to compile ``tree``.

    Returns
    -------
    int
        ``false positives + false negatives`` over all rows; lower is better.
        A bare scalar is returned because the caller wraps it in a tuple
        before assigning it to ``fitness.values``.
    """
    function = gp.compile(tree, pset)

    # The compiled program takes exactly 12 positional inputs.
    n_features = 12
    # Pull each column out once as an array: iterating it yields the same
    # numpy scalars that per-cell ``.iloc`` lookups returned, without doing a
    # pandas lookup for every cell of every row.
    columns = [df1.iloc[:, k].values for k in range(n_features)]

    false_p = 0
    false_n = 0
    for truth, features in zip(y1.values, zip(*columns)):
        x = function(*features)
        if truth == 0:
            if x == 1:
                false_p += 1
        elif x == 0:
            false_n += 1

    # Previously nothing was returned, so every fitness ended up as None.
    return false_p + false_n

In [55]:
toolbox = base.Toolbox()

# --- Individual and population construction ---
# Expression generator bound to `pset`, with min_=1 and max_=12.
toolbox.register("expr", gp.genFull, pset=pset, min_=1, max_=12)
toolbox.register(
    "individual", tools.initIterate, creator.Individual, toolbox.expr
)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# --- Variation operators ---
# Expressions generated for mutation use min_=0 and max_=2.
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

# --- Evaluation and selection ---
toolbox.register("evaluate", evalSymbReg, pset=pset)
toolbox.register("select", tools.selTournament, tournsize=3)

In [56]:
# Load the raw training table from the comma-separated file.
df = pd.read_csv('trainForTitanic.csv', sep=',')

In [57]:
# One-hot encode 'Sex': swap the original column for its indicator columns.
sex = pd.Series(df['Sex'])
sexDF = pd.get_dummies(sex)
df = df.drop(columns='Sex').join(sexDF)

In [58]:
# One-hot encode 'Embarked': swap the original column for its indicator columns.
embarked = pd.Series(df['Embarked'])
embarkedDF = pd.get_dummies(embarked)
df = df.drop(columns='Embarked').join(embarkedDF)

In [59]:
# One-hot encode 'Pclass': swap the original column for its indicator columns.
# (Mean-imputation of Pclass is intentionally left disabled:)
#df['Pclass'].fillna((df['Pclass'].mean()), inplace=True)
pclass = pd.Series(df['Pclass'])
pclassDF = pd.get_dummies(pclass)
df = df.drop(columns='Pclass').join(pclassDF)

In [60]:
# Remove the columns that are not fed to the evolved program.
# A list (not a set) keeps the drop order deterministic.
df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)

# Fill missing values with the column mean (truncated to an integer for Age
# and Fare). Assign the result back to the column: calling
# `df[col].fillna(..., inplace=True)` works on an intermediate Series and,
# under pandas Copy-on-Write, leaves `df` unchanged.
df['Age'] = df['Age'].fillna(int(df['Age'].mean()))
df['SibSp'] = df['SibSp'].fillna(df['SibSp'].mean())
df['Fare'] = df['Fare'].fillna(int(df['Fare'].mean()))
df['Parch'] = df['Parch'].fillna(df['Parch'].mean())

# Split into the feature frame (df1) and the target labels (y1).
df1 = df.drop("Survived", axis=1)
y1 = df['Survived']

# PassengerId is dropped from the features as well.
df1 = df1.drop('PassengerId', axis=1)
# No cross-validation split is made here: each individual is evaluated on the
# entire training data and its fitness is based on that alone.
#df1, df2, y1, y2 = train_test_split(df1, y1, test_size=.3, random_state = 0)

In [61]:
# Build an initial population of 300 individuals and evaluate each one once.
pop = toolbox.population(n=300)
fitnesses = [toolbox.evaluate(member) for member in pop]

In [62]:
# Attach each score to its individual; fitness values must be a tuple,
# so the scalar is wrapped in a 1-tuple.
for individual, score in zip(pop, fitnesses):
    individual.fitness.values = (score,)