In [1]:
import numpy as np
import pandas as pd

import math
import random
import operator
import itertools

from deap import algorithms,base,creator,tools,gp
from features1 import fill_age_1,add_title

# Load the two CSV files from the current working directory.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Split the training frame into the feature columns and the 'Survived' target column.
train_x = train.drop("Survived",axis=1)
train_y = train['Survived']



In [2]:
def MungeData(data):
    """Encode a raw passenger frame as an all-float feature frame.

    The input frame is NOT modified; a new frame is returned.

    Steps:
      * the 'Ticket', 'Name' and 'PassengerId' columns are dropped;
      * 'Sex' becomes 1.0 for 'male' and 0.0 for anything else (missing included);
      * 'Cabin' is replaced by a code for its first letter
        (A=1, B=2, C=3, D=4, E=5, F=6, G=7, T=8); missing or unrecognised
        cabins become 0.0;
      * 'Embarked' becomes C=1, Q=2, S=3; missing or unrecognised ports become 0.0;
      * every remaining missing value is replaced by -1.0.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing at least the columns 'Ticket', 'Name', 'PassengerId',
        'Sex', 'Cabin' and 'Embarked'.

    Returns
    -------
    pandas.DataFrame
        New frame with the remaining columns in their original order, all
        cast to float.
    """
    deck_codes = {'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7, 'T': 8}
    port_codes = {'C': 1, 'Q': 2, 'S': 3}

    # Title extraction / age imputation through the features1 helpers
    # (add_title, fill_age_1) is currently disabled.

    # drop() without inplace returns a new frame, so the caller's frame stays intact.
    data = data.drop(['Ticket', 'Name', 'PassengerId'], axis=1)

    # Sex: a missing value compares unequal to 'male' and therefore encodes as 0.
    data['Sex'] = (data['Sex'] == 'male').astype(float)

    # Cabin: keep only the first letter; anything outside the table maps to NaN -> 0.
    deck_letter = data['Cabin'].fillna('0').astype(str).str[0]
    data['Cabin'] = deck_letter.map(deck_codes).fillna(0).astype(float)

    # Embarked: values outside the table (including missing) map to NaN -> 0.
    data['Embarked'] = data['Embarked'].map(port_codes).fillna(0).astype(float)

    # Whatever is still missing (e.g. numeric columns) is flagged with -1.
    return data.fillna(-1).astype(float)
# Encode both frames and convert them to plain nested Python lists (one inner
# list per row) so each row can be unpacked into a compiled GP expression.
# NOTE(review): these names are rebound from DataFrame/Series to list, so this
# cell is not idempotent -- running it a second time fails because a list has
# no .drop(); re-run the loading cell first.
train_x = MungeData(train_x).values.tolist()
test = MungeData(test).values.tolist()
train_y = train_y.tolist()

In [3]:
def proba(data):
    """Map a raw score to a value in [0, 1] as one minus the logistic sigmoid.

    Larger scores therefore give SMALLER outputs (the result equals
    sigmoid(-data)).
    """
    sigmoid = 1./(1.+np.exp(-data))
    return 1.-sigmoid

def predict(prob):
    """Round a probability (scalar or array) to the nearest whole number."""
    rounded = np.around(prob)
    return rounded

def protectedDiv(a,b):
    """Divide a by b, returning 1 when the divisor is zero.

    The zero divisor is tested explicitly instead of relying on
    ZeroDivisionError: NumPy scalars (such as the values produced by the
    np.cos / np.sin / np.tanh primitives) do not raise on division by zero,
    they return inf or nan and only emit a warning.
    """
    if b == 0:
        return 1
    return a/b
def protectedLog(a):
    """Natural logarithm that substitutes the constant 728 when the result is NaN.

    Only NaN is replaced; a result of -inf (logarithm of zero) is returned as is.
    """
    value = np.log(a)
    return 728 if np.isnan(value) else value

# Strongly typed primitive set named 'MAIN': eight float inputs, one float output.
pset = gp.PrimitiveSetTyped('MAIN',itertools.repeat(float, 8), float)

# Function primitives as (callable, number of float arguments), registered in
# this exact order; every primitive returns a float.
for primitive, arity in (
    (protectedLog, 1),
    (operator.add, 2),
    (operator.sub, 2),
    (operator.mul, 2),
    (protectedDiv, 2),
    (operator.neg, 1),
    (np.cos, 1),
    (np.sin, 1),
    (np.tanh, 1),
    (np.minimum, 2),
    (np.maximum, 2),
):
    pset.addPrimitive(primitive, [float]*arity, float)

# Terminals: a random constant drawn uniformly from [-1, 1), plus pi and 2.0.
pset.addEphemeralConstant("rand1", lambda: random.random()*2-1, float)
#pset.addEphemeralConstant("rand1000", lambda: round(random.random()*1000),float)
pset.addTerminal(np.pi,float)
pset.addTerminal(2.0,float)

# Readable names for the eight positional inputs.
# NOTE(review): presumably the column order left by MungeData -- confirm.
pset.renameArguments(
    ARG0='PClass',
    ARG1='Sex',
    ARG2='Age',
    ARG3='SibSp',
    ARG4='Parch',
    ARG5='Fare',
    ARG6='Cabin',
    ARG7='Embarked',
)

In [9]:
# Single-objective fitness; the negative weight tells DEAP to minimise it.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
# An individual is a GP expression tree carrying that fitness.
creator.create('Individual', gp.PrimitiveTree, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
# 'expr' builds a random expression over pset using the half-and-half generator
# with min_=1 and max_=10.
toolbox.register('expr', gp.genHalfAndHalf, pset = pset, min_=1, max_=10)
# 'individual' wraps one generated expression in an Individual;
# 'population' repeats that into a list (its size is given at call time via n=...).
toolbox.register('individual', tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register('population', tools.initRepeat, list, toolbox.individual)
# 'compile' turns a tree into a Python callable over the pset arguments.
toolbox.register('compile', gp.compile, pset=pset)

In [10]:
def log_loss(y_true, y_prob):
    """Mean negative log-likelihood of binary labels under predicted probabilities.

    Each probability is clipped to [1e-15, 1 - 1e-15] before the logarithm is
    taken. The clipping is done on a local value, so y_prob is never modified
    and may be any indexable sequence (list, tuple, array).

    Parameters
    ----------
    y_true : sequence
        Labels; each one is tested for truthiness.
    y_prob : sequence
        Predicted probability of the positive label, indexed like y_true.

    Returns
    -------
    float
        The mean loss, or the sentinel 999.0 when the loss is undefined
        (NaN total, or no samples at all).
    """
    n = len(y_true)
    if n == 0:
        # No samples: report the same "undefined" sentinel instead of dividing by zero.
        return 999.0
    eps = 1e-15
    result = 0.0
    for i, truth in enumerate(y_true):
        clipped = np.minimum(np.maximum(eps, y_prob[i]), 1-eps)
        if truth:
            result += np.log(clipped)
        else:
            result += np.log(1-clipped)
    return 999.0 if np.isnan(result) else -1.0*result/n

def evalFitness(individual):
    """Score one GP tree: log loss of its predictions over the training rows.

    The tree is compiled to a callable, applied to every row of train_x, passed
    through proba(), and compared with train_y. Returns a one-element tuple, the
    shape assigned to fitness.values.
    """
    model = toolbox.compile(expr=individual)
    probabilities = []
    for row in train_x:
        probabilities.append(proba(model(*row)))
    return (log_loss(train_y, probabilities),)

toolbox.register('evaluate', evalFitness)

In [11]:
def _restoreOversized(individuals, backups, heightLimit):
    """Roll back, in place, each individual whose tree is taller than heightLimit."""
    for ind, backup in zip(individuals, backups):
        if ind.height > heightLimit:
            ind[:] = backup


def staticLimitCrossover(ind1, ind2, heightLimit):
    """One-point crossover in place; a child taller than heightLimit is reverted.

    Returns the pair (ind1, ind2), following the DEAP operator convention.
    """
    backups = (toolbox.clone(ind1), toolbox.clone(ind2))
    gp.cxOnePoint(ind1, ind2)
    _restoreOversized((ind1, ind2), backups, heightLimit)
    return ind1, ind2


def staticLimitCrossover1(ind1, ind2, heightLimit):
    """Leaf-biased one-point crossover (terminal probability 0.2) with height rollback.

    Returns the pair (ind1, ind2).
    """
    backups = (toolbox.clone(ind1), toolbox.clone(ind2))
    gp.cxOnePointLeafBiased(ind1, ind2, 0.2)
    _restoreOversized((ind1, ind2), backups, heightLimit)
    return ind1, ind2


def staticLimitMutation(individual, expr, pset, heightLimit):
    """gp.mutUniform in place, reverted if the tree exceeds heightLimit.

    Returns the one-element tuple (individual,).
    """
    backup = toolbox.clone(individual)
    gp.mutUniform(individual, expr,pset)
    _restoreOversized((individual,), (backup,), heightLimit)
    return individual,


def staticLimitMutation1(individual, heightLimit):
    """gp.mutEphemeral (mode='one') in place, reverted if the tree exceeds heightLimit.

    Returns the one-element tuple (individual,).
    """
    backup = toolbox.clone(individual)
    gp.mutEphemeral(individual, mode='one')
    _restoreOversized((individual,), (backup,), heightLimit)
    return individual,


def staticLimitMutation2(individual, pset, heightLimit):
    """gp.mutNodeReplacement in place, reverted if the tree exceeds heightLimit.

    Returns the one-element tuple (individual,).
    """
    backup = toolbox.clone(individual)
    gp.mutNodeReplacement(individual, pset)
    _restoreOversized((individual,), (backup,), heightLimit)
    return individual,


def staticLimitMutation3(individual, pset, heightLimit):
    """gp.mutInsert in place, reverted if the tree exceeds heightLimit.

    Returns the one-element tuple (individual,).
    """
    backup = toolbox.clone(individual)
    gp.mutInsert(individual, pset)
    _restoreOversized((individual,), (backup,), heightLimit)
    return individual,


def staticLimitMutation4(individual, heightLimit):
    """gp.mutShrink in place, reverted if the tree exceeds heightLimit.

    Returns the one-element tuple (individual,).
    """
    backup = toolbox.clone(individual)
    gp.mutShrink(individual)
    _restoreOversized((individual,), (backup,), heightLimit)
    return individual,
# Selection operators. Of these, only 'selectB' is called by the evolution
# cell visible in this notebook; the others are registered but not referenced there.
toolbox.register('selectW', tools.selWorst)
toolbox.register('selectR', tools.selRoulette)
toolbox.register('selectT', tools.selTournament, tournsize=3)
toolbox.register('selectB', tools.selBest)
toolbox.register('selectSPEA', tools.selSPEA2)
# NOTE(review): registered without any bound arguments -- confirm which extra
# parameters selDoubleTournament needs before calling toolbox.select.
toolbox.register('select', tools.selDoubleTournament)


# Variation operators; each one reverts its result when the tree height exceeds 17.
toolbox.register("mate", staticLimitCrossover, heightLimit=17)
toolbox.register("mateL", staticLimitCrossover1, heightLimit=17)
toolbox.register("mutateS",  staticLimitMutation4, heightLimit=17) 
toolbox.register("mutateI",  staticLimitMutation3, pset=pset, heightLimit=17) 
toolbox.register("mutateR",  staticLimitMutation2, pset=pset, heightLimit=17) 
toolbox.register("mutateE",  staticLimitMutation1, heightLimit=17) 
toolbox.register("mutateU", staticLimitMutation, expr=toolbox.expr, pset=pset, heightLimit=17) 


def same_hof(h1,h2):
    """Return True when two hall-of-fame sequences hold equal entries in the same order.

    Entries are compared as a whole with ==, not just by their first element,
    so two different trees that merely share a root node are not reported as
    equal. Sequences of different length are never equal.
    """
    if len(h1) != len(h2):
        return False
    return all(first == second for first, second in zip(h1, h2))


In [25]:
# Evolutionary Algorithm with Elitism

In [24]:
# Evolutionary algorithm in which fitter individuals have a lower probability
# of crossover and mutation.
# Initialize parameters
NGEN = 40000   # number of generations the main loop iterates over
size=100       # population size
# Thresholds for the cascaded mutation choice in the main loop.
# NOTE(review): MUT_S is not referenced by the visible loop (mutateS is its
# final 'else' branch).
MUT_R = 0.2
MUT_I = 0.2
MUT_U = 0.4
MUT_E = 0.4
MUT_S = 0.2

# Bounds of the fitness-dependent crossover probability.
PROB_X_MAX = 0.8
PROB_X_MIN = 0.0

# Only used by the commented-out progress / stagnation code after the main loop body.
epoch = 10000
count=10

# Initialize containers
pop = toolbox.population(n=size)
hof = tools.HallOfFame(1)   # keeps the single best individual seen


# Evaluate the initial population and seed the hall of fame with its best member.
fitnesses  = map(toolbox.evaluate, pop)
for ind, fit in zip(pop, fitnesses):
    ind.fitness.values = fit
current_best = toolbox.selectB(pop,1)
hof.insert(current_best[0])


# Main generational loop.
# NOTE: written for Python 2 -- it relies on map() returning a list (len(),
# indexing, append and slice assignment are applied to its results) and uses
# the print statement.
for g in range(NGEN):
    # clone population
    offspring = map(toolbox.clone, pop)
    
    # Keep only individuals whose loss is below 30; the rest are replaced below.
    shitz = [ind for ind in offspring if ind.fitness.values[0]<30.0]
    part1 = map(toolbox.clone, shitz)
    count1=0  # counts the replacements made; not read anywhere in this loop

    # Refill up to the original population size with fresh random individuals,
    # re-drawing until one has a loss of at most 30.
    for i in range(len(offspring)-len(part1)):
        x=999.0
        indi = toolbox.population(n=1)  # immediately overwritten inside the while loop
        while x >30:
            indi = toolbox.population(n=1)
            f = map(toolbox.evaluate, indi)
            x=f[0][0]
            indi[0].fitness.values = f[0]
        count1+=1
        part1.append(indi[0])

    fitnesses = [ind.fitness.values[0] for ind in part1]
    f_ave = np.mean(fitnesses)
    f_min = min(fitnesses)

    # Per-individual probability of being selected for crossover:
    #  * loss above the mean          -> PROB_X_MAX
    #  * loss at or below the mean    -> scaled by its distance from the best loss,
    #    giving PROB_X_MIN for the best individual and
    #    1.1*(PROB_X_MAX-PROB_X_MIN)+PROB_X_MIN for an individual exactly at the mean.
    # NOTE(review): the denominator is zero when every loss is identical
    # (fitnesses[i] == f_ave == f_min).
    probab = []
    below_average=0
    for i in range(len(part1)):
        if fitnesses[i]>f_ave:
            probab.append(PROB_X_MAX)
        else:
            below_average+=1
            probab.append(2.2*(PROB_X_MAX-PROB_X_MIN)*(fitnesses[i]-f_min)/(fitnesses[i]+f_ave-2*f_min)+PROB_X_MIN)
    
    
    #print f_ave, f_min, below_average
    
    #for i in range(10):
    #    print fitnesses[i], probab[i]
    # Progress line: generation, best loss in the hall of fame, best and mean loss of
    # this generation, and the number of individuals at or below the mean.
    print g, hof[0].fitness.values[0],f_min, f_ave, below_average  
    

    # Mate and Mutate
    # 'a' collects the individuals chosen for crossover, 'b' those left untouched.
    a = []
    b = []
    for i in range(len(part1)):           
        if random.random() < probab[i]:
            a.append(part1[i])
        else:
            b.append(part1[i])
    # Crossover works on pairs, so an odd one out is moved to the untouched group.
    if len(a)%2==1:
        b.append(a.pop()) 

    for c1,c2 in zip(a[::2],a[1::2]):
        toolbox.mate(c1,c2)
        for mutant in [c1,c2]:
            # Each child is mutated with probability 0.1. Every branch below draws a
            # NEW random number, so the thresholds are conditional on the earlier
            # branches having been skipped rather than absolute probabilities.
            if random.random() < 0.1:
                if random.random()<MUT_I:
                    toolbox.mutateI(mutant)
                elif random.random()< MUT_R:
                    toolbox.mutateR(mutant)
                elif random.random()< MUT_U:
                    toolbox.mutateU(mutant)
                elif random.random()< MUT_E:
                    toolbox.mutateE(mutant)
                else:
                    toolbox.mutateS(mutant)
        # Invalidate the fitness of both children so they are re-evaluated below.
        del c1.fitness.values
        del c2.fitness.values
    
    offspring[:] = a+b

    # Calculate invalid fitness
    invalid_ind = [ind for ind in offspring if not ind.fitness.valid]

    fitnesses = map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):

        ind.fitness.values = fit
        
    # The new generation replaces the population; the hall of fame is refreshed from it.
    pop[:]=offspring
    hof_t=map(toolbox.clone, hof)  # previous hall of fame; only used by the commented-out code below
    hof.update(pop)
    
    #if count==epoch:
    #    result = str(g) + " hall of fame:" + str(hof[0].fitness.values[0])
    #    print result
    #    count=0
    #count+=1
    
#    print str(gp.PrimitiveTree(hof[0]))
#    if same_hof(hof_t,hof):
#        count+=1
#    else:
#        count =0
#    if count>30:
        

0 0.568388526107 0.568388526107 3.15024439487 76
1 0.568388526107 0.568388526107 3.2560186665 75
2 0.568388526107 0.568388526107 2.56561345805 81
3 0.568388526107 0.568388526107 2.18433038093 83
4 0.568388526107 0.568388526107 2.57934479591 83
5 0.568388526107 0.568388526107 2.56515061719 80
6 0.568388526107 0.568388526107 2.90834183194 78
7 0.568388526107 0.568388526107 3.02366885084 77
8 0.568388526107 0.568388526107 2.53788620143 83
9 0.568388526107 0.568388526107 3.10404064974 79
10 0.568388526107 0.568388526107 2.62686116389 79
11 0.568388526107 0.568388526107 2.53551299848 80
12 0.568388526107 0.568388526107 2.41975312456 81
13 0.568388526107 0.568388526107 2.48503705545 80
14 0.568388526107 0.568388526107 2.95172900404 79
15 0.568388526107 0.568388526107 2.59984775592 82
16 0.568388526107 0.568388526107 2.4667077548 82
17 0.568388526107 0.568388526107 2.0016577538 87
18 0.568388526107 0.568388526107 2.23366738972 87
19 0.568388526107 0.568388526107 2.25986905277 85
20 0.56838852

175 0.54851567776 0.54851567776 1.81490895054 88
176 0.54851567776 0.54851567776 1.80689527889 88
177 0.54851567776 0.54851567776 1.85092800322 88
178 0.54851567776 0.54851567776 2.01339206473 87
179 0.54851567776 0.54851567776 2.19089554452 85
180 0.54851567776 0.54851567776 1.7252493105 85
181 0.54851567776 0.54851567776 2.09103759425 86
182 0.54851567776 0.54851567776 1.92732113648 86
183 0.54851567776 0.54851567776 1.40738568983 88
184 0.54851567776 0.54851567776 2.02811028632 87
185 0.54851567776 0.54851567776 2.53027761281 82
186 0.54851567776 0.54851567776 2.42871865219 82
187 0.54851567776 0.54851567776 2.12027338551 87
188 0.54851567776 0.54851567776 2.22026091021 87
189 0.54851567776 0.54851567776 2.28659958844 86
190 0.54851567776 0.54851567776 2.65392697253 85
191 0.54851567776 0.54851567776 2.35247461649 85
192 0.54851567776 0.54851567776 2.23749330357 87
193 0.54851567776 0.54851567776 2.26348112379 87
194 0.54851567776 0.54851567776 2.40339126156 86
195 0.54851567776 0.5

363 0.54851567776 0.54851567776 1.91100878414 89
364 0.54851567776 0.54851567776 2.14654694598 88
365 0.54851567776 0.54851567776 2.11000301641 89
366 0.54851567776 0.54851567776 2.28214082368 88
367 0.54851567776 0.54851567776 1.65105660132 89
368 0.54851567776 0.54851567776 1.71571606009 87
369 0.54851567776 0.54851567776 2.09509306268 84
370 0.54851567776 0.54851567776 2.36557531005 84
371 0.54851567776 0.54851567776 2.14469912133 87
372 0.54851567776 0.54851567776 2.99498644129 80
373 0.54851567776 0.54851567776 2.21657139247 84
374 0.54851567776 0.54851567776 2.3738175863 84
375 0.54851567776 0.54851567776 2.32936538145 85
376 0.54851567776 0.54851567776 2.11821171576 85
377 0.54851567776 0.54851567776 1.70995811201 88
378 0.54851567776 0.54851567776 1.78212716992 87
379 0.54851567776 0.54851567776 2.42295055678 82
380 0.54851567776 0.54851567776 2.6385243549 82
381 0.54851567776 0.54851567776 2.59296957337 83
382 0.54851567776 0.54851567776 2.32608705065 84
383 0.54851567776 0.54

526 0.526505234095 0.526505234095 2.15697043482 87
527 0.526505234095 0.526505234095 2.00032370316 87
528 0.526505234095 0.526505234095 2.35359058365 81
529 0.526505234095 0.526505234095 2.17672916589 83
530 0.526505234095 0.526505234095 2.22053197671 84
531 0.526505234095 0.526505234095 1.85542657921 86
532 0.526505234095 0.526505234095 1.9851194086 86
533 0.526505234095 0.526505234095 1.92891772239 87
534 0.526505234095 0.526505234095 1.73196441204 87
535 0.526505234095 0.526505234095 2.28277937195 84
536 0.526505234095 0.526505234095 2.21491438673 85
537 0.526505234095 0.526505234095 2.05466369053 87
538 0.526505234095 0.526505234095 2.09348085373 85
539 0.526505234095 0.526505234095 2.19160774812 83
540 0.526505234095 0.526505234095 2.31498307519 82
541 0.526505234095 0.526505234095 2.34490255912 84
542 0.526505234095 0.526505234095 2.4103407027 84
543 0.526505234095 0.526505234095 1.93002742502 87
544 0.526505234095 0.526505234095 2.13570597843 85
545 0.526505234095 0.526505234095

704 0.522587212807 0.522587212807 2.30733356745 83
705 0.522587212807 0.522587212807 2.30757477911 85
706 0.522587212807 0.522587212807 2.16030999012 86
707 0.522587212807 0.522587212807 2.241811832 83
708 0.522587212807 0.522587212807 2.40962825356 82
709 0.522587212807 0.522587212807 2.45706454402 83
710 0.522587212807 0.522587212807 2.44280646919 83
711 0.522587212807 0.522587212807 1.98887059138 86
712 0.522587212807 0.522587212807 2.21696045828 83
713 0.522587212807 0.522587212807 2.02998622007 87
714 0.522587212807 0.522587212807 2.12842488385 88
715 0.522587212807 0.522587212807 2.35472107949 86
716 0.522587212807 0.522587212807 2.37784832705 86
717 0.522587212807 0.522587212807 2.5680717258 85
718 0.522587212807 0.522587212807 2.47880209959 84
719 0.522587212807 0.522587212807 2.49675340368 85
720 0.522587212807 0.522587212807 2.5564572104 83
721 0.522587212807 0.522587212807 2.29290011345 85
722 0.522587212807 0.522587212807 2.46737689126 84
723 0.522587212807 0.522587212807 2

875 0.522587212807 0.522587212807 2.22871366739 86
876 0.522587212807 0.522587212807 2.19663634056 89
877 0.522587212807 0.522587212807 2.01684834498 85
878 0.522587212807 0.522587212807 2.50262980863 83
879 0.522587212807 0.522587212807 2.65018341851 83
880 0.522587212807 0.522587212807 2.41737823723 84
881 0.522587212807 0.522587212807 2.91394427131 81
882 0.522587212807 0.522587212807 2.82024516074 83
883 0.522587212807 0.522587212807 2.65371400187 84
884 0.522587212807 0.522587212807 3.17067850052 81
885 0.522587212807 0.522587212807 2.7088266631 82
886 0.522587212807 0.522587212807 2.78620434466 81
887 0.522587212807 0.522587212807 2.56643109763 83
888 0.522587212807 0.522587212807 2.37241925886 84
889 0.522587212807 0.522587212807 2.36490767806 83
890 0.522587212807 0.522587212807 2.57813242661 82
891 0.522587212807 0.522587212807 2.50854081005 83
892 0.522587212807 0.522587212807 2.32369283172 84
893 0.522587212807 0.522587212807 2.47631086586 84
894 0.522587212807 0.52258721280

1044 0.522587212807 0.522587212807 2.3524358897 82
1045 0.522587212807 0.522587212807 2.50693204822 81
1046 0.522587212807 0.522587212807 2.52091241674 80
1047 0.522587212807 0.522587212807 2.27394654492 82
1048 0.522587212807 0.522587212807 2.33494713976 83
1049 0.522587212807 0.522587212807 2.36095515108 83
1050 0.522587212807 0.522587212807 2.32967297699 83
1051 0.522587212807 0.522587212807 2.42152765019 83
1052 0.522587212807 0.522587212807 2.39885834438 82
1053 0.522587212807 0.522587212807 2.71455547226 81
1054 0.522587212807 0.522587212807 2.4755908159 83
1055 0.522587212807 0.522587212807 2.69224125622 83
1056 0.522587212807 0.522587212807 2.67253027861 81
1057 0.522587212807 0.522587212807 2.89564912912 80
1058 0.522587212807 0.522587212807 2.7402254896 82
1059 0.522587212807 0.522587212807 2.43235486162 83
1060 0.522587212807 0.522587212807 2.40967509649 84
1061 0.522587212807 0.522587212807 2.47761023707 84
1062 0.522587212807 0.522587212807 2.2477995877 85
1063 0.522587212

1215 0.522587212807 0.522587212807 2.83751636004 80
1216 0.522587212807 0.522587212807 2.88537506868 81
1217 0.522587212807 0.522587212807 2.75531571887 83
1218 0.522587212807 0.522587212807 2.74912323479 81
1219 0.522587212807 0.522587212807 2.17634948542 82
1220 0.522587212807 0.522587212807 2.3426412631 82
1221 0.522587212807 0.522587212807 2.4432046432 83
1222 0.522587212807 0.522587212807 2.59962487181 84
1223 0.522587212807 0.522587212807 2.24109894231 85
1224 0.522587212807 0.522587212807 2.26083887333 86
1225 0.522587212807 0.522587212807 2.43442167976 84
1226 0.522587212807 0.522587212807 2.93092186304 82
1227 0.522587212807 0.522587212807 2.66839252446 82
1228 0.522587212807 0.522587212807 2.43646387812 84
1229 0.522587212807 0.522587212807 2.29885505225 85
1230 0.522587212807 0.522587212807 2.01100213177 87
1231 0.522587212807 0.522587212807 1.83966658436 89
1232 0.522587212807 0.522587212807 1.93192781127 90
1233 0.522587212807 0.522587212807 2.09680330329 86
1234 0.5225872

1376 0.522587212807 0.522587212807 2.15906441296 84
1377 0.522587212807 0.522587212807 2.29394278913 84
1378 0.522587212807 0.522587212807 2.35335891565 85
1379 0.522587212807 0.522587212807 2.36391125652 86
1380 0.522587212807 0.522587212807 2.62057441835 86
1381 0.522587212807 0.522587212807 2.625496824 86
1382 0.522587212807 0.522587212807 2.52662053971 84
1383 0.522587212807 0.522587212807 2.16235058038 86
1384 0.522587212807 0.522587212807 2.10283998188 86
1385 0.522587212807 0.522587212807 1.9852334554 86
1386 0.522587212807 0.522587212807 2.09351017828 86
1387 0.522587212807 0.522587212807 2.10594202589 87
1388 0.522587212807 0.522587212807 2.20469819835 85
1389 0.522587212807 0.522587212807 2.16501890071 86
1390 0.522587212807 0.522587212807 2.64369763671 85
1391 0.522587212807 0.522587212807 2.49673752615 84
1392 0.522587212807 0.522587212807 2.44706713133 85
1393 0.522587212807 0.522587212807 2.52211457819 86
1394 0.522587212807 0.522587212807 2.18097323467 86
1395 0.52258721

1549 0.522587212807 0.522587212807 2.2214508982 85
1550 0.522587212807 0.522587212807 2.22655407515 85
1551 0.522587212807 0.522587212807 1.89919000609 87
1552 0.522587212807 0.522587212807 2.06342688262 88
1553 0.522587212807 0.522587212807 1.97855281041 86
1554 0.522587212807 0.522587212807 2.23763903766 84
1555 0.522587212807 0.522587212807 2.14728406566 84
1556 0.522587212807 0.522587212807 2.41641207044 81
1557 0.522587212807 0.522587212807 2.26227894787 83
1558 0.522587212807 0.522587212807 2.45258898732 84
1559 0.522587212807 0.522587212807 2.909905407 81
1560 0.522587212807 0.522587212807 2.39140621269 81
1561 0.522587212807 0.522587212807 2.53965037793 84
1562 0.522587212807 0.522587212807 2.61951780834 81
1563 0.522587212807 0.522587212807 3.11429047421 77
1564 0.522587212807 0.522587212807 3.15770137558 78
1565 0.522587212807 0.522587212807 3.15048199204 79
1566 0.522587212807 0.522587212807 2.8407528527 78
1567 0.522587212807 0.522587212807 2.94291050415 80
1568 0.522587212

KeyboardInterrupt: 

In [17]:
# NOTE(review): looks like a leftover scratch cell (it only shows that
# range(0) is an empty list under Python 2) -- safe to delete if so.
range(0)


[]

In [35]:
# Display the best individual in the hall of fame as a readable expression string.
str(gp.PrimitiveTree(hof[0]))


'add(sub(-0.9571310673948679, Sex), mul(3.141592653589793, Sex))'