In [1]:
import numpy as np
import pandas as pd

In [2]:
class binary():
    """Small fully connected sigmoid network for binary classification.

    The network has ``capas`` square layers of ``nodos`` units each (so an
    input vector must have ``nodos`` features) followed by one output unit.
    Weights are stored in ``self.red`` as a list of ``[w, b]`` pairs and are
    evolved with ``mutar`` / ``cruzar`` instead of being trained by gradient
    descent.
    """

    def __init__(self,capas=1,nodos=1):
        """Record the architecture; weights are only created by ``inicializar``."""
        self.capas = capas
        self.nodos = nodos
        self.forma = (self.capas,self.nodos)

    def inicializar(self):
        """Draw every weight and bias from a standard normal distribution."""
        self.red = []
        for capa in range(self.capas):
            w = np.random.normal(size = (self.nodos,self.nodos))
            b = np.random.normal(size = self.nodos)
            self.red.append([w,b])
        # Output layer: a single unit reading all ``nodos`` activations.
        w_f = np.random.normal(size = self.nodos)
        b_f = np.random.normal(size = 1)
        self.red.append([w_f,b_f])

    def _descendiente(self,red):
        """Build a network of the same shape that owns the given weights."""
        # No call to inicializar(): random weights would be thrown away at once.
        hijo = binary(self.capas,self.nodos)
        hijo.red = red
        return hijo

    def mutar(self,alfa=0.1):
        """Return a new network whose weights are this one's plus Gaussian noise.

        ``alfa`` scales the standard-normal noise added to every weight and
        bias. The parent network is left untouched.
        """
        red = [
            [w + alfa*np.random.normal(size = w.shape),
             b + alfa*np.random.normal(size = b.shape)]
            for w, b in self.red
        ]
        return self._descendiente(red)

    def cruzar(self,other):
        """Return the element-wise average of two networks.

        Returns ``None`` when the two networks do not share the same
        ``forma`` (layer count, node count).
        """
        if self.forma != other.forma:
            return None
        red = [
            [(w_a + w_b)/2.0, (b_a + b_b)/2.0]
            for (w_a, b_a), (w_b, b_b) in zip(self.red, other.red)
        ]
        return self._descendiente(red)

    def sigmoid(self,x):
        """Logistic function, evaluated without overflowing for large ``|x|``."""
        x = np.asarray(x, dtype=float)
        # exp is only ever taken of a non-positive number, so it cannot overflow.
        z = np.exp(-np.abs(x))
        return np.where(x >= 0, 1.0/(1.0 + z), z/(1.0 + z))

    def predecir(self,entrada):
        """Run a forward pass and return the predicted class as ``0`` or ``1``.

        ``entrada`` is one feature vector of length ``nodos``; it is coerced to
        float so that object-dtype rows taken from a mixed DataFrame work.
        """
        entrada = np.asarray(entrada, dtype=float)
        for w, b in self.red:
            entrada = self.sigmoid(np.dot(entrada,w) + b)
        # The output layer yields a one-element array; .item() extracts the
        # scalar explicitly instead of relying on int() of an array.
        return int(np.round(entrada).item())

    def evaluar(self,entrada,resultado):
        """Return 1 if the prediction for ``entrada`` equals ``resultado``, else 0."""
        if self.predecir(entrada) == resultado:
            return 1
        return 0
            
    

In [3]:
def crear_poblacion(poblacion_por_especie,especies,nodos):
    """Build the initial population of randomly initialised networks.

    ``especies`` is an iterable of layer counts. For each of the
    ``poblacion_por_especie`` rounds, one network per layer count is created
    with ``nodos`` units per layer and appended to the returned list.
    """
    def _nuevo(capas):
        # Construct and initialise a single individual.
        red = binary(capas,nodos)
        red.inicializar()
        return red

    return [
        _nuevo(capas)
        for _ in range(poblacion_por_especie)
        for capas in especies
    ]

In [4]:
def evaluar_poblacion(poblacion,muestra):
    """Return the accuracy of every individual on a labelled sample.

    Parameters
    ----------
    poblacion : iterable of objects exposing ``evaluar(entrada, resultado)``
        that returns 1 for a correct prediction and 0 otherwise.
    muestra : DataFrame with a ``target`` column; every other column is
        passed to the individual as a feature, in column order.

    Returns
    -------
    list of float, one accuracy in [0, 1] per individual, in input order.

    Raises
    ------
    ValueError if there are individuals to score but ``muestra`` has no rows.
    """
    individuos = list(poblacion)
    if not individuos:
        return []
    # Split features and labels once, not once per individual per row.
    objetivos = muestra['target'].values
    entradas = muestra.drop('target', axis=1).values
    total = len(objetivos)
    if total == 0:
        raise ValueError("muestra has no rows; accuracy is undefined")
    scores = []
    for individuo in individuos:
        aciertos = sum(individuo.evaluar(x, y) for x, y in zip(entradas, objetivos))
        scores.append(aciertos / float(total))
    return scores

In [5]:
def predecir(best_model,test):
    """Predict a class for every row of ``test`` with ``best_model``.

    Parameters
    ----------
    best_model : object exposing ``predecir(entrada)``.
    test : DataFrame holding only feature columns, in the order the model
        expects.

    Returns
    -------
    list with one prediction per row, in row order.
    """
    # Use the model that was passed in, not a global left over from another
    # cell; rows are handed over as plain arrays, as evaluar_poblacion does.
    return [best_model.predecir(fila) for fila in test.values]

In [6]:
# Exploratory check: number of training rows per value of the Embarked column.
titanic = pd.read_csv('train.csv')
titanic.groupby('Embarked').size()

Embarked
C    168
Q     77
S    644
dtype: int64

In [7]:
# Load the training data and keep only the label and the raw feature columns.
titanic = pd.read_csv('train.csv')
titanic = titanic[['Survived','Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']]
# Ordinal-encode the port of embarkation. Series.map yields a numeric column
# (NaN where the port is missing), so the mean imputation below covers it and
# DataFrame.mean() never has to reduce an object-dtype column.
titanic['Embarked'] = titanic.Embarked.map({'C':-1,'Q':0,'S':1})
titanic['target'] = titanic.Survived
# One-hot encode the passenger class.
titanic['Pclass_1']=np.where(titanic.Pclass==1,1,0)
titanic['Pclass_2']=np.where(titanic.Pclass==2,1,0)
titanic['Pclass_3']=np.where(titanic.Pclass==3,1,0)
titanic['Sex']=np.where(titanic.Sex=='male',1,0)
# Impute missing values with the column mean (every column is numeric here).
titanic = titanic.fillna(titanic.mean())
# Rescale Age and Fare so that each has mean 1.
titanic['Age'] = titanic['Age']/titanic['Age'].mean()
titanic['Fare'] = titanic['Fare']/titanic['Fare'].mean()
del titanic['Pclass'],titanic['Survived']
len(titanic)

891

In [8]:
# Class-balanced training set: 250 random rows of each target value.
# pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
titanic_train = pd.concat([
    titanic[titanic.target==0].sample(250),
    titanic[titanic.target==1].sample(250),
])
# Everything that was not drawn for training is held out.
titanic_test = titanic.loc[~titanic.index.isin(titanic_train.index),:]



In [9]:
# Initial population: 1000 single-layer networks with 9 nodes each, stored in
# a frame with a placeholder score (the row position) that the training loop
# overwrites.
individuos = crear_poblacion(1000,[1],9)
poblacion = pd.DataFrame({
    'model': individuos,
    'score': range(len(individuos)),
})

In [10]:
# Evolutionary search. Each generation:
#   1. score every individual on a fresh random sample of 200 training rows,
#   2. keep the best-scoring half,
#   3. add one mutated child per survivor to restore the population size.
# alfa is the mutation scale actually passed to mutar(), so the value printed
# in the log is the value that was used.
alfa = 1
for i in range(1000):
    scores = evaluar_poblacion(poblacion.model.tolist(),titanic_train.sample(200))
    poblacion.score = scores
    print(i,poblacion.score.mean(),poblacion.score.max(),alfa)
    poblacion = poblacion.sort_values('score',ascending=False)[0:int(len(poblacion)/2)]
    nueva_poblacion = []
    for p in poblacion.model.tolist():
        hijo = p.mutar(alfa = alfa)
        nueva_poblacion.append(hijo)
    # Children get a placeholder score; they are re-scored next generation.
    nueva_poblacion = pd.DataFrame(list(zip(nueva_poblacion,range(len(nueva_poblacion)))))
    nueva_poblacion.columns = ['model','score']
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    poblacion = pd.concat([poblacion,nueva_poblacion]).reset_index(drop=True)


0 0.499555 0.75 2
1 0.5200699999999999 0.785 1
2 0.53775 0.785 1
3 0.5506749999999999 0.795 1
4 0.569605 0.795 1
5 0.59699 0.785 1
6 0.59999 0.84 1
7 0.6156699999999999 0.795 1
8 0.63931 0.81 1
9 0.6468400000000001 0.8 1
10 0.634775 0.78 1
11 0.648105 0.79 1
12 0.673375 0.8 1
13 0.669165 0.8 1
14 0.6880149999999999 0.84 1
15 0.67996 0.81 1
16 0.700345 0.835 1
17 0.6915699999999999 0.8 1
18 0.68771 0.805 1
19 0.68359 0.785 1
20 0.675995 0.81 1
21 0.714885 0.82 1
22 0.69888 0.805 1
23 0.7174200000000001 0.815 1
24 0.7223200000000001 0.8 1
25 0.752565 0.85 1
26 0.7315600000000001 0.83 1
27 0.7480950000000001 0.85 1
28 0.754985 0.845 1
29 0.7274349999999999 0.805 1
30 0.72039 0.82 1
31 0.7540250000000001 0.83 1
32 0.7285 0.81 1
33 0.76175 0.85 1
34 0.73382 0.805 1
35 0.71431 0.79 1
36 0.74243 0.82 1
37 0.7531749999999999 0.835 1
38 0.7589400000000001 0.84 1
39 0.79554 0.87 1
40 0.74897 0.825 1
41 0.7896700000000001 0.865 1
42 0.75555 0.825 1
43 0.77576 0.84 1
44 0.743285 0.815 1
45 0.78853

350 0.8585499999999999 0.88 1
351 0.7863 0.815 1
352 0.8033299999999999 0.84 1
353 0.8278599999999999 0.855 1
354 0.8030550000000001 0.83 1
355 0.844395 0.875 1
356 0.85029 0.88 1
357 0.82102 0.845 1
358 0.8344649999999999 0.86 1
359 0.8437100000000001 0.875 1
360 0.84721 0.875 1
361 0.83039 0.86 1
362 0.81956 0.845 1
363 0.7754 0.805 1
364 0.8164349999999999 0.85 1
365 0.80648 0.835 1
366 0.847055 0.885 1
367 0.8568999999999999 0.885 1
368 0.825455 0.855 1
369 0.8567349999999999 0.885 1
370 0.8393349999999999 0.865 1
371 0.8425250000000001 0.865 1
372 0.8314949999999999 0.85 1
373 0.8253199999999999 0.855 1
374 0.8129400000000001 0.84 1
375 0.8329099999999999 0.86 1
376 0.8341199999999999 0.86 1
377 0.8478399999999999 0.88 1
378 0.7845599999999999 0.815 1
379 0.804175 0.835 1
380 0.84141 0.865 1
381 0.85127 0.88 1
382 0.82817 0.855 1


KeyboardInterrupt: 

In [11]:
# Re-score the whole population on the full training set (not a 200-row
# sample) and show the ranking, best first.
scores = evaluar_poblacion(poblacion['model'].tolist(), titanic_train)
poblacion['score'] = scores
poblacion.sort_values(by='score', ascending=False)

Unnamed: 0,model,score
55,<__main__.binary object at 0x7f6f14fc9e48>,0.846
56,<__main__.binary object at 0x7f6f14fafa20>,0.846
154,<__main__.binary object at 0x7f6f14f05470>,0.846
54,<__main__.binary object at 0x7f6f1524b6a0>,0.844
58,<__main__.binary object at 0x7f6f14ee6cf8>,0.844
20,<__main__.binary object at 0x7f6f14edbb00>,0.844
580,<__main__.binary object at 0x7f6f14ef2cf8>,0.844
36,<__main__.binary object at 0x7f6f14f203c8>,0.844
89,<__main__.binary object at 0x7f6f14fbaeb8>,0.844
39,<__main__.binary object at 0x7f6f15219e10>,0.844


In [12]:
# Take the model from the top row of the score ranking.
best_model = poblacion.sort_values(by='score', ascending=False)['model'].iloc[0]

In [15]:
# Show the selected model object to confirm the selection cell has run.
best_model

<__main__.binary at 0x7f6f14fc9e48>

In [13]:
# Build the feature frame for the unlabelled test file with the same steps and
# the same column order as the training features.
titanic = pd.read_csv('test.csv')
titanic = titanic[['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']]
# Ordinal-encode the port of embarkation. Series.map yields a numeric column
# (NaN where the port is missing), so the mean imputation below covers it and
# DataFrame.mean() never has to reduce an object-dtype column.
titanic['Embarked'] = titanic.Embarked.map({'C':-1,'Q':0,'S':1})
# One-hot encode the passenger class.
titanic['Pclass_1']=np.where(titanic.Pclass==1,1,0)
titanic['Pclass_2']=np.where(titanic.Pclass==2,1,0)
titanic['Pclass_3']=np.where(titanic.Pclass==3,1,0)
titanic['Sex']=np.where(titanic.Sex=='male',1,0)
# NOTE(review): imputation and scaling below use the means of this test file,
# while the training features were built from the training file's means —
# confirm whether the training statistics should be reused here instead.
titanic = titanic.fillna(titanic.mean())
titanic['Age'] = titanic['Age']/titanic['Age'].mean()
titanic['Fare'] = titanic['Fare']/titanic['Fare'].mean()
del titanic['Pclass']
len(titanic)

418

In [14]:
# Re-read the test file to recover PassengerId, attach the predictions as an
# integer Survived column and write the two-column submission file.
resultado = pd.read_csv('test.csv')
resultado['Survived'] = predecir(best_model,titanic)
resultado['Survived'] = resultado['Survived'].round().astype(int)
columnas_salida = ['PassengerId','Survived']
resultado[columnas_salida].to_csv('result.csv',index=False)