In [1]:
import numpy as np
import pandas as pd

In [2]:
class binary():
    def __init__(self,capas=1,nodos=1):
        self.capas = capas
        self.nodos = nodos
        self.forma = (self.capas,self.nodos)
    def inicializar(self):
        self.red = []
        for capa in range(self.capas):
            w = np.random.normal(size = (self.nodos,self.nodos))
            b = np.random.normal(size = self.nodos)
            self.red.append([w,b])
        w_f = np.random.normal(size = self.nodos)
        b_f = np.random.normal(size = 1)
        self.red.append([w_f,b_f])
    def mutar(self,alfa=0.1):
        red = []
        for r in self.red:
            r0 = r[0]+alfa*np.random.normal(size = r[0].shape)
            r1 = r[1]+alfa*np.random.normal(size = r[1].shape)
            red.append([r0,r1])
        hijo = binary(self.capas,self.nodos)
        hijo.inicializar()
        hijo.red = red
        return hijo
    def cruzar(self,other):
        if self.forma == other.forma:
            hijo = binary(self.capas,self.nodos)
            hijo.inicializar()
            red = []
            for i in range(len(self.red)):
                r0 = (self.red[i][0] + other.red[i][0])/2.0
                r1 = (self.red[i][1] + other.red[i][1])/2.0
                red.append([r0,r1])
            hijo.red = red
            return hijo
        else:
            return None
    def sigmoid(self,x):
        return 1/(1+np.exp(-x))
    def predecir(self,entrada):
        for r in self.red:
            entrada = np.dot(entrada,r[0])
            entrada = entrada + r[1]
            entrada = self.sigmoid(entrada)
        entrada = entrada.round()
        entrada = int(entrada)
        return entrada
    def evaluar(self,entrada,resultado):
        entrada = self.predecir(entrada)
        if entrada == resultado:
            return 1
        return 0     
            
    

In [3]:
def crear_poblacion(poblacion_por_especie,especies,nodos):
    poblacion = []
    for i in range(poblacion_por_especie):
        for j in especies:
            individuo = binary(j,nodos)
            individuo.inicializar()
            poblacion.append(individuo)
    return poblacion

In [4]:
def evaluar_poblacion(poblacion,muestra):
    scores = []
    for p in poblacion:
        score = 0
        count = 0
        for r in muestra.iterrows():
            v = r[1].target
            u = r[1]
            del u['target']
            score += p.evaluar(u.values,v)
            count += 1
        score = float(score / float(count))
        scores.append(score)
    return scores

In [5]:
def predecir(best_model,test):
    scores = []
    for r in test.iterrows():
        u = r[1]
        score = p.predecir(u)
        scores.append(score)
    return scores

In [6]:
titanic = pd.read_csv('train.csv')
titanic.groupby('Embarked').size()

Embarked
C    168
Q     77
S    644
dtype: int64

In [7]:
titanic = pd.read_csv('train.csv')
titanic = titanic[['Survived','Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']]
titanic.loc[titanic.Embarked=='C','Embarked']=-1
titanic.loc[titanic.Embarked=='Q','Embarked']=0
titanic.loc[titanic.Embarked=='S','Embarked']=1
titanic['target'] = titanic.Survived
titanic['Pclass_1']=np.where(titanic.Pclass==1,1,0)
titanic['Pclass_2']=np.where(titanic.Pclass==2,1,0)
titanic['Pclass_3']=np.where(titanic.Pclass==3,1,0)
titanic['Sex']=np.where(titanic.Sex=='male',1,0)
titanic = titanic.fillna(titanic.mean())
titanic[['Age']] = titanic[['Age']]/titanic[['Age']].mean()
titanic[['Fare']] = titanic[['Fare']]/titanic[['Fare']].mean()
del titanic['Pclass'],titanic['Survived']
len(titanic)

891

In [8]:
titanic_train = titanic[titanic.target==0].sample(250)\
                .append(titanic[titanic.target==1].sample(250))
titanic_test = titanic.loc[~titanic.index.isin(titanic_train.index),:]



In [9]:
poblacion = crear_poblacion(1000,[2],9)
poblacion = pd.DataFrame(list(zip(poblacion,range(len(poblacion)))))
poblacion.columns = ['model','score']

In [None]:
alfa = 2
for i in range(1000):
    alfa = np.random.random()
    scores = evaluar_poblacion(poblacion.model.tolist(),titanic_train.sample(200))
    poblacion.score = scores
    print(i,poblacion.score.mean(),poblacion.score.max(),alfa)
    poblacion = poblacion.sort_values('score',ascending=False)[0:int(len(poblacion)/2)]
    nueva_poblacion = []
    alfa = 1
    for p in poblacion.model.tolist():
        hijo = p.mutar(alfa = alfa)
        nueva_poblacion.append(hijo)
    nueva_poblacion = pd.DataFrame(list(zip(nueva_poblacion,range(len(nueva_poblacion)))))
    nueva_poblacion.columns = ['model','score']
    poblacion = poblacion.append(nueva_poblacion).reset_index(drop=True)


0 0.5010099999999998 0.745 0.8000489612103682
1 0.48645999999999995 0.725 0.3517087576695953
2 0.5259950000000001 0.73 0.031398843316631564
3 0.5102 0.76 0.6104045519939534
4 0.5313450000000001 0.765 0.2277447253258097
5 0.543865 0.765 0.8601370677221012
6 0.5608150000000001 0.78 0.1927868449914082
7 0.5795799999999999 0.79 0.7138631488813734
8 0.597215 0.775 0.9821882033667865
9 0.5748900000000001 0.76 0.08001143602873062
10 0.5971400000000001 0.78 0.6963432366532617
11 0.60627 0.76 0.5994936851690534
12 0.6186200000000002 0.77 0.5603203238885995
13 0.637925 0.825 0.05851136769073906
14 0.6319150000000001 0.795 0.8027849578935518
15 0.618875 0.76 0.8520693160084
16 0.6285250000000001 0.78 0.5299626724547235
17 0.65516 0.815 0.08526656478755834
18 0.6382899999999999 0.795 0.8868432375839211
19 0.657335 0.81 0.34012095436898393
20 0.65601 0.8 0.8963136041148514
21 0.6695650000000001 0.805 0.35239444668443853
22 0.6816949999999999 0.865 0.602467690737274
23 0.66291 0.81 0.699472416909626

In [None]:
scores = evaluar_poblacion(poblacion.model.tolist(),titanic_train)
poblacion.score = scores
poblacion.sort_values('score',ascending=False)

In [None]:
best_model = poblacion.sort_values('score',ascending=False).iloc[0,0]

In [None]:
titanic = pd.read_csv('test.csv')
titanic = titanic[['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']]
titanic.loc[titanic.Embarked=='C','Embarked']=-1
titanic.loc[titanic.Embarked=='Q','Embarked']=0
titanic.loc[titanic.Embarked=='S','Embarked']=1
titanic['Pclass_1']=np.where(titanic.Pclass==1,1,0)
titanic['Pclass_2']=np.where(titanic.Pclass==2,1,0)
titanic['Pclass_3']=np.where(titanic.Pclass==3,1,0)
titanic['Sex']=np.where(titanic.Sex=='male',1,0)
titanic = titanic.fillna(titanic.mean())
titanic[['Age']] = titanic[['Age']]/titanic[['Age']].mean()
titanic[['Fare']] = titanic[['Fare']]/titanic[['Fare']].mean()
del titanic['Pclass']
len(titanic)

In [None]:
resultado = pd.read_csv('test.csv')
resultado['Survived'] = predecir(best_model,titanic)
resultado.Survived = resultado.Survived.round().astype(int)
resultado[['PassengerId','Survived']].to_csv('result.csv',index=False)