In [1]:
import pandas as pd
import numpy as np
import re as re

train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
validate = pd.read_csv('gender_submission.csv')

In [2]:
# Begin to Look at the Features. This is based on https://www.kaggle.com/sinakhorami/titanic-best-working-classifier
# Not a solution, just a guide on how to engineer and clean up data to boost accuracy without overfitting

# Mapping Sex
train['Sex_binary'] = train['Sex'].map({'male': 1, 'female': 0})
test['Sex_binary'] = test['Sex'].map({'male': 1, 'female': 0})

In [3]:
# Building the size of the family
train['FamilySize'] = train['SibSp'] + train['Parch'] + 1
test['FamilySize'] = test['SibSp'] + test['Parch'] + 1
train

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Sex_binary,FamilySize
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S,1,2
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0,2
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S,0,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,0,2
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S,1,1
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q,1,1
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S,1,1
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.0750,,S,1,5
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S,0,3
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C,0,2


In [4]:
# Now what matters, if the person was alone or not
train['IsAlone'] = 0
train.loc[train['FamilySize'] == 1, 'IsAlone'] = 1
test['IsAlone'] = 0
test.loc[test['FamilySize'] == 1, 'IsAlone'] = 1

In [5]:
# Filling empty cells with the most repeated shore
train['Embarked'] = train['Embarked'].fillna('S')
test['Embarked'] = test['Embarked'].fillna('S')

# Mapping shores
train['Embarked'] = train['Embarked'].map({'S':2,'Q':1,'C':0})
test['Embarked'] = test['Embarked'].map({'S':2,'Q':1,'C':0})

In [6]:
# Filling empty cells with median fare
train['Fare'] = train['Fare'].fillna(train['Fare'].median())
test['Fare'] = train['Fare'].fillna(train['Fare'].median())

# Splitting into 4 categories to generalize
train['CategoricalFare'] = pd.qcut(train['Fare'], 4)

# Mapping the categories 
train.loc[train['Fare'] <= 7.91, 'Fare'] = 0
train.loc[(train['Fare'] > 7.91) & (train['Fare'] <= 14.454), 'Fare'] = 1
train.loc[(train['Fare'] > 14.454) & (train['Fare'] <= 31), 'Fare'] = 2
train.loc[train['Fare'] > 31, 'Fare'] = 3
train['Fare'] = train['Fare'].astype(int)

test.loc[train['Fare'] <= 7.91, 'Fare'] = 0
test.loc[(train['Fare'] > 7.91) & (test['Fare'] <= 14.454), 'Fare'] = 1
test.loc[(train['Fare'] > 14.454) & (test['Fare'] <= 31), 'Fare'] = 2
test.loc[train['Fare'] > 31, 'Fare'] = 3
test['Fare'] = test['Fare'].astype(int)

In [7]:
# Filling (a lot) of empty age cells with a random number between the mean - std and mean + std
ageMean = train['Age'].mean()
ageStd = train['Age'].std()

train['Age'] = train['Age'].fillna(np.random.randint(ageMean - ageStd, ageMean + ageStd))
test['Age'] = test['Age'].fillna(np.random.randint(ageMean - ageStd, ageMean + ageStd))

# Splitting into 5 categories to generalize
train['CategoricalAge'] = pd.cut(train['Age'], 5)

# Mapping the categories
train.loc[ train['Age'] <= 16, 'Age'] = 0
train.loc[(train['Age'] > 16) & (train['Age'] <= 32), 'Age'] = 1
train.loc[(train['Age'] > 32) & (train['Age'] <= 48), 'Age'] = 2
train.loc[(train['Age'] > 48) & (train['Age'] <= 64), 'Age'] = 3
train.loc[ train['Age'] > 64, 'Age'] 

test.loc[ test['Age'] <= 16, 'Age'] = 0
test.loc[(test['Age'] > 16) & (test['Age'] <= 32), 'Age'] = 1
test.loc[(test['Age'] > 32) & (test['Age'] <= 48), 'Age'] = 2
test.loc[(test['Age'] > 48) & (test['Age'] <= 64), 'Age'] = 3
test.loc[test['Age'] > 64, 'Age'] 

81    67.0
96    76.0
Name: Age, dtype: float64

In [8]:
# Getting titles of the people aboard (func from the same source listed above)
def get_title(name):
    title_search = re.search(' ([A-Za-z]+)\.', name)
    # If the title exists, extract and return it.
    if title_search:
        return title_search.group(1)
    return ""

train['Title'] = train['Name'].apply(get_title)
test['Title'] = test['Name'].apply(get_title)
    
train['Title'] = train['Title'].replace(['Lady','Countess','Capt', 'Col','Don','Dr','Major','Rev','Sir','Jonkheer','Dona'],'Rare')
train['Title'] = train['Title'].replace('Mlle', 'Miss')
train['Title'] = train['Title'].replace('Ms', 'Miss')
train['Title'] = train['Title'].replace('Mme', 'Mrs')

test['Title'] = test['Title'].replace(['Lady','Countess','Capt', 'Col','Don','Dr','Major','Rev','Sir','Jonkheer','Dona'],'Rare')
test['Title'] = test['Title'].replace('Mlle', 'Miss')
test['Title'] = test['Title'].replace('Ms', 'Miss')
test['Title'] = test['Title'].replace('Mme', 'Mrs')

# Mapping titles
title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
train['Title'] = train['Title'].map(title_mapping)
train['Title'] = train['Title'].fillna(0)

test['Title'] = test['Title'].map(title_mapping)
test['Title'] = test['Title'].fillna(0)

In [9]:
dropElements = ['PassengerId','Name','Sex','SibSp','Parch','Ticket','Cabin','FamilySize','CategoricalFare','CategoricalAge']
train = train.drop(dropElements, axis=1)
test = test.drop(dropElements, axis=1)

Unnamed: 0,Survived,Pclass,Age,Fare,Embarked,Sex_binary,IsAlone,Title
0,0,3,1.0,0,2,1,0,1
1,1,1,2.0,3,0,0,0,3
2,1,3,1.0,1,2,0,1,2
3,1,1,2.0,3,2,0,0,3
4,0,3,2.0,1,2,1,1,1
5,0,3,1.0,1,1,1,1,1
6,0,1,3.0,3,2,1,1,1
7,0,3,0.0,2,2,1,0,4
8,1,3,1.0,1,2,0,0,3
9,1,2,0.0,2,0,0,0,3


In [17]:
features = ['Pclass','Age','Fare','Embarked','Sex_binary','IsAlone','Title']
target = 'Survived'
train[features].head(3)

Unnamed: 0,Pclass,Age,Fare,Embarked,Sex_binary,IsAlone,Title
0,3,1.0,0,2,1,0,1
1,1,2.0,3,0,0,0,3
2,3,1.0,1,2,0,1,2


In [18]:
X_train = np.array(train[features])

array([[3., 1., 0., ..., 1., 0., 1.],
       [1., 2., 3., ..., 0., 0., 3.],
       [3., 1., 1., ..., 0., 1., 2.],
       ...,
       [3., 1., 2., ..., 0., 0., 2.],
       [1., 1., 2., ..., 1., 1., 1.],
       [3., 1., 0., ..., 1., 1., 1.]])

In [20]:
y_train = np.array(train[target])
y_train = y_train.reshape(-1,1)

array([[0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
    

In [21]:
# Import keras modules
from keras.models import Sequential
from keras.layers import Dense
from keras.initializers import glorot_normal
from keras.optimizers import SGD

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [22]:
model = Sequential()
numNeurons = 2
model.add(Dense(3, input_dim=len(features), activation='relu', kernel_initializer="uniform")) #Si cambiamos esto dos, tenemos 76 
model.add(Dense(numNeurons, activation='sigmoid', kernel_initializer="uniform"))
model.add(Dense(1, activation='sigmoid', kernel_initializer="uniform"))
#model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['mae']) old

sgd = SGD(lr=0.05, decay=1e-6, momentum=0.6, nesterov=True)

model.compile(loss='mean_squared_logarithmic_error', optimizer="adamax", metrics=['mae'])

In [23]:
model.fit(X_train, y_train, epochs=1000, batch_size=100)
#score = model.evaluate(X_test, y_test, batch_size=25)
#score

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

Epoch 143/1000
Epoch 144/1000
Epoch 145/1000
Epoch 146/1000
Epoch 147/1000
Epoch 148/1000
Epoch 149/1000
Epoch 150/1000
Epoch 151/1000
Epoch 152/1000
Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000
Epoch 182/1000
Epoch 183/1000
Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/

Epoch 284/1000
Epoch 285/1000
Epoch 286/1000
Epoch 287/1000
Epoch 288/1000
Epoch 289/1000
Epoch 290/1000
Epoch 291/1000
Epoch 292/1000
Epoch 293/1000
Epoch 294/1000
Epoch 295/1000
Epoch 296/1000
Epoch 297/1000
Epoch 298/1000
Epoch 299/1000
Epoch 300/1000
Epoch 301/1000
Epoch 302/1000
Epoch 303/1000
Epoch 304/1000
Epoch 305/1000
Epoch 306/1000
Epoch 307/1000
Epoch 308/1000
Epoch 309/1000
Epoch 310/1000
Epoch 311/1000
Epoch 312/1000
Epoch 313/1000
Epoch 314/1000
Epoch 315/1000
Epoch 316/1000
Epoch 317/1000
Epoch 318/1000
Epoch 319/1000
Epoch 320/1000
Epoch 321/1000
Epoch 322/1000
Epoch 323/1000
Epoch 324/1000
Epoch 325/1000
Epoch 326/1000
Epoch 327/1000
Epoch 328/1000
Epoch 329/1000
Epoch 330/1000
Epoch 331/1000
Epoch 332/1000
Epoch 333/1000
Epoch 334/1000
Epoch 335/1000
Epoch 336/1000
Epoch 337/1000
Epoch 338/1000
Epoch 339/1000
Epoch 340/1000
Epoch 341/1000
Epoch 342/1000
Epoch 343/1000
Epoch 344/1000
Epoch 345/1000
Epoch 346/1000
Epoch 347/1000
Epoch 348/1000
Epoch 349/1000
Epoch 350/

Epoch 424/1000
Epoch 425/1000
Epoch 426/1000
Epoch 427/1000
Epoch 428/1000
Epoch 429/1000
Epoch 430/1000
Epoch 431/1000
Epoch 432/1000
Epoch 433/1000
Epoch 434/1000
Epoch 435/1000
Epoch 436/1000
Epoch 437/1000
Epoch 438/1000
Epoch 439/1000
Epoch 440/1000
Epoch 441/1000
Epoch 442/1000
Epoch 443/1000
Epoch 444/1000
Epoch 445/1000
Epoch 446/1000
Epoch 447/1000
Epoch 448/1000
Epoch 449/1000
Epoch 450/1000
Epoch 451/1000
Epoch 452/1000
Epoch 453/1000
Epoch 454/1000
Epoch 455/1000
Epoch 456/1000
Epoch 457/1000
Epoch 458/1000
Epoch 459/1000
Epoch 460/1000
Epoch 461/1000
Epoch 462/1000
Epoch 463/1000
Epoch 464/1000
Epoch 465/1000
Epoch 466/1000
Epoch 467/1000
Epoch 468/1000
Epoch 469/1000
Epoch 470/1000
Epoch 471/1000
Epoch 472/1000
Epoch 473/1000
Epoch 474/1000
Epoch 475/1000
Epoch 476/1000
Epoch 477/1000
Epoch 478/1000
Epoch 479/1000
Epoch 480/1000
Epoch 481/1000
Epoch 482/1000
Epoch 483/1000
Epoch 484/1000
Epoch 485/1000
Epoch 486/1000
Epoch 487/1000
Epoch 488/1000
Epoch 489/1000
Epoch 490/

Epoch 564/1000
Epoch 565/1000
Epoch 566/1000
Epoch 567/1000
Epoch 568/1000
Epoch 569/1000
Epoch 570/1000
Epoch 571/1000
Epoch 572/1000
Epoch 573/1000
Epoch 574/1000
Epoch 575/1000
Epoch 576/1000
Epoch 577/1000
Epoch 578/1000
Epoch 579/1000
Epoch 580/1000
Epoch 581/1000
Epoch 582/1000
Epoch 583/1000
Epoch 584/1000
Epoch 585/1000
Epoch 586/1000
Epoch 587/1000
Epoch 588/1000
Epoch 589/1000
Epoch 590/1000
Epoch 591/1000
Epoch 592/1000
Epoch 593/1000
Epoch 594/1000
Epoch 595/1000
Epoch 596/1000
Epoch 597/1000
Epoch 598/1000
Epoch 599/1000
Epoch 600/1000
Epoch 601/1000
Epoch 602/1000
Epoch 603/1000
Epoch 604/1000
Epoch 605/1000
Epoch 606/1000
Epoch 607/1000
Epoch 608/1000
Epoch 609/1000
Epoch 610/1000
Epoch 611/1000
Epoch 612/1000
Epoch 613/1000
Epoch 614/1000
Epoch 615/1000
Epoch 616/1000
Epoch 617/1000
Epoch 618/1000
Epoch 619/1000
Epoch 620/1000
Epoch 621/1000
Epoch 622/1000
Epoch 623/1000
Epoch 624/1000
Epoch 625/1000
Epoch 626/1000
Epoch 627/1000
Epoch 628/1000
Epoch 629/1000
Epoch 630/

Epoch 704/1000
Epoch 705/1000
Epoch 706/1000
Epoch 707/1000
Epoch 708/1000
Epoch 709/1000
Epoch 710/1000
Epoch 711/1000
Epoch 712/1000
Epoch 713/1000
Epoch 714/1000
Epoch 715/1000
Epoch 716/1000
Epoch 717/1000
Epoch 718/1000
Epoch 719/1000
Epoch 720/1000
Epoch 721/1000
Epoch 722/1000
Epoch 723/1000
Epoch 724/1000
Epoch 725/1000
Epoch 726/1000
Epoch 727/1000
Epoch 728/1000
Epoch 729/1000
Epoch 730/1000
Epoch 731/1000
Epoch 732/1000
Epoch 733/1000
Epoch 734/1000
Epoch 735/1000
Epoch 736/1000
Epoch 737/1000
Epoch 738/1000
Epoch 739/1000
Epoch 740/1000
Epoch 741/1000
Epoch 742/1000
Epoch 743/1000
Epoch 744/1000
Epoch 745/1000
Epoch 746/1000
Epoch 747/1000
Epoch 748/1000
Epoch 749/1000
Epoch 750/1000
Epoch 751/1000
Epoch 752/1000
Epoch 753/1000
Epoch 754/1000
Epoch 755/1000
Epoch 756/1000
Epoch 757/1000
Epoch 758/1000
Epoch 759/1000
Epoch 760/1000
Epoch 761/1000
Epoch 762/1000
Epoch 763/1000
Epoch 764/1000
Epoch 765/1000
Epoch 766/1000
Epoch 767/1000
Epoch 768/1000
Epoch 769/1000
Epoch 770/

Epoch 844/1000
Epoch 845/1000
Epoch 846/1000
Epoch 847/1000
Epoch 848/1000
Epoch 849/1000
Epoch 850/1000
Epoch 851/1000
Epoch 852/1000
Epoch 853/1000
Epoch 854/1000
Epoch 855/1000
Epoch 856/1000
Epoch 857/1000
Epoch 858/1000
Epoch 859/1000
Epoch 860/1000
Epoch 861/1000
Epoch 862/1000
Epoch 863/1000
Epoch 864/1000
Epoch 865/1000
Epoch 866/1000
Epoch 867/1000
Epoch 868/1000
Epoch 869/1000
Epoch 870/1000
Epoch 871/1000
Epoch 872/1000
Epoch 873/1000
Epoch 874/1000
Epoch 875/1000
Epoch 876/1000
Epoch 877/1000
Epoch 878/1000
Epoch 879/1000
Epoch 880/1000
Epoch 881/1000
Epoch 882/1000
Epoch 883/1000
Epoch 884/1000
Epoch 885/1000
Epoch 886/1000
Epoch 887/1000
Epoch 888/1000
Epoch 889/1000
Epoch 890/1000
Epoch 891/1000
Epoch 892/1000
Epoch 893/1000
Epoch 894/1000
Epoch 895/1000
Epoch 896/1000
Epoch 897/1000
Epoch 898/1000
Epoch 899/1000
Epoch 900/1000
Epoch 901/1000
Epoch 902/1000
Epoch 903/1000
Epoch 904/1000
Epoch 905/1000
Epoch 906/1000
Epoch 907/1000
Epoch 908/1000
Epoch 909/1000
Epoch 910/

Epoch 984/1000
Epoch 985/1000
Epoch 986/1000
Epoch 987/1000
Epoch 988/1000
Epoch 989/1000
Epoch 990/1000
Epoch 991/1000
Epoch 992/1000
Epoch 993/1000
Epoch 994/1000
Epoch 995/1000
Epoch 996/1000
Epoch 997/1000
Epoch 998/1000
Epoch 999/1000
Epoch 1000/1000


<keras.callbacks.History at 0xb24c6ecc0>

In [24]:
X_test = np.array(test[features])

array([[3., 2., 0., ..., 1., 1., 1.],
       [3., 2., 0., ..., 0., 0., 3.],
       [2., 3., 0., ..., 1., 1., 1.],
       ...,
       [3., 2., 0., ..., 1., 1., 1.],
       [3., 2., 0., ..., 1., 1., 1.],
       [3., 2., 0., ..., 1., 0., 4.]])

In [25]:
predicciones = model.predict(X_test)

[[0.08369523]
 [0.5272342 ]
 [0.10342231]
 [0.08436327]
 [0.63674873]
 [0.0886864 ]
 [0.5446165 ]
 [0.11437111]
 [0.73425347]
 [0.08275958]
 [0.08187657]
 [0.32521155]
 [0.84433174]
 [0.09014101]
 [0.831257  ]
 [0.82743526]
 [0.12260936]
 [0.09241106]
 [0.35038513]
 [0.6566918 ]
 [0.32297707]
 [0.39878616]
 [0.8317204 ]
 [0.6021984 ]
 [0.84433174]
 [0.07989825]
 [0.84433174]
 [0.09241106]
 [0.32521155]
 [0.08430487]
 [0.09014101]
 [0.11437111]
 [0.5272342 ]
 [0.5272342 ]
 [0.6021984 ]
 [0.09241106]
 [0.2984654 ]
 [0.43105063]
 [0.08436327]
 [0.08187657]
 [0.08430487]
 [0.32521155]
 [0.08187657]
 [0.7932598 ]
 [0.831257  ]
 [0.08436327]
 [0.54521394]
 [0.08369523]
 [0.8353239 ]
 [0.5272342 ]
 [0.3805493 ]
 [0.20251942]
 [0.75188863]
 [0.8339403 ]
 [0.14786151]
 [0.49611148]
 [0.08187657]
 [0.08436327]
 [0.08094895]
 [0.83775544]
 [0.08436327]
 [0.12856767]
 [0.08436327]
 [0.5446165 ]
 [0.837538  ]
 [0.7453348 ]
 [0.5446165 ]
 [0.32521155]
 [0.6702854 ]
 [0.8053726 ]
 [0.5446165 ]
 [0.08

In [26]:
predicciones = predicciones.tolist()

In [27]:
pre = pd.Series(predicciones)
validate['prediccion'] = pre
validate['prediccion'] = validate['prediccion'].str.get(0)
validate

Unnamed: 0,PassengerId,Survived,prediccion
0,892,0,0.083695
1,893,1,0.527234
2,894,0,0.103422
3,895,0,0.084363
4,896,1,0.636749
5,897,0,0.088686
6,898,1,0.544617
7,899,0,0.114371
8,900,1,0.734253
9,901,0,0.082760


In [28]:
coincidencias = []
for dato in validate.prediccion:
    if dato >= 0.5:
        coincidencias.append(1)
    else:
        coincidencias.append(0)
validate['final'] = coincidencias
validate

Unnamed: 0,PassengerId,Survived,prediccion,final
0,892,0,0.083695,0
1,893,1,0.527234,1
2,894,0,0.103422,0
3,895,0,0.084363,0
4,896,1,0.636749,1
5,897,0,0.088686,0
6,898,1,0.544617,1
7,899,0,0.114371,0
8,900,1,0.734253,1
9,901,0,0.082760,0


In [29]:
coincide = 0
coincide = sum(validate['Survived'] == validate['final'])
print(coincide)
print(float(coincide)/ float(len(validate)))


match = 0
nomatch = 0
for val in validate.values:
    if val[1] == val[3]:
        match = match +1
    else:
        nomatch = nomatch +1
print(float(match)/float(len(validate)))

354
0.84688995215311
0.84688995215311


In [30]:
toKaggle = pd.DataFrame({'PassengerId':validate['PassengerId'],
                         'Survived':validate['final']})

toKaggle.head()

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,1
2,894,0
3,895,0
4,896,1


# output file with your prediction

In [31]:
from datetime import datetime
archivo = 'TitanicPred.csv'

toKaggle.to_csv(archivo,index=False)

print('Creado: ' + archivo)

Creado: TitanicPred.csv
