# Kaggle Titanic Competition
## Titanic: Machine Learning from Disaster


***
## 1. Data Extraction
### Import Libraries

In [275]:
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

import keras
from keras.layers import Dense,Dropout
from keras.models import Sequential
from keras.utils.np_utils import to_categorical

### Load Data 

In [111]:
# Read the Kaggle training and test splits from the working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

### Check data frame structure 

In [112]:
# Inspect the column dtypes to spot features that are still non-numeric (object).
train.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [113]:
# Preview the first five rows of the training set.
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


***
## 2. Data Cleaning
### Check and fill missing values 

In [115]:
# Non-null counts per column show where values are missing.
# Only the last expression of a cell is rendered, so just the test-set
# counts appear in the output; the train-set result is computed and discarded.
train.count()
test.count()

PassengerId    418
Pclass         418
Name           418
Sex            418
Age            332
SibSp          418
Parch          418
Ticket         418
Fare           417
Cabin           91
Embarked       418
dtype: int64

In [116]:
# Training set: missing ages get the training median age, and a missing
# port of embarkation is filled with "S".
train = train.fillna({"Age": train["Age"].median(), "Embarked": "S"})
train.count()  # not rendered: only the cell's last expression is displayed

# Test set: same treatment, plus the missing fare. Note that the medians
# used here are computed from the test set itself, not from the training set.
test = test.fillna({
    "Age": test["Age"].median(),
    "Embarked": "S",
    "Fare": test["Fare"].median(),
})
test.count()

PassengerId    418
Pclass         418
Name           418
Sex            418
Age            418
SibSp          418
Parch          418
Ticket         418
Fare           418
Cabin           91
Embarked       418
dtype: int64

### Convert string to numeric columns 

In [117]:
# Encode the "Sex" strings as integers, using one shared mapping for both splits.
# Caution: re-running this cell maps the already-encoded integers through the
# dict again, which turns every value into NaN.
sex = {'male': 0, 'female': 1}
for frame in (train, test):
    frame["Sex"] = frame["Sex"].map(sex)

# Show the first ten encoded values of the test split.
test["Sex"].head(10)

0    0
1    1
2    0
3    0
4    1
5    0
6    1
7    0
8    1
9    0
Name: Sex, dtype: int64

In [118]:
# Encode the port of embarkation as an integer code, using one shared mapping.
# Caution: re-running this cell maps the already-encoded integers through the
# dict again, which turns every value into NaN.
embarked = {'S': 0, 'C': 1, 'Q': 2}
for frame in (train, test):
    frame["Embarked"] = frame["Embarked"].map(embarked)

# Show the first ten encoded values of the test split.
test["Embarked"].head(10)

0    2
1    0
2    2
3    0
4    0
5    0
6    2
7    0
8    1
9    0
Name: Embarked, dtype: int64

***
## 3. Data Analysis
### Plotting and Visualizing Data
### Hypotheses and Assumptions

### Feature Engineering

***
## 4. Train and Evaluate the Model
### Set target, features to train model 

In [223]:
# Target vector shared by every model.
target = train["Survived"].values

# Column selections. The order of each list is significant: it fixes the
# column order of the feature matrices and therefore the order in which
# feature_importances_ is reported later.
cols_basic = ["Pclass", "Sex", "Age", "Fare"]
cols_extended = ["Pclass", "Age", "Sex", "Fare", "SibSp", "Parch", "Embarked"]
cols_family = ["Pclass", "Sex", "Age", "Fare", "SibSp", "Parch", "family_size"]
cols_family_embarked = ["Pclass", "Sex", "Age", "Fare", "SibSp", "Parch", "Embarked", "family_size"]

# Feature set 1: the four basic columns.
features_one = train[cols_basic].values
test_features_one = test[cols_basic].values

# Feature set 2: extended columns (decision tree).
features_two = train[cols_extended].values
test_features_two = test[cols_extended].values

# Feature set 3: adds family_size = SibSp + Parch + 1 on copies of the frames,
# so `train` and `test` themselves are left without the extra column.
train_three = train.assign(family_size=train["SibSp"] + train["Parch"] + 1)
features_three = train_three[cols_family].values
test_three = test.assign(family_size=test["SibSp"] + test["Parch"] + 1)
test_features_three = test_three[cols_family].values

# Feature set 4: same columns as set 2 (random forest).
features_four = train[cols_extended].values
test_features_four = test[cols_extended].values

# Feature set 5: family_size together with Embarked.
train_five = train.assign(family_size=train["SibSp"] + train["Parch"] + 1)
features_five = train_five[cols_family_embarked].values
test_five = test.assign(family_size=test["SibSp"] + test["Parch"] + 1)
test_features_five = test_five[cols_family_embarked].values

# Feature set 6: same columns as set 2 (neural network).
features_six = train[cols_extended].values
test_features_six = test[cols_extended].values

# Sanity check: non-null counts of the training frame after cleaning.
train.count()

PassengerId    891
Survived       891
Pclass         891
Name           891
Sex            891
Age            891
SibSp          891
Parch          891
Ticket         891
Fare           891
Cabin          204
Embarked       891
dtype: int64

### Select and train the model 

In [277]:
# Decision trees and random forests.
# random_state is pinned on every estimator so that re-running the notebook
# reproduces the same fitted models and the same scores.
my_tree_one = tree.DecisionTreeClassifier(random_state = 1)
my_tree_one = my_tree_one.fit(features_one, target)

my_tree_two = tree.DecisionTreeClassifier(max_depth = 10, min_samples_split = 5, random_state = 1)
my_tree_two = my_tree_two.fit(features_two, target)

my_tree_three = tree.DecisionTreeClassifier(random_state = 1)
my_tree_three = my_tree_three.fit(features_three, target)

my_tree_four = RandomForestClassifier(max_depth=10, min_samples_split=2, n_estimators=100, random_state=1)
my_tree_four = my_tree_four.fit(features_four, target)

my_tree_five = RandomForestClassifier(max_depth=10, min_samples_split=2, n_estimators=100, random_state=1)
my_tree_five = my_tree_five.fit(features_five, target)

# Multilayer Perceptron Model
# The input width is taken from the feature matrix instead of being
# hard-coded, so the network stays in sync if the feature columns change.
input_shape = (features_six.shape[1],)
model_six = Sequential()

model_six.add(Dense(12, activation='relu', input_shape=input_shape))
model_six.add(Dropout(0.5))
model_six.add(Dense(64, activation='relu'))
model_six.add(Dropout(0.5))
# A single sigmoid unit: the network outputs one value in (0, 1) per passenger.
model_six.add(Dense(1, activation='sigmoid'))

model_six.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# Last expression of the cell: the returned History object is displayed.
model_six.fit(features_six, target, epochs=500, batch_size=128)


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoc

Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 

Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 

Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 

Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


<keras.callbacks.History at 0x1f8bc62dd68>

### Performance evaluation 

In [278]:
# For each tree-based model, print its feature importances followed by its
# accuracy. The score is computed on the same data the model was fitted on,
# so it is a training-set score, not an estimate of test-set performance.
for fitted_model, train_matrix in (
    (my_tree_one, features_one),
    (my_tree_two, features_two),
    (my_tree_three, features_three),
    (my_tree_four, features_four),
    (my_tree_five, features_five),
):
    print(fitted_model.feature_importances_)
    print(fitted_model.score(train_matrix, target))

[ 0.12315342  0.31274009  0.24940626  0.31470023]
0.977553310887
[ 0.14130255  0.17906027  0.41616727  0.17938711  0.05039699  0.01923751
  0.0144483 ]
0.905723905724
[ 0.10681611  0.31088095  0.22717985  0.26491993  0.01318868  0.01739451
  0.05961997]
0.979797979798
[ 0.10384741  0.20139027  0.31989322  0.24602858  0.05272693  0.04159232
  0.03452128]
0.939393939394
[ 0.09915728  0.31168455  0.20147389  0.22532166  0.03731813  0.03178077
  0.03580558  0.05745814]
0.938271604938


In [280]:
# Evaluate the neural network on the training data it was fitted on.
# The result is stored in `score` but never displayed in this cell.
# NOTE(review): with metrics=['accuracy'] this is presumably [loss, accuracy] — confirm.
score = model_six.evaluate(features_six, target, batch_size=128)

128/891 [===>..........................] - ETA: 0s

***
## 5. Predict the Test set

### Select test model features 

In [231]:
# Confirm the test frame has no missing values in the columns used as features
# (only Cabin, which is not used, is still incomplete).
test.count()

PassengerId    418
Pclass         418
Name           418
Sex            418
Age            418
SibSp          418
Parch          418
Ticket         418
Fare           418
Cabin           91
Embarked       418
dtype: int64

### Make predictions 

In [284]:
# Predict the test set with every fitted model.
# Each tree/forest prediction is a 1-D label vector; reshape(-1, 1) turns it
# into a single column without hard-coding the number of test rows.
# my_tree_one was fitted on features_one, so it must be given the matching
# test matrix, test_features_one.
my_prediction = my_tree_one.predict(test_features_one).reshape(-1, 1)

my_prediction_two = my_tree_two.predict(test_features_two).reshape(-1, 1)

my_prediction_three = my_tree_three.predict(test_features_three).reshape(-1, 1)

my_prediction_four = my_tree_four.predict(test_features_four).reshape(-1, 1)

my_prediction_five = my_tree_five.predict(test_features_five).reshape(-1, 1)

# The network ends in a single sigmoid unit, so this is one value in (0, 1)
# per passenger, not a 0/1 class label.
my_prediction_six = model_six.predict(test_features_six)
my_prediction_six

array([[  3.30899283e-02],
       [  2.94427633e-01],
       [  7.23309407e-04],
       [  1.16859801e-01],
       [  3.55024397e-01],
       [  1.60863131e-01],
       [  6.95835888e-01],
       [  1.51664749e-01],
       [  6.68886781e-01],
       [  1.06849693e-01],
       [  1.16782524e-01],
       [  3.48885238e-01],
       [  9.83466685e-01],
       [  1.70872971e-01],
       [  9.84844267e-01],
       [  9.16520000e-01],
       [  1.87797338e-01],
       [  1.69497088e-01],
       [  2.65213460e-01],
       [  3.37423712e-01],
       [  3.54308188e-01],
       [  3.37859720e-01],
       [  9.85214531e-01],
       [  3.95425886e-01],
       [  8.71451259e-01],
       [  8.09467286e-02],
       [  9.83853698e-01],
       [  1.60154670e-01],
       [  3.59612882e-01],
       [  1.27752811e-01],
       [  1.54426306e-01],
       [  1.31528243e-01],
       [  3.07075441e-01],
       [  3.50460768e-01],
       [  3.84458303e-01],
       [  1.85840219e-01],
       [  4.51461494e-01],
 

***
## 6. Save Results
### Create the Solution 

In [273]:
# model_six outputs sigmoid values in (0, 1), whereas the "Survived" target in
# the training data is a 0/1 integer. Threshold at 0.5 so the solution holds
# class labels rather than raw scores.
my_solution = pd.DataFrame(
    (my_prediction_six > 0.5).astype(int),
    index = test["PassengerId"],
    columns = ["Survived"],
)
# The index takes its name from the PassengerId series used to build it.
my_solution.index.name

'PassengerId'

### Save to CSV file 

In [274]:
# Write the solution to disk. The index (named PassengerId) is written as the
# first column, followed by the Survived column.
my_solution.to_csv("my_solution_six.csv")

***
***
# Rough Work 

In [17]:
import time

# Two random vectors with one million entries each.
a = np.random.rand(1000000)
b = np.random.rand(1000000)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations; time.time() is a wall-clock whose resolution can
# be too coarse for an interval of about a millisecond.
tic = time.perf_counter()
c = np.dot(a,b)
toc = time.perf_counter()

print(c)
print("Vectorized exec time: " + str(1000*(toc-tic)) + "ms")

# Same dot product accumulated element by element in a Python loop.
c = 0
tic = time.perf_counter()
for i in range(len(a)):
    c += a[i]*b[i]
toc = time.perf_counter()

print(c)
print("For-loop exec time: " + str(1000*(toc-tic)) + "ms")

249972.075277
Vectorized exec time: 1.5177726745605469ms
249972.075277
For-loop exec time: 624.934196472168ms


In [18]:
import numpy as np

# 3x4 example matrix used by the column-sum / broadcasting cells below.
A = np.array([
    [56.0, 0.0, 4.4, 68.0],
    [1.2, 104.0, 52.0, 8.0],
    [1.8, 135.0, 99.0, 0.9],
])

print(A)

[[  56.     0.     4.4   68. ]
 [   1.2  104.    52.     8. ]
 [   1.8  135.    99.     0.9]]


In [19]:
# Column totals: summing along axis 0 collapses the rows, giving one value per column.
cal = np.sum(A, axis=0)
print(cal)

[  59.   239.   155.4   76.9]


In [20]:
# Display the column totals as an explicit 1x4 row vector.
# The result is not assigned, so `cal` itself is left unchanged.
cal.reshape(1,4)

array([[  59. ,  239. ,  155.4,   76.9]])

In [22]:
# Express every entry as a percentage of its column total. The 4 totals in
# `cal` are broadcast across the 3 rows of A.
p = np.divide(100 * A, cal)
print(p)

[[ 94.91525424   0.           2.83140283  88.42652796]
 [  2.03389831  43.51464435  33.46203346  10.40312094]
 [  3.05084746  56.48535565  63.70656371   1.17035111]]


In [27]:
# 1-D integer array holding 1, 2, 3, 4.
B = np.arange(1, 5)

In [28]:
# Print the 1-D array.
print(B)

[1 2 3 4]


In [29]:
# Transposing a 1-D array changes nothing: the printed result is identical to B.
print(B.T)

[1 2 3 4]


In [38]:
# Rich repr of B while it is still 1-D.
B


array([1, 2, 3, 4])

In [37]:
# The transpose of the 1-D array has the same repr as B itself.
B.T

array([1, 2, 3, 4])

In [36]:
# Shape (4,): a rank-1 array, neither a row vector nor a column vector.
B.shape

(4,)

In [34]:
# Preview B as a 4x1 column vector. The result is not assigned, so B is unchanged.
B.reshape(4,1)

array([[1],
       [2],
       [3],
       [4]])

In [39]:
# B is still 1-D here: the reshape in the previous cell was not assigned back.
B

array([1, 2, 3, 4])

In [40]:
# Rebind B to a 4x1 column vector.
B = np.reshape(B, (4, 1))

In [45]:
# B is now a 2-D column vector.
B

array([[1],
       [2],
       [3],
       [4]])

In [42]:
# Shape (4, 1): four rows, one column.
B.shape

(4, 1)

In [43]:
# With a 2-D column vector the transpose does take effect: the result is a 1x4 row vector.
B.T


array([[1, 2, 3, 4]])

In [44]:
# IPython magic that renders matplotlib figures inline in the notebook.
# NOTE(review): no plotting code is visible in this notebook; this would normally
# live in the configuration cell at the top — confirm whether it is still needed.
%matplotlib inline