In [104]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from sklearn import preprocessing

In [105]:
train_df = pd.read_csv('./titanic/train.csv')
test_df = pd.read_csv('./titanic/test.csv')

In [106]:
def dummy_creation(df,categories):
    """
    create a dummy DataFrame of category. And concatenate it  original DataFrame. 
    drop the original column
    """
    for i in categories:
        df_dummy = pd.get_dummies(df[i])
        df = pd.concat([df,df_dummy],axis = 1)
        df = df.drop(i,axis=1)
    return df

train_df = dummy_creation(train_df, ['Embarked'])
test_df = dummy_creation(test_df, ['Embarked'])

In [107]:
train_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,C,Q,S
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,0,0,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,1,0,0
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,0,0,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,0,0,1
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,0,0,1


In [109]:
train_df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'C', 'Q', 'S'],
      dtype='object')

In [110]:
test_df.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,C,Q,S
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,0,1,0
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,0,0,1
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,0,1,0
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,0,0,1
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,0,0,1


In [111]:
#checking unknown values
train_df.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
C                0
Q                0
S                0
dtype: int64

In [112]:
#columns selection
categories = ['Pclass','Sex','Age','Parch','SibSp','C','Q','S']

#features
train_features = train_df[categories]
test_features = test_df[categories]

#converting values of sex columns to binary 
train_features.Sex = (train_features.Sex == 'male').astype(int)
test_features.Sex = (test_features.Sex == 'male').astype(int)

#making labels
train_labels = train_df.Survived


#dealing with null values
train_features = train_features.fillna(train_features.mean())
test_features = test_features.fillna(test_features.mean())


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [113]:
train_data = preprocessing.MinMaxScaler().fit_transform(train_features)
test_data = preprocessing.MinMaxScaler().fit_transform(test_features)

In [126]:
#model
length = train_data.shape[1]

model = keras.Sequential()
model.add(keras.layers.Dropout(0.2,input_shape=(length,)))
model.add(keras.layers.Dense(500, activation = tf.nn.relu))
model.add(keras.layers.Dense(500, activation = tf.nn.relu))
model.add(keras.layers.Dense(500, activation = tf.nn.relu))
model.add(keras.layers.Dense(500, activation = tf.nn.relu))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(500, activation = tf.nn.relu))
model.add(keras.layers.Dense(2, activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(train_data, train_labels, epochs=1000)


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<tensorflow.python.keras.callbacks.History at 0x1a498d7d10>

In [69]:
#making predictions
prediction = model.predict(test_data)
output = pd.DataFrame({'PassengerId': test_df.PassengerId, 'Survived': np.argmax(prediction,axis=1)})
output.head()

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1


In [127]:
model.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout_8 (Dropout)          (None, 8)                 0         
_________________________________________________________________
dense_69 (Dense)             (None, 500)               4500      
_________________________________________________________________
dense_70 (Dense)             (None, 500)               250500    
_________________________________________________________________
dense_71 (Dense)             (None, 500)               250500    
_________________________________________________________________
dense_72 (Dense)             (None, 500)               250500    
_________________________________________________________________
dropout_9 (Dropout)          (None, 500)               0         
_________________________________________________________________
dense_73 (Dense)             (None, 500)             

In [81]:
def dummy_creation(df,categories):
    """
    create a dummy DataFrame of category. And concatenate it  original DataFrame. 
    drop the original column
    """
    for i in categories:
        df_dummy = pd.get_dummies(df[i])
        df = pd.concat([df,df_dummy],axis = 1)
        df = df.drop(i,axis=1)
    return df

ndf = dummy_creation(train_df, ['Embarked'])

In [82]:
train_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [83]:
ndf.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,C,Q,S
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,0,0,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,1,0,0
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,0,0,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,0,0,1
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,0,0,1


In [84]:
train_df.SibSp.isnull().sum()


0

In [85]:
train_df.SibSp.unique()

array([1, 0, 3, 4, 2, 5, 8])

In [80]:
train_df.Embarked.

TypeError: can only concatenate str (not "int") to str