In [510]:
# Import General Libraries

import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Import Data
df = pd.read_csv("titatrain.csv")
target = df['Survived'].values

In [511]:
## Data Pre-processing

# Extract Title 
df['Title']=0
df['Title']=df.Name.str.extract('([A-Za-z]+)\.')
df['Title'].replace(['Mlle','Mme','Ms','Dr','Major','Lady','Countess','Jonkheer','Col',
                         'Rev','Capt','Sir','Don'],['Miss','Miss','Miss','Mr','Mr','Mrs','Mrs','Other','Other','Other','Mr','Mr','Mr'],inplace=True)

In [512]:
# Define Age according to title and Age median

df["Age"].fillna(df.groupby("Title")["Age"].transform("median"), inplace=True)

In [513]:
# Replace NaN Age according to defined Title and Age mean
df.loc[(df.Age.isnull())&(df.Title=='Mr'),'Age']= df.Age[df.Title=="Mr"].mean()
df.loc[(df.Age.isnull())&(df.Title=='Mrs'),'Age']= df.Age[df.Title=="Mrs"].mean()
df.loc[(df.Age.isnull())&(df.Title=='Master'),'Age']= df.Age[df.Title=="Master"].mean()
df.loc[(df.Age.isnull())&(df.Title=='Miss'),'Age']= df.Age[df.Title=="Miss"].mean()
df.loc[(df.Age.isnull())&(df.Title=='Other'),'Age']= df.Age[df.Title=="Other"].mean()

In [514]:
# Change Female and Male to Integer Values
df['Sex'] = df['Sex'].map( {'female': 1, 'male': 0} ).astype(int)

In [515]:
# Drop features
features_drop = ['PassengerId','Name','Title','Cabin','Ticket','Embarked','Ticket','Fare']
df = df.drop(features_drop, axis=1)

df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Survived  891 non-null    int64  
 1   Pclass    891 non-null    int64  
 2   Sex       891 non-null    int32  
 3   Age       891 non-null    float64
 4   SibSp     891 non-null    int64  
 5   Parch     891 non-null    int64  
dtypes: float64(1), int32(1), int64(4)
memory usage: 38.4 KB


Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch
0,0,3,0,22.0,1,0
1,1,1,1,38.0,1,0
2,1,3,1,26.0,0,0
3,1,1,1,35.0,1,0
4,0,3,0,35.0,0,0


In [516]:
# Importing Classifier Modules

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [517]:
# Cross Validation

from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
k_fold = KFold(n_splits=10, shuffle=True, random_state=5)

In [518]:
#Get accuracy using K-Nearest Neighbor
clf = KNeighborsClassifier(n_neighbors = 35)
scoring = 'accuracy'
score = cross_val_score(clf, df, target, cv=k_fold, n_jobs=1, scoring=scoring)
avg_score = np.average(score)

#KNN Percent Score
print(avg_score*100)

80.91635455680401


In [519]:
#Get accuracy using SVM
clf = SVC()
scoring = 'accuracy'
score = cross_val_score(clf, df, target, cv=k_fold, n_jobs=1, scoring=scoring)
avg_score = np.average(score)

#SVM Percent Score
print(avg_score*100)

76.54556803995007


In [520]:
## USING DEEP LEARNING

In [521]:
# Split and Scale X and Y training inputs

from sklearn.preprocessing import StandardScaler

X = df.drop(['Survived'], axis=1).values.astype(float)
scale = StandardScaler()
X = scale.fit_transform(X)
Y = df['Survived']

In [522]:
#Import Libraries for creating Model
from keras.models import Sequential
from keras.layers import Dense

def create_model(optimizer='adam', init='uniform'):
    # create model
    model = Sequential()
    model.add(Dense(16, input_dim=X.shape[1], kernel_initializer=init, activation='relu'))
    model.add(Dense(8, kernel_initializer=init, activation='relu'))
    model.add(Dense(4, kernel_initializer=init, activation='relu'))
    model.add(Dense(1, kernel_initializer=init, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [523]:
# Create a classifier
from keras.wrappers.scikit_learn import KerasClassifier

model_pred = KerasClassifier(build_fn=create_model, optimizer='rmsprop', init='glorot_uniform', epochs=50, batch_size=5, verbose=0)
model_pred.fit(X, Y)


<keras.callbacks.History at 0x172334d22b0>

In [524]:
# Read test data
test_df = df.drop('Survived', axis = 1)

# Create X_test
X_test = test_df.values.astype(float)
# Scaling
X_test = scale.transform(X_test)

# Predict 'Survived'
prediction = model_pred.predict(X_test)



In [525]:
#Print Classification Report
from sklearn.metrics import classification_report
print(classification_report(Y, prediction))

              precision    recall  f1-score   support

           0       0.84      0.91      0.87       549
           1       0.84      0.71      0.77       342

    accuracy                           0.84       891
   macro avg       0.84      0.81      0.82       891
weighted avg       0.84      0.84      0.83       891

