## Loading the standard libraries

In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

## Load the data

In [19]:
# Read the Titanic workbook into a DataFrame and preview the first rows.
# The sheet also carries a few spreadsheet summary cells to the right of the
# passenger table; they show up as 'Unnamed: N' columns in the preview.
data = pd.read_excel(io='Titanic.xlsx')
data.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Unnamed: 12,Unnamed: 13,Unnamed: 14
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,,Row Labels,Count of Sex
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,,female,314
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,,male,577
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,,Grand Total,891
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,,,


In [20]:
# Dimensions of the freshly loaded frame as (rows, columns).
data.shape

(891, 15)

## Observations:

1. Survived is the target variable

## Data Cleaning and Data Preprocessing

In [21]:
# Count the missing values in every column of the raw frame.
data.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
Unnamed: 12    891
Unnamed: 13    887
Unnamed: 14    887
dtype: int64

In [22]:
# Cabin is empty for most passengers, and the 'Unnamed' columns hold leftover
# spreadsheet summary cells rather than passenger attributes, so all four go.
cols = ['Cabin', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14']
data = data.drop(columns=cols)
data.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,S


In [23]:
# Re-check missing values now that the sparse columns are gone.
data.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Embarked         2
dtype: int64

In [24]:
from sklearn.impute import SimpleImputer

# Imputer that fills missing entries with the column median (used for Age).
si = SimpleImputer(strategy='median')
si

In [25]:
# Fill the missing ages with the imputer's learned statistic, then confirm
# that Age no longer reports any nulls.
age_filled = si.fit_transform(data[['Age']])
data['Age'] = age_filled
data.isna().sum()

PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Embarked       2
dtype: int64

In [26]:
from sklearn.impute import SimpleImputer

# Imputer that fills missing entries with the most common value (used for
# Embarked). NOTE: this rebinds `si`, so the median imputer created earlier
# is no longer reachable under that name after this cell runs.
si = SimpleImputer(strategy='most_frequent')
si

In [27]:
# Fill the two missing ports of embarkation with the most frequent port,
# then confirm that no column reports nulls any more.
embarked_filled = si.fit_transform(data[['Embarked']])
data['Embarked'] = embarked_filled
data.isna().sum()

PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Embarked       0
dtype: int64

In [28]:
# Preview the frame after imputation.
data.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,S


In [30]:
# Remove the PassengerId and Ticket columns from the feature table
# (presumably identifiers with no predictive value - confirm if unsure).
data = data.drop(columns=['PassengerId', 'Ticket'])
data.head(5)

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,7.25,S
1,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,71.2833,C
2,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,7.925,S
3,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,53.1,S
4,0,3,"Allen, Mr. William Henry",male,35.0,0,0,8.05,S


In [31]:
# Name is free text and is not used as a model input, so drop it as well.
data = data.drop(columns='Name')
data.head(5)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.925,S
3,1,1,female,35.0,1,0,53.1,S
4,0,3,male,35.0,0,0,8.05,S


## Feature Scaling

In [32]:
from sklearn.preprocessing import MinMaxScaler

# Scaler with default settings; applied to the continuous columns next.
mms = MinMaxScaler()
mms

In [33]:
# Rescale the two continuous columns in place with the min-max scaler.
# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split further down, so test rows influence the scaling statistics.
scaled_cols = ['Age', 'Fare']
data[scaled_cols] = mms.fit_transform(data[scaled_cols])
data.head(5)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,0.271174,1,0,0.014151,S
1,1,1,female,0.472229,1,0,0.139136,C
2,1,3,female,0.321438,0,0,0.015469,S
3,1,1,female,0.434531,1,0,0.103644,S
4,0,3,male,0.434531,0,0,0.015713,S


## Feature Encoding

In [34]:
# Encode Sex as an integer feature: male -> 0, female -> 1.
#
# Series.map builds the numeric column directly. The previous
# Series.replace(dict) call relied on pandas implicitly downcasting the object
# column to integers, which newer pandas releases deprecate (the warning was
# hidden by the blanket warnings filter at the top of the notebook).
#
# Any label missing from `dic` maps to NaN, so the explicit int64 cast makes
# the cell fail loudly on unexpected values (or on an accidental second run)
# instead of silently passing strings through. The spreadsheet's own summary
# cells list only 'female' and 'male'.
dic = {'male' : 0, 'female' : 1}
data['Sex'] = data['Sex'].map(dic).astype('int64')
data.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,0,0.271174,1,0,0.014151,S
1,1,1,1,0.472229,1,0,0.139136,C
2,1,3,1,0.321438,0,0,0.015469,S
3,1,1,1,0.434531,1,0,0.103644,S
4,0,3,0,0.434531,0,0,0.015713,S


In [35]:
# One-hot encode the port of embarkation into one indicator column per port.
# dtype=int pins the indicators to 0/1 integers on every pandas version;
# pandas 2.0+ would otherwise return boolean columns by default.
data_ohe = pd.get_dummies(data['Embarked'], dtype=int)
data_ohe

Unnamed: 0,C,Q,S
0,0,0,1
1,1,0,0
2,0,0,1
3,0,0,1
4,0,0,1
...,...,...,...
886,0,0,1
887,0,0,1
888,0,0,1
889,1,0,0


In [36]:
# Swap the categorical Embarked column for its indicator columns: drop the
# original first, then append the dummies on the right, aligned on the index.
data = pd.concat([data.drop(columns='Embarked'), data_ohe], axis='columns')
data.head(5)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,C,Q,S
0,0,3,0,0.271174,1,0,0.014151,0,0,1
1,1,1,1,0.472229,1,0,0.139136,1,0,0
2,1,3,1,0.321438,0,0,0.015469,0,0,1
3,1,1,1,0.434531,1,0,0.103644,0,0,1
4,0,3,0,0.434531,0,0,0.015713,0,0,1


## Separate X and y

In [37]:
# Target: the Survived column. Features: every other remaining column.
y = data['Survived']
X = data.drop(columns=['Survived'])

## Split the data into train and test sets

In [38]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

## Fit an SVM on the training set

In [39]:
from sklearn.svm import SVC

# Support vector classifier left at its default hyperparameters.
svc = SVC()
svc

In [40]:
# Train the classifier on the training portion only.
svc.fit(X_train, y_train)

## Perform Predictions

In [41]:
# Predict the Survived label for every held-out row and display the result.
y_pred = svc.predict(X_test)
y_pred

array([0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1,
       0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0], dtype=int64)

In [42]:
# Display the held-out feature rows that the predictions above refer to.
X_test

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,C,Q,S
495,3,0,0.346569,0,0,0.028221,1,0,0
648,3,0,0.346569,0,0,0.014737,0,0,1
278,3,0,0.082684,4,1,0.056848,0,1,0
31,1,1,0.346569,1,0,0.285990,1,0,0
255,3,1,0.359135,0,2,0.029758,1,0,0
...,...,...,...,...,...,...,...,...,...
263,1,0,0.497361,0,0,0.000000,0,0,1
718,3,0,0.346569,0,0,0.030254,0,1,0
620,3,0,0.334004,1,0,0.028213,1,0,0
786,3,1,0.220910,0,0,0.014631,0,0,1


## Evaluations

In [44]:
from sklearn.metrics import accuracy_score

# Fraction of held-out rows whose predicted label matches the true label.
# scikit-learn metrics take (y_true, y_pred); accuracy is symmetric, so the
# value is the same either way, but this order matches the confusion_matrix
# call later in the notebook and stays correct if the metric is swapped for
# an asymmetric one such as precision or recall.
accuracy_score(y_test, y_pred)

0.7985074626865671

In [46]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validated scores over the full feature table.
# NOTE(review): X was imputed and min-max scaled using all rows earlier in
# the notebook, so each fold's held-out part has already influenced the
# preprocessing; the scores may be slightly optimistic.
cross_val_score(estimator=svc, X=X, y=y, cv=10)

array([0.8       , 0.82022472, 0.7752809 , 0.85393258, 0.83146067,
       0.78651685, 0.80898876, 0.78651685, 0.82022472, 0.79775281])

In [47]:
from sklearn.metrics import confusion_matrix

# Confusion matrix on the hold-out set; by scikit-learn's convention rows are
# the true classes and columns are the predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[146,  22],
       [ 32,  68]], dtype=int64)