# Important Library

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score , confusion_matrix
from sklearn import svm

# Loading Data Set

In [3]:
data = pd.read_csv("train.csv")

In [4]:
data.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [5]:
data.shape

(891, 12)

# Converting categorical variable into numerical

In [6]:
le = LabelEncoder()

In [7]:
data['Sex'] = le.fit_transform(data['Sex'])

In [8]:
data['Embarked'] = le.fit_transform(data['Embarked'].astype('str'))

In [9]:
data.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",1,22.0,1,0,A/5 21171,7.25,,2
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.0,1,0,PC 17599,71.2833,C85,0


# Handling Missing value

In [10]:
data['Age'].mean()

29.69911764705882

In [11]:
age_new_var = np.where(data['Age'],29,data['Age'])

In [12]:
data['Age'] = age_new_var

In [13]:
data['Embarked'] = data['Embarked'].fillna(method='bfill')

In [14]:
data.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         0
dtype: int64

In [15]:
data = data.drop(['PassengerId','Name','Ticket','Cabin'],axis=1)

In [16]:
data.columns

Index(['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
       'Embarked'],
      dtype='object')

In [17]:
X = data.drop('Pclass',axis=1)
y = data['Pclass']

# Spliting data into train and test

In [18]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=0)

In [19]:
print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)

(623, 7) (268, 7) (623,) (268,)


# SVM classification Algorithm

In [20]:
SVM = svm.SVC(gamma= 0.01,C=100)

In [21]:
SVM.fit(X_train,y_train).score(X_test,y_test)

0.9104477611940298

In [22]:
y_pred = SVM.predict(X_test)

In [23]:
confusion_matrix(y_pred,y_test)

array([[ 57,   4,   1],
       [  3,  43,  11],
       [  0,   5, 144]], dtype=int64)

In [25]:
X_test.shape[0]

268

In [26]:
(57+43+144)/268

0.9104477611940298

# SVM Classifier General Function

In [60]:
def SVM_Classifier(X,y):
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=0)
    SVM.fit(X_train,y_train)
    y_pred = SVM.predict(X_test)
    
    print("Maximum Accuracy : ",accuracy_score(y_pred,y_test))
    print("Confusion Matrix \n",confusion_matrix(y_pred,y_test))
    
  

In [28]:
SVM_Classifier(X,y)

Maximum Accuracy :  0.9104477611940298
Confusion Matrix 
 [[ 57   4   1]
 [  3  43  11]
 [  0   5 144]]


In [31]:
(57+43+144)/268

0.9104477611940298

In [32]:
# DV -> Survived
# IDV -> Remaining
X = data.drop('Survived',axis=1)
y = data['Survived']

In [33]:
SVM_Classifier(X,y)

Maximum Accuracy :  0.7611940298507462
Confusion Matrix 
 [[136  32]
 [ 32  68]]


In [35]:
# DV -> Pclass 
# IDV -> remaining
X = data.drop('Pclass',axis=1)
y = data['Pclass']

In [36]:
SVM_Classifier(X,y)

Maximum Accuracy :  0.9104477611940298
Confusion Matrix 
 [[ 57   4   1]
 [  3  43  11]
 [  0   5 144]]


In [37]:
# DV ->  Sex
# IDV -> remaining
X = data.drop('Sex',axis=1)
y = data['Sex']

In [38]:
SVM_Classifier(X,y)

Maximum Accuracy :  0.7947761194029851
Confusion Matrix 
 [[ 65  21]
 [ 34 148]]


In [52]:
# DV -> SibSp 
# IDV -> remaining
X = data.drop('SibSp',axis=1)
y = data['SibSp']

In [53]:
SVM_Classifier(X,y)

Maximum Accuracy :  0.8059701492537313
Confusion Matrix 
 [[162  23   2   0   1   0]
 [ 19  42   3   2   0   0]
 [  0   0   0   0   0   0]
 [  0   0   2   4   0   0]
 [  0   0   0   0   6   0]
 [  0   0   0   0   0   2]]


In [63]:
# DV -> Parch 
# IDV -> remaining
X = data.drop('Parch',axis=1)
y = data['Parch']

In [64]:
SVM_Classifier(X,y)

Maximum Accuracy :  0.8097014925373134
Confusion Matrix 
 [[193  25   7   0   0]
 [  8  12   2   0   0]
 [  3   3  12   2   1]
 [  0   0   0   0   0]
 [  0   0   0   0   0]]


In [68]:
# DV -> Embarked
# IDV -> remaining
X = data.drop('Embarked',axis=1)
y = data['Embarked']

In [69]:
SVM_Classifier(X,y)

Maximum Accuracy :  0.7425373134328358
Confusion Matrix 
 [[ 18   1   7]
 [  0   1   0]
 [ 42  19 180]]
