# Continuous Classifier - SVM & KNeighbors comparison

# 1. Preparing data

In [None]:
import pandas as pd

In [None]:
# Load the raw table; the PassengerId column becomes the row index.
dataset = pd.read_csv(filepath_or_buffer="data.csv", index_col="PassengerId")
# Preview the first rows to sanity-check the load.
dataset.head()

In [None]:
# Human-readable labels for the coded columns.
pclass_labels = {1: 'Upper', 2: 'Middle', 3: 'Lower'}
embarked_labels = {'S': 'Southampton', 'C': 'Cherbourg', 'Q': 'Queenstown'}

# Store the three discrete columns as pandas categoricals so that
# get_dummies later expands them into indicator columns.
dataset['Sex'] = dataset['Sex'].astype('category')
dataset['Pclass'] = dataset['Pclass'].replace(pclass_labels).astype('category')
dataset['Embarked'] = dataset['Embarked'].replace(embarked_labels).astype('category')

In [None]:
# Drop the free-text columns, then expand the categorical columns into
# indicator (dummy) columns.
# `columns=` is used instead of a positional axis argument: pandas 2.0+
# only accepts `labels` positionally in DataFrame.drop and raises a
# TypeError for `drop([...], 1)`.
text_columns = ["Name", "Ticket", "Cabin"]
d_dataset = pd.get_dummies(dataset.drop(columns=text_columns))
d_dataset.head()

In [None]:
# Show every row that still has at least one missing value.
has_missing = d_dataset.isna().any(axis="columns")
d_dataset[has_missing]

In [None]:
# Fill missing values in place using DataFrame.interpolate with its default
# settings (pandas documents the default method as linear, which treats rows
# as equally spaced in their current order).
# NOTE(review): the row order here is simply the order of the CSV, so
# neighbouring rows are presumably unrelated records — confirm that this
# imputation strategy is intended.
d_dataset.interpolate(inplace=True)

In [None]:
# Split into the feature matrix (every column except the label) and the
# target vector.
# `columns=` replaces the positional axis argument, which pandas 2.0+
# rejects with a TypeError.
X = d_dataset.drop(columns=["Survived"])
Y = d_dataset["Survived"]

# 2. SVM classifier

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [None]:
# Support-vector classifier with scikit-learn's default hyperparameters.
model = SVC()
# 5-fold cross-validated scores, using all available CPU cores.
cross_val_score(estimator=model, X=X, y=Y, cv=5, n_jobs=-1)

# 3. KNeighbors

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Baseline k-nearest-neighbours classifier with scikit-learn's defaults.
model2 = KNeighborsClassifier()
# Score the KNN estimator itself with 5-fold cross-validation. Passing
# `model` here would re-evaluate the SVC from the previous section and
# make the comparison meaningless.
cross_val_score(model2, X, Y, cv=5, n_jobs=-1)

# 4. Finding better hyperparameters

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Search grid for the k-nearest-neighbours classifier: neighbourhood size,
# vote weighting scheme, and the Minkowski distance power `p`.
parameters = dict(
    n_neighbors=[3, 5, 10, 15, 20],
    weights=['uniform', 'distance'],
    p=[1, 2, 3, 4],
)

In [None]:
# Exhaustively try every combination in `parameters` on a fresh KNN
# estimator, using GridSearchCV's default cross-validation and scoring.
neighbors_model = KNeighborsClassifier()
clf = GridSearchCV(estimator=neighbors_model, param_grid=parameters)
clf.fit(X, Y)

In [None]:
# Display the hyperparameter combination that scored best in the grid search.
clf.best_params_

In [None]:
# Re-evaluate KNN with explicitly chosen hyperparameters.
# NOTE(review): these values are presumably copied from the
# `clf.best_params_` output above — confirm they still match after
# re-running the search.
tuned_params = {"n_neighbors": 5, "p": 1, "weights": 'uniform'}
model2 = KNeighborsClassifier(**tuned_params)
cross_val_score(estimator=model2, X=X, y=Y, cv=5, n_jobs=-1)