In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the raw CSV and separate it into a feature matrix and a label vector.
# Columns are selected by position: 2 and 3 become the features, 4 the target.
# NOTE(review): presumably these are Age / EstimatedSalary / Purchased — confirm
# against the CSV header, which is not shown in this notebook.
feature_column_positions = [2, 3]
target_column_position = 4

dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, feature_column_positions].values
y = dataset.iloc[:, target_column_position].values

In [2]:
# Hold out 25% of the rows as a test set; random_state pins the shuffle so the
# split is reproducible across runs.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.25,
    random_state=0,
)

# Standardise the features. The scaler learns its mean/variance from the
# training rows only and the same fitted transform is then applied to both
# partitions, so no test-set statistics feed into the scaling.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)



In [3]:
# Baseline model: a support vector classifier with a *linear* kernel,
# trained on the scaled training set (fit() returns the estimator itself).
from sklearn.svm import SVC
classifier_svc = SVC(kernel='linear').fit(X_train, y_train)

# Class predictions for the held-out test rows
y_pred_svc = classifier_svc.predict(X_test)

In [4]:
# Evaluate the linear SVC on the test set: raw confusion matrix followed by
# the per-class precision / recall / f1 table.
from sklearn.metrics import confusion_matrix,classification_report

cm_svc = confusion_matrix(y_test, y_pred_svc)
report_svc = classification_report(y_test, y_pred_svc)
print(cm_svc)
print(report_svc)

[[66  2]
 [ 8 24]]
             precision    recall  f1-score   support

          0       0.89      0.97      0.93        68
          1       0.92      0.75      0.83        32

avg / total       0.90      0.90      0.90       100



In [5]:
# 10-fold cross-validation of the linear SVC on the training set.
# Prints the mean fold score, then its standard deviation.
from sklearn.model_selection import cross_val_score

accuracies_svc = cross_val_score(classifier_svc, X_train, y_train, cv=10)
for fold_statistic in (accuracies_svc.mean(), accuracies_svc.std()):
    print(fold_statistic)

0.813848720801
0.0886304860456


In [6]:
# Hyper-parameter search over three SVC families (linear, RBF, polynomial),
# scored by accuracy with 10-fold cross-validation on the training set.
from sklearn.model_selection import GridSearchCV

# Regularisation strengths shared by every kernel family
penalty_values = [1, 10, 100, 1000]

parameters = [
    dict(C=penalty_values, kernel=['linear']),
    dict(C=penalty_values, kernel=['rbf'],
         gamma=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
    dict(C=penalty_values, kernel=['poly'], degree=[2,3,4]),
]

grid_search = GridSearchCV(
    estimator=classifier_svc,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,  # use every available core
)

# fit() returns the search object itself, so no rebinding is needed
grid_search.fit(X_train, y_train)

# Best mean cross-validated score and the parameter combination that produced it
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_

In [7]:
# Show the winning cross-validated accuracy, then the parameters behind it
print(best_accuracy, best_parameters, sep='\n')

0.903333333333
{'C': 1, 'gamma': 0.7, 'kernel': 'rbf'}


In [8]:
# Rebuild an SVC from the hyper-parameters selected by the grid search and
# evaluate it on the held-out test set.
#
# The earlier version of this cell hard-coded SVC(kernel='rbf', C=0.7, gamma=0.7).
# C=0.7 is not one of the searched C values (1, 10, 100, 1000) and does not match
# the reported optimum {'C': 1, 'gamma': 0.7, 'kernel': 'rbf'}. Taking the values
# straight from grid_search.best_params_ keeps this cell in sync with the search
# even if the grid or the data changes.
# NOTE: any output recorded below this cell from the C=0.7 run is stale until
# the cell is re-executed.
classifier_best = SVC(**grid_search.best_params_)
classifier_best.fit(X_train, y_train)

# Predicting the Test set results
y_pred_cv = classifier_best.predict(X_test)

# Confusion matrix and per-class precision / recall / f1 for the tuned model
print(confusion_matrix(y_test, y_pred_cv))
print(classification_report(y_test, y_pred_cv))

[[64  4]
 [ 3 29]]
             precision    recall  f1-score   support

          0       0.96      0.94      0.95        68
          1       0.88      0.91      0.89        32

avg / total       0.93      0.93      0.93       100



In [13]:
# Gradient-boosted trees (XGBoost) as an alternative to the SVC models.
# Install hint: conda install py-xgboost
from xgboost import XGBClassifier

xgb_classifier = XGBClassifier(
    n_estimators=300,
    max_depth=3,
    learning_rate=0.05,
)
xgb_classifier.fit(X_train, y_train)

# Test-set predictions
y_pred_xgb = xgb_classifier.predict(X_test)

# Test-set evaluation: confusion matrix, then the per-class report
cm_xgb = confusion_matrix(y_test, y_pred_xgb)
report_xgb = classification_report(y_test, y_pred_xgb)
print(cm_xgb)
print(report_xgb)

# 10-fold cross-validation on the training set: mean score, then its spread
accuracies_xgb = cross_val_score(xgb_classifier, X_train, y_train, cv=10)
for fold_statistic in (accuracies_xgb.mean(), accuracies_xgb.std()):
    print(fold_statistic)

[[65  3]
 [ 3 29]]
             precision    recall  f1-score   support

          0       0.96      0.96      0.96        68
          1       0.91      0.91      0.91        32

avg / total       0.94      0.94      0.94       100

0.903756025213
0.0558938893255
