## Importing libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Importing dataset

In [2]:
dataset=pd.read_csv('Social_Network_Ads.csv')
x=dataset.iloc[:,:-1].values
y=dataset.iloc[:,-1].values

## Spliting dataset train test set

In [3]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test=train_test_split(x,y, test_size=0.25, random_state=0)

## Feature scaling

In [4]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
x_train=sc.fit_transform(x_train)
x_test=sc.transform(x_test)

## Training Support Vector Machine (rbf) model on Training set

In [5]:
from sklearn.svm import SVC
classifier=SVC(kernel='rbf',  random_state=0)
classifier.fit(x_train, y_train)

SVC(random_state=0)

## Predicting a new result

In [6]:
classifier.predict(sc.transform([[30, 87000]]))

array([0], dtype=int64)

## Predicting a Test set result

In [7]:
y_pred=classifier.predict(x_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))

[[0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]]


## Making confusion matrix

In [8]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm= confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[64  4]
 [ 3 29]]


0.93

## Applying k-Fold Cross Validation

In [9]:
from sklearn.model_selection import cross_val_score
accuracies=cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=10)
print('Accuracy:{:.2f} %'.format(accuracies.mean()*100))
print('Standard Deviation:{:.2f} %'.format(accuracies.std()*100))

Accuracy:90.33 %
Standard Deviation:6.57 %


## Applying Grid Search to find the best model and the best parameters

In [10]:
from sklearn.model_selection import GridSearchCV
parameters= [{'C':[0.25, 0.5, 0.75, 1],'kernel':['linear']},
             {'C':[0.25, 0.5, 0.75, 1],'kernel':['rbf'], 'gamma':[0.1, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.7, 0.8, 0.9]}]
grid_search= GridSearchCV(estimator=classifier, 
                          param_grid=parameters, 
                          scoring='accuracy', cv=10, n_jobs=-1)
grid_search.fit(x_train, y_train)
best_accuracy=grid_search.best_score_
best_parameters=grid_search.best_params_
print('Best Accuracy:{:.2f} %'.format(best_accuracy*100))
print('Best Parameters:',best_parameters)

Best Accuracy:90.67 %
Best Parameters: {'C': 0.5, 'gamma': 0.6, 'kernel': 'rbf'}


## Visualizing the Training result

In [11]:
from matplotlib.colors import ListedColormap
x_set, y_set=sc.inverse_transform(x_train), y_train
x1, x2=np.meshgrid(np.arange(start=x_set[:,0].min()-10, stop=x_set[:,0].max()+10, step=0.25),
                   np.arange(start=x_set[:,1].min()-1000, stop=x_set[:,1].max()+1000, step=0.25))
plt.contourf(x1, x2, classifier.predict(sc.transform(np.array([x1.ravel(),x2.ravel()]).T)).reshape(x1.shape),
             alpha=0.75, cmap=ListedColormap(('red','green')))
plt.xlim(x1.min(), x1.max())
plt.ylim(x2.min(), x2.max())
for i,j in enumerate(np.unique(y_set)):
    plt.scatter(x_set[y_set==j,0], x_set[y_set==j,1], c=ListedColormap(('red', 'green'))(i), label=j )
    plt.title('Support Vector Machine (rbf)(Training set)')
    plt.xlabel('Age')
    plt.ylabel('Estimated Salary')
    plt.legend()
    plt.show

KeyboardInterrupt: 

## Visualizing the Test result

In [None]:
from matplotlib.colors import ListedColormap
x_set, y_set=sc.inverse_transform(x_test), y_test
x1, x2=np.meshgrid(np.arange(start=x_set[:,0].min()-10, stop=x_set[:,0].max()+10, step=0.25),
                   np.arange(start=x_set[:,1].min()-1000, stop=x_set[:,1].max()+1000, step=0.25))
plt.contourf(x1, x2, classifier.predict(sc.transform(np.array([x1.ravel(),x2.ravel()]).T)).reshape(x1.shape),
             alpha=0.75, cmap=ListedColormap(('red','green')))
plt.xlim(x1.min(), x1.max())
plt.ylim(x2.min(), x2.max())
for i,j in enumerate(np.unique(y_set)):
    plt.scatter(x_set[y_set==j,0], x_set[y_set==j,1], c=ListedColormap(('red', 'green'))(i), label=j )
    plt.title('Support Vector Machine (rbf)(Test set)')
    plt.xlabel('Age')
    plt.ylabel('Estimated Salary')
    plt.legend()
    plt.show