# Using Kernel SVM to analyse the reliability of a salary-age dataset

## Importing the libraries

In [None]:
import numpy as np
import pandas as pd
# termcolor is only used to color the predicted-vs-actual listing further down: green for a successful prediction, red for a failed prediction
import sys
from termcolor import colored, cprint

## Importing the dataset

In [None]:
# Load the raw table from disk. The last column is taken as the label (y);
# every other column is taken as an input feature (X).
dataset = pd.read_csv('Purchase_data.csv')
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

- This code block reads our .csv file and places the data in two NumPy arrays: one for the inputs (`X`, every column except the last) and the other for the output (`y`, the last column).

## Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for evaluation. The fixed random_state makes
# the shuffle (and therefore every downstream number) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

- By changing the value of `test_size`, we can change the proportion of data used for the test set vs the training set: 0.25 means 25% of the data goes to the test set, and the remaining 75% to the training set.

## Feature Scaling

- Feature scaling is used to map our input data/Independent Variables (IVs, e.g. age, which can range from 0 to over 100) to a smaller range, to enhance the accuracy of our model.

- This is an especially important step for datasets with multiple IVs of different magnitudes. Otherwise, the model might deem an IV of a greater order of magnitude to be more 'important'.



In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, so no statistics from the test
# rows leak into the model, then apply that same fitted scaler to both splits.
# NOTE: this cell overwrites X_train / X_test. Re-running it without first
# re-running the split cell would fit the scaler on already-scaled data.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

## Training the Kernel SVM model on the Training set

In [None]:
from sklearn.svm import SVC

# Support-vector classifier with the RBF kernel, trained on the scaled
# training split. The bare fit() call is kept as the cell's last expression
# so the notebook echoes the fitted estimator.
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X=X_train, y=y_train)

SVC(random_state=0)

## Predicting a new result

In [None]:
# A raw query must go through the same fitted scaler as the training data
# before it is handed to the classifier.
# NOTE(review): the two values are presumably (age, salary) - confirm against
# the column order of the CSV.
new_sample = sc.transform([[30, 87000]])
print(classifier.predict(new_sample))

[0]


## Predicting the Test set results

In [None]:
y_pred = classifier.predict(X_test)

# Put predictions and true labels side by side:
# column 0 = predicted label, column 1 = actual label.
text = np.column_stack((y_pred, y_test))

for row in text:
    predicted, actual = row
    # green when the prediction matches the true label, red when it does not
    color = 'green' if predicted == actual else 'red'
    print(colored(row, color, attrs=['reverse', 'blink']))

[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[1 1][0m
[5m[7m[32m[0 0][0m
[5m[7m[31m[1 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[31m[1 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[1 1][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[1 1][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[1 1][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[1 1][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[31m[0 1][0m
[5m[7m[32m[1 1][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[1 1][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[32m[0 0][0m
[5m[7m[3

## Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Confusion matrix of true labels vs predicted labels on the held-out test set.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Left as the last expression so the notebook displays the overall accuracy.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[64  4]
 [ 3 29]]


0.93