# Grid Search

## Importing Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## importing the dataset

In [3]:
dataset = pd.read_excel("./Expenditure-churn (3).xlsx")
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 12 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       10000 non-null  int64  
 1   gender    10000 non-null  int64  
 2   marital   10000 non-null  float64
 3   dep       10000 non-null  int64  
 4   Income    10000 non-null  float64
 5   Job yrs   10000 non-null  int64  
 6   Town yrs  10000 non-null  int64  
 7   Yrs Ed    10000 non-null  int64  
 8   Dri Lic   10000 non-null  int64  
 9   Own Home  10000 non-null  int64  
 10  # Cred C  10000 non-null  int64  
 11  Churn     10000 non-null  int64  
dtypes: float64(2), int64(10)
memory usage: 937.6 KB


In [5]:
X = dataset.iloc[:,:-1].values
y = dataset.iloc[:,-1].values

## Splitting the dataset into Training and Testing Set

In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

## Feature Scaling

In [9]:
from sklearn.preprocessing import StandardScaler
svc = StandardScaler()
X_train = svc.fit_transform(X_train)
X_test = svc.transform(X_test)

## Training the Kernel SVM model on the Training Set

In [10]:
from sklearn.svm import SVC
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X_train, y_train)

SVC(random_state=0)

## Making the confusion matrix

In [13]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm, '\n Accuracy: ', accuracy_score(y_test, y_pred)*100)

[[1671   11]
 [  38  780]] 
 Accuracy:  98.04


## Applying Grid Search to find the best model and best Parameters

In [16]:
from sklearn.model_selection import GridSearchCV
parameters = [{'C': [0.25, .5, .75, 1], 'kernel':['linear']},
              {'C': [0.25, .5, .75, 1], 'kernel':['rbf'], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}]
grid_search = GridSearchCV(estimator=classifier,
                           param_grid=parameters,
                           scoring='accuracy',
                           cv=10,
                           n_jobs=-1)
grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print(f"Best Accuracy: {best_accuracy*100}")
print("Best Parameters:", best_parameters)

Best Accuracy: 99.54666666666667
Best Parameters: {'C': 1, 'kernel': 'linear'}


In [15]:
X_train.shape, y_train.shape

((7500, 11), (7500,))

## Fit a new SVC using the new parameters

In [17]:
from sklearn.svm import SVC
classifier = SVC(kernel='linear', random_state=0)
classifier.fit(X_train, y_train)

# Making a confusion matrix
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm, '\n Accuracy: ', accuracy_score(y_test, y_pred)*100)

[[1681    1]
 [  10  808]] 
 Accuracy:  99.56


## Precision and Recall and the F1 Score
The F1 score is the harmonic mean of precision and recall

In [27]:
# precison and recall
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# precison
print("Precision: ", precision_score(y_test, y_pred)*100)
# Recall
print("Recall:    ", recall_score(y_test, y_pred)*100)

Precision:  99.87639060568603
Recall:     98.77750611246944


In [30]:
# f1 Score
print("F1 Score: ", f1_score(y_test, y_pred)*100)

F1 Score:  99.3239090350338


In [29]:
# roc_auc score

print(roc_auc_score(y_test, y_pred)*100)

99.3590265401824


## Applying K-fold cross validation

In [31]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

Accuracy: 99.55 %
Standard Deviation: 0.19 %


In [32]:
accuracies

array([0.99733333, 0.99466667, 0.99866667, 0.99466667, 0.992     ,
       0.996     , 0.99733333, 0.99333333, 0.996     , 0.99466667])