# Importing libraries
The necessary libraries needs to be imported in order to perform required analysis for further comparing the models of Decision tree, KNN, Logistic Regression and SVC.

In [1]:
import os
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split, KFold, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, matthews_corrcoef,classification_report,roc_curve, auc
from sklearn.svm import SVC
from sklearn.metrics import f1_score
from sklearn.preprocessing import Normalizer,MinMaxScaler
from sklearn.naive_bayes import MultinomialNB
import scikitplot as skplt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

# Car Evaluation 

# Importing data

In [2]:
col_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'acceptability']
cardata = pd.read_csv('car.data', names = col_names)

In [4]:
cardata.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,acceptability
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


# Taking Car Data as Numeric

# Data tranformation

In [6]:
cardata['buying'] = cardata['buying'].replace('vhigh', 3)
cardata['buying'] = cardata['buying'].replace('high', 2)
cardata['buying'] = cardata['buying'].replace('med', 1)
cardata['buying'] = cardata['buying'].replace('low', 0)
cardata['maint'] = cardata['maint'].replace('vhigh', 3)
cardata['maint'] = cardata['maint'].replace('high', 2)
cardata['maint'] = cardata['maint'].replace('med', 1)
cardata['maint'] = cardata['maint'].replace('low', 0)
cardata['doors'] = cardata['doors'].replace('5more', 3)
cardata['doors'] = cardata['doors'].replace('4', 2)
cardata['doors'] = cardata['doors'].replace('3', 1)
cardata['doors'] = cardata['doors'].replace('2', 0)
cardata['persons'] = cardata['persons'].replace('more', 2)
cardata['persons'] = cardata['persons'].replace('4', 1)
cardata['persons'] = cardata['persons'].replace('2', 0)
cardata['lug_boot'] = cardata['lug_boot'].replace('big', 2)
cardata['lug_boot'] = cardata['lug_boot'].replace('med', 1)
cardata['lug_boot'] = cardata['lug_boot'].replace('small', 0)
cardata['safety'] = cardata['safety'].replace('high', 2)
cardata['safety'] = cardata['safety'].replace('med', 1)
cardata['safety'] = cardata['safety'].replace('low', 0)
cardata['acceptability'] = cardata['acceptability'].replace('vgood', 3)
cardata['acceptability'] = cardata['acceptability'].replace('good', 2)
cardata['acceptability'] = cardata['acceptability'].replace('acc', 1)
cardata['acceptability'] = cardata['acceptability'].replace('unacc', 0)

In [7]:
cardata.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,acceptability
0,3,3,0,0,0,0,0
1,3,3,0,0,0,1,0
2,3,3,0,0,0,2,0
3,3,3,0,0,1,0,0
4,3,3,0,0,1,1,0


# Segmenting target and predictor data
Segmented the dataset in target data and predictor data. Here the target data is column "acceptability" and predictor data consist all other columns.

In [8]:
X = cardata.drop(['acceptability'], axis = 'columns')
Y = cardata['acceptability']

In [9]:
X.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,3,3,0,0,0,0
1,3,3,0,0,0,1
2,3,3,0,0,0,2
3,3,3,0,0,1,0
4,3,3,0,0,1,1


In [10]:
Y.head()

0    0
1    0
2    0
3    0
4    0
Name: acceptability, dtype: int64

# Spliting target and predictor data into training and testing 
Splitted the target data and predictor data into training and testing, with 67 % data as training and 33 % as testing. 

In [196]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.33)

# Scaling the data using MinMaxScaler
The data is scaled using MinMaxScaler on a range of 0 to 1.This process is same for all models. Although Decision Tree model doesn't require data to be scaled but for keeping the dataset uniform and on same scale for all models the same scaled data is used for Decision Tree model as well. 

In [197]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Decision Tree

## Finding Best hyperparameters
The parameters used are "max_depth" in the range of 1 to 30, "criterion" either entropy or gini, 'min_samples_leaf' in the range of 1 to 30. GridsearchCV function takes the classifier and all values of parameters and gives the best value for each parameter after a certain no. of iterations which is in our case taken as 10 (cv = 10). The best paramters with the best score on training data are printed after the following code. 

In [198]:
clf = DecisionTreeClassifier()
p_grid = {"max_depth": list(range(1,30)), "criterion" : ['entropy','gini'], 'min_samples_leaf' : list(range(1,30))}
grid = GridSearchCV(estimator = clf, param_grid = p_grid, cv = 10)
grid.fit(X_train_scaled, Y_train)
finalparam = grid.best_params_
print('\033[1m' + 'Best Hyperparamters:' + "\033[0;0m", grid.best_params_ )
print('\033[1m' + 'Best Score:' + "\033[0;0m", grid.best_score_)

[1mBest Hyperparamters:[0;0m {'criterion': 'entropy', 'max_depth': 22, 'min_samples_leaf': 1}
[1mBest Score:[0;0m 0.9732065687121867


## Using the best hyperparameters in building the final model.
The final model is build by using the best parameters generated using gridsearch. Below is the summary of the model.

In [199]:
classifier_d = DecisionTreeClassifier(criterion = finalparam['criterion'],
                                      max_depth = finalparam['max_depth'],
                                      min_samples_leaf = finalparam['min_samples_leaf'])
classifier_d.fit(X_train_scaled, Y_train)
print('\033[1m' + "GOODNESS OF MODEL" + "\033[0;0m")
print('\033[1m' + "Accuracy:" + "\033[0;0m", accuracy_score(Y_test, classifier_d.predict(X_test_scaled)))
print('*' * 50)
print(cross_val_score(classifier_d, X_train_scaled, Y_train, cv = 10, scoring = 'accuracy'))
print('*' * 50)
print('\033[1m' + "cv Score:" + "\033[0;0m"" %0.2f (+/- %0.2f)" % (cross_val_score(classifier_d,
                                                                                   X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').mean(),
                                       cross_val_score(classifier_d, X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').std() * 2))
print('*' * 50)
print(classification_report(Y_test, classifier_d.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Confusion Matrix\n' + "\033[0;0m", confusion_matrix(Y_test, classifier_d.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Matthews Corrcoef' + "\033[0;0m", matthews_corrcoef(Y_test, classifier_d.predict(X_test_scaled)))

[1mGOODNESS OF MODEL[0;0m
[1mAccuracy:[0;0m 0.9859894921190894
**************************************************
[0.95726496 0.98290598 0.93965517 0.97413793 0.99137931 0.96521739
 0.9826087  0.99130435 0.96521739 0.95652174]
**************************************************
[1mcv Score:[0;0m 0.97 (+/- 0.04)
**************************************************
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       410
           1       0.98      0.96      0.97       124
           2       0.83      0.88      0.86        17
           3       1.00      1.00      1.00        20

    accuracy                           0.99       571
   macro avg       0.95      0.96      0.96       571
weighted avg       0.99      0.99      0.99       571

**************************************************
[1mConfusion Matrix
[0;0m [[409   1   0   0]
 [  2 119   3   0]
 [  0   2  15   0]
 [  0   0   0  20]]
****************************************

# KNN

## Finding Best hyperparameters
The parameters used are "n_neighbors" in the range of 1 to 30, "weights" either entropy or gini. GridsearchCV function takes the classifier and all values of parameters and gives the best value for each parameter after a certain no. of iterations which is in our case taken as 10 (cv = 10). The best paramters with the best score on training data are printed after the following code. 

In [200]:
clf = KNeighborsClassifier()
p_grid = {'weights':["uniform", "distance"], 'n_neighbors' : list(range(1,30))}
grid = GridSearchCV(estimator = clf, param_grid = p_grid, cv = 10)
grid.fit(X_train_scaled, Y_train)
finalparam = grid.best_params_
print('\033[1m' + 'Best Hyperparamters:' + "\033[0;0m", grid.best_params_ )
print('\033[1m' + 'Best Score:' + "\033[0;0m", grid.best_score_)

[1mBest Hyperparamters:[0;0m {'n_neighbors': 13, 'weights': 'distance'}
[1mBest Score:[0;0m 0.9515989628349178


## Using the best hyperparameters in building the final model.
The final model is build by using the best parameters generated using gridsearch. Below is the summary of the model.

In [201]:
classifier_k = KNeighborsClassifier(n_neighbors= finalparam['n_neighbors'], weights= finalparam['weights'])
classifier_k.fit(X_train_scaled, Y_train)
print('\033[1m' + "GOODNESS OF MODEL" + "\033[0;0m")
print('\033[1m' + "Accuracy :" + "\033[0;0m", accuracy_score(Y_test, classifier_k.predict(X_test_scaled)))
print('*' * 50)
print(cross_val_score(classifier_k, X_train_scaled, Y_train, cv=10, scoring = 'accuracy'))
print('*' * 50)
print('\033[1m' + "cv Score:" + "\033[0;0m"" %0.2f (+/- %0.2f)" % (cross_val_score(classifier_k,
                                                                                   X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').mean(),
                                       cross_val_score(classifier_k, X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').std() * 2))
print('*' * 50)
print(classification_report(Y_test, classifier_k.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Confusion Matrix\n' + "\033[0;0m", confusion_matrix(Y_test, classifier_k.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Matthews Corrcoef' + "\033[0;0m", matthews_corrcoef(Y_test, classifier_k.predict(X_test_scaled)))

[1mGOODNESS OF MODEL[0;0m
[1mAccuracy :[0;0m 0.9562171628721541
**************************************************
[0.93162393 0.95726496 0.94827586 0.95689655 0.93103448 0.9826087
 0.97391304 0.95652174 0.93043478 0.94782609]
**************************************************
[1mcv Score:[0;0m 0.95 (+/- 0.03)
**************************************************
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       410
           1       0.88      0.93      0.91       124
           2       0.87      0.76      0.81        17
           3       0.94      0.80      0.86        20

    accuracy                           0.96       571
   macro avg       0.92      0.87      0.89       571
weighted avg       0.96      0.96      0.96       571

**************************************************
[1mConfusion Matrix
[0;0m [[402   8   0   0]
 [  7 115   2   0]
 [  0   3  13   1]
 [  0   4   0  16]]
****************************************

# Logistic Regression

## Finding Best hyperparameters
The parameters used are "C" in the range of 1 to 30, "penalty" either l1 or l2. GridsearchCV function takes the classifier and all values of parameters and gives the best value for each parameter after a certain no. of iterations which is in our case taken as 10 (cv = 10). The best paramters with the best score on training data are printed after the following code.

In [202]:
clf = LogisticRegression()
p_grid = {'penalty':["l1", "l2"], 'C' : list(range(1,30))}
grid = GridSearchCV(estimator = clf, param_grid = p_grid, cv = 10)
grid.fit(X_train_scaled, Y_train)
finalparam = grid.best_params_
print('\033[1m' + 'Best Hyperparamters:' + "\033[0;0m", grid.best_params_ )
print('\033[1m' + 'Best Score:' + "\033[0;0m", grid.best_score_)

[1mBest Hyperparamters:[0;0m {'C': 9, 'penalty': 'l1'}
[1mBest Score:[0;0m 0.7960242005185826


## Using the best hyperparameters in building the final model.
The final model is build by using the best parameters generated using gridsearch. Below is the summary of the model.

In [203]:
classifier_l = LogisticRegression(C= finalparam['C'], penalty = finalparam['penalty'])
classifier_l.fit(X_train_scaled, Y_train)
print('\033[1m' + "GOODNESS OF MODEL" + "\033[0;0m")
print('\033[1m' + "Accuracy :" + "\033[0;0m", accuracy_score(Y_test, classifier_l.predict(X_test_scaled)))
print('*' * 50)
print(cross_val_score(classifier_l, X_train_scaled, Y_train, cv=10, scoring = 'accuracy'))
print('*' * 50)
print('\033[1m' + "cv Score:" + "\033[0;0m"" %0.2f (+/- %0.2f)" % (cross_val_score(classifier_l,
                                                                                   X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').mean(),
                                       cross_val_score(classifier_l, X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').std() * 2))
print('*' * 50)
print(classification_report(Y_test, classifier_l.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Confusion Matrix\n' + "\033[0;0m", confusion_matrix(Y_test, classifier_l.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Matthews Corrcoef' + "\033[0;0m", matthews_corrcoef(Y_test, classifier_l.predict(X_test_scaled)))

[1mGOODNESS OF MODEL[0;0m
[1mAccuracy :[0;0m 0.8371278458844134
**************************************************
[0.84615385 0.8034188  0.82758621 0.82758621 0.76724138 0.73913043
 0.80869565 0.7826087  0.75652174 0.8       ]
**************************************************
[1mcv Score:[0;0m 0.80 (+/- 0.06)
**************************************************
              precision    recall  f1-score   support

           0       0.88      0.95      0.91       410
           1       0.70      0.57      0.63       124
           2       0.56      0.29      0.38        17
           3       0.80      0.60      0.69        20

    accuracy                           0.84       571
   macro avg       0.73      0.60      0.65       571
weighted avg       0.83      0.84      0.83       571

**************************************************
[1mConfusion Matrix
[0;0m [[390  14   4   2]
 [ 53  71   0   0]
 [  2   9   5   1]
 [  0   8   0  12]]
***************************************

# SVM

## Finding Best hyperparameters
The parameters used are "C" in the range of 1 to 30, "gamma" either auto or scale, 'degree' in the range of 1 to 30 and 'kernal' either linear or poly or rbf or sigmoid. GridsearchCV function takes the classifier and all values of parameters and gives the best value for each parameter after a certain no. of iterations which is in our case taken as 10 (cv = 10). The best paramters with the best score on training data are printed after the following code. 

In [204]:
clf = SVC()
p_grid = {'kernel':['linear', 'poly', 'rbf', 'sigmoid' ],
          'C' : list(range(1,30)), 'degree': list(range(3,8)), 'gamma': ['auto', 'scale']}
grid = GridSearchCV(estimator = clf, param_grid = p_grid, cv = 10)
grid.fit(X_train_scaled, Y_train)
finalparam = grid.best_params_
print('\033[1m' + 'Best Hyperparamters:' + "\033[0;0m", grid.best_params_ )
print('\033[1m' + 'Best Score:' + "\033[0;0m", grid.best_score_)

[1mBest Hyperparamters:[0;0m {'C': 5, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}
[1mBest Score:[0;0m 0.9835782195332757


## Using the best hyperparameters in building the final model.
The final model is build by using the best parameters generated using gridsearch. Below is the summary of the model.

In [205]:
classifier_S = SVC(kernel= finalparam['kernel'], C = finalparam['C'], degree = finalparam['degree'],
                   gamma = finalparam['gamma'], probability = True)
classifier_S.fit(X_train_scaled, Y_train)
print('\033[1m' + "GOODNESS OF MODEL" + "\033[0;0m")
print('\033[1m' + "Accuracy :" + "\033[0;0m", accuracy_score(Y_test, classifier_S.predict(X_test_scaled)))
print('*' * 50)
print(cross_val_score(classifier_S, X_train_scaled, Y_train, cv=10, scoring='accuracy'))
print('*' * 50)
print('\033[1m' + "CV Score:" + "\033[0;0m"" %0.2f (+/- %0.2f)" % (cross_val_score(classifier_S,
                                                                                   X_train_scaled, Y_train,
                                                       cv=10, scoring='accuracy').mean(),
                                       cross_val_score(classifier_S, X_train_scaled, Y_train,
                                                       cv=10, scoring='accuracy').std() * 2))
print('*' * 50)
print(classification_report(Y_test, classifier_S.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Confusion Matrix\n' + "\033[0;0m", confusion_matrix(Y_test, classifier_S.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Matthews Corrcoef' + "\033[0;0m", matthews_corrcoef(Y_test, classifier_S.predict(X_test_scaled)))

[1mGOODNESS OF MODEL[0;0m
[1mAccuracy :[0;0m 0.9877408056042032
**************************************************
[0.98290598 1.         0.94827586 0.96551724 0.99137931 0.99130435
 0.99130435 1.         0.9826087  0.9826087 ]
**************************************************
[1mCV Score:[0;0m 0.98 (+/- 0.03)
**************************************************
              precision    recall  f1-score   support

           0       1.00      0.99      1.00       410
           1       0.98      0.98      0.98       124
           2       0.88      0.88      0.88        17
           3       0.91      1.00      0.95        20

    accuracy                           0.99       571
   macro avg       0.94      0.96      0.95       571
weighted avg       0.99      0.99      0.99       571

**************************************************
[1mConfusion Matrix
[0;0m [[407   3   0   0]
 [  0 122   2   0]
 [  0   0  15   2]
 [  0   0   0  20]]
***************************************

# Multinomial NB

## Finding Best hyperparameters
The parameter used is 'alpha' either 1 or 1e-1 or 1e-2 . GridsearchCV function takes the classifier and all values of parameter and gives the best value for parameter after a certain no. of iterations which is in our case taken as 10 (cv = 10). The best paramters with the best score on training data are printed after the following code.  

In [208]:
clf = MultinomialNB()
p_grid = {'alpha': [1, 1e-1, 1e-2]}
grid = GridSearchCV(estimator = clf, param_grid = p_grid, cv = 10)
grid.fit(X_train_scaled, Y_train)
finalparam = grid.best_params_
print('\033[1m' + 'Best Hyperparamters:' + "\033[0;0m", grid.best_params_ )
print('\033[1m' + 'Best Score:' + "\033[0;0m", grid.best_score_)

[1mBest Hyperparamters:[0;0m {'alpha': 1}
[1mBest Score:[0;0m 0.6914433880726015


## Using the best hyperparameters in building the final model.
The final model is build by using the best parameters generated using gridsearch. Below is the summary of the model.

In [209]:
classifier_N = MultinomialNB(alpha = finalparam['alpha'])
classifier_N.fit(X_train_scaled, Y_train)
print('\033[1m' + "GOODNESS OF MODEL" + "\033[0;0m")
print('\033[1m' + "Accuracy :" + "\033[0;0m", accuracy_score(Y_test, classifier_N.predict(X_test_scaled)))
print('*' * 50)
print(cross_val_score(classifier_N, X_train_scaled, Y_train, cv=10, scoring = 'accuracy'))
print('*' * 50)
print('\033[1m' + "cv Score:" + "\033[0;0m"" %0.2f (+/- %0.2f)" % (cross_val_score(classifier_N,
                                                                                   X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').mean(),
                                       cross_val_score(classifier_N, X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').std() * 2))
print('*' * 50)
print(classification_report(Y_test, classifier_N.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Confusion Matrix\n' + "\033[0;0m", confusion_matrix(Y_test, classifier_N.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Matthews Corrcoef' + "\033[0;0m", matthews_corrcoef(Y_test, classifier_N.predict(X_test_scaled)))

[1mGOODNESS OF MODEL[0;0m
[1mAccuracy :[0;0m 0.7180385288966725
**************************************************
[0.68376068 0.68376068 0.68965517 0.68965517 0.68965517 0.69565217
 0.69565217 0.69565217 0.69565217 0.69565217]
**************************************************
[1mcv Score:[0;0m 0.69 (+/- 0.01)
**************************************************
              precision    recall  f1-score   support

           0       0.72      1.00      0.84       410
           1       0.00      0.00      0.00       124
           2       0.00      0.00      0.00        17
           3       0.00      0.00      0.00        20

    accuracy                           0.72       571
   macro avg       0.18      0.25      0.21       571
weighted avg       0.52      0.72      0.60       571

**************************************************
[1mConfusion Matrix
[0;0m [[410   0   0   0]
 [124   0   0   0]
 [ 17   0   0   0]
 [ 20   0   0   0]]
***************************************

# Taking Car Data as Categories

# Data tranformation

In [11]:
cardata1 = cardata
buying = pd.get_dummies(cardata1.buying)
maint = pd.get_dummies(cardata1.maint)
doors = pd.get_dummies(cardata1.doors)
persons = pd.get_dummies(cardata1.persons)
lug_boot = pd.get_dummies(cardata1.lug_boot)
safety = pd.get_dummies(cardata1.safety)
buying.columns = ['buying_high', 'buying_low', 'buying_med', 'buying_vhigh']
maint.columns = ['maint_high', 'maint_low', 'maint_med', 'maint_vhigh']
doors.columns = ['doors_2', 'doors_3', 'doors_4', 'doors_5more']
persons.columns = ['persons_2', 'persons_4', 'persons_more']
lug_boot.columns = ['lug_boot_big', 'lug_boot_med', 'lug_boot_small']
safety.columns = ['safety_high', 'safety_low', 'safety_med']
cardata1new = pd.concat([cardata1,buying,maint,doors,persons,lug_boot,safety], axis = 'columns')
cardata1new = cardata1new.drop(['buying','maint','doors','persons','lug_boot','safety'], axis = 'columns')
cardata1new['acceptability'] = cardata1new['acceptability'].replace('vgood', 3)
cardata1new['acceptability'] = cardata1new['acceptability'].replace('good', 2)
cardata1new['acceptability'] = cardata1new['acceptability'].replace('acc', 1)
cardata1new['acceptability'] = cardata1new['acceptability'].replace('unacc', 0)

In [12]:
cardata1new.head()

Unnamed: 0,acceptability,buying_high,buying_low,buying_med,buying_vhigh,maint_high,maint_low,maint_med,maint_vhigh,doors_2,...,doors_5more,persons_2,persons_4,persons_more,lug_boot_big,lug_boot_med,lug_boot_small,safety_high,safety_low,safety_med
0,0,0,0,0,1,0,0,0,1,1,...,0,1,0,0,1,0,0,1,0,0
1,0,0,0,0,1,0,0,0,1,1,...,0,1,0,0,1,0,0,0,1,0
2,0,0,0,0,1,0,0,0,1,1,...,0,1,0,0,1,0,0,0,0,1
3,0,0,0,0,1,0,0,0,1,1,...,0,1,0,0,0,1,0,1,0,0
4,0,0,0,0,1,0,0,0,1,1,...,0,1,0,0,0,1,0,0,1,0


# Segmenting target and predictor data
Segmented the dataset in target data and predictor data. Here the target data is column "acceptability" and predictor data consist all other columns.

In [13]:
X = cardata1new.drop(['acceptability'], axis = 'columns')
Y = cardata1new['acceptability']

In [14]:
X.head()

Unnamed: 0,buying_high,buying_low,buying_med,buying_vhigh,maint_high,maint_low,maint_med,maint_vhigh,doors_2,doors_3,...,doors_5more,persons_2,persons_4,persons_more,lug_boot_big,lug_boot_med,lug_boot_small,safety_high,safety_low,safety_med
0,0,0,0,1,0,0,0,1,1,0,...,0,1,0,0,1,0,0,1,0,0
1,0,0,0,1,0,0,0,1,1,0,...,0,1,0,0,1,0,0,0,1,0
2,0,0,0,1,0,0,0,1,1,0,...,0,1,0,0,1,0,0,0,0,1
3,0,0,0,1,0,0,0,1,1,0,...,0,1,0,0,0,1,0,1,0,0
4,0,0,0,1,0,0,0,1,1,0,...,0,1,0,0,0,1,0,0,1,0


In [15]:
Y.head()

0    0
1    0
2    0
3    0
4    0
Name: acceptability, dtype: int64

# Spliting target and predictor data into training and testing 
Splitted the target data and predictor data into training and testing, with 67 % data as training and 33 % as testing. 

In [212]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.33)

# Scaling the data using MinMaxScaler
The data is scaled using MinMaxScaler on a range of 0 to 1.This process is same for all models. Although Decision Tree model doesn't require data to be scaled but for keeping the dataset uniform and on same scale for all models the same scaled data is used for Decision Tree model as well. 

In [213]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Decision Tree

## Finding Best hyperparameters
The parameters used are "max_depth" in the range of 1 to 30, "criterion" either entropy or gini, 'min_samples_leaf' in the range of 1 to 30. GridsearchCV function takes the classifier and all values of parameters and gives the best value for each parameter after a certain no. of iterations which is in our case taken as 10 (cv = 10). The best paramters with the best score on training data are printed after the following code. 

In [214]:
clf = DecisionTreeClassifier()
p_grid = {"max_depth": list(range(1,30)), "criterion" : ['entropy','gini'], 'min_samples_leaf' : list(range(1,30))}
grid = GridSearchCV(estimator = clf, param_grid = p_grid, cv = 10)
grid.fit(X_train_scaled, Y_train)
finalparam = grid.best_params_
print('\033[1m' + 'Best Hyperparamters:' + "\033[0;0m", grid.best_params_ )
print('\033[1m' + 'Best Score:' + "\033[0;0m", grid.best_score_)

[1mBest Hyperparamters:[0;0m {'criterion': 'entropy', 'max_depth': 21, 'min_samples_leaf': 1}
[1mBest Score:[0;0m 0.9680207433016422


## Using the best hyperparameters in building the final model.
The final model is build by using the best parameters generated using gridsearch. Below is the summary of the model.

In [215]:
classifier_d = DecisionTreeClassifier(criterion = finalparam['criterion'],
                                      max_depth = finalparam['max_depth'],
                                      min_samples_leaf = finalparam['min_samples_leaf'])
classifier_d.fit(X_train_scaled, Y_train)
print('\033[1m' + "GOODNESS OF MODEL" + "\033[0;0m")
print('\033[1m' + "Accuracy:" + "\033[0;0m", accuracy_score(Y_test, classifier_d.predict(X_test_scaled)))
print('*' * 50)
print(cross_val_score(classifier_d, X_train_scaled, Y_train, cv = 10, scoring = 'accuracy'))
print('*' * 50)
print('\033[1m' + "cv Score:" + "\033[0;0m"" %0.2f (+/- %0.2f)" % (cross_val_score(classifier_d,
                                                                                   X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').mean(),
                                       cross_val_score(classifier_d, X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').std() * 2))
print('*' * 50)
print(classification_report(Y_test, classifier_d.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Confusion Matrix\n' + "\033[0;0m", confusion_matrix(Y_test, classifier_d.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Matthews Corrcoef' + "\033[0;0m", matthews_corrcoef(Y_test, classifier_d.predict(X_test_scaled)))

[1mGOODNESS OF MODEL[0;0m
[1mAccuracy:[0;0m 0.9632224168126094
**************************************************
[0.97435897 0.97435897 0.96581197 0.95689655 0.93965517 0.93913043
 0.96521739 1.         0.97391304 0.98245614]
**************************************************
[1mcv Score:[0;0m 0.97 (+/- 0.03)
**************************************************
              precision    recall  f1-score   support

           0       0.99      0.98      0.99       390
           1       0.95      0.91      0.93       139
           2       0.67      0.88      0.76        16
           3       0.90      1.00      0.95        26

    accuracy                           0.96       571
   macro avg       0.88      0.94      0.90       571
weighted avg       0.97      0.96      0.96       571

**************************************************
[1mConfusion Matrix
[0;0m [[383   6   1   0]
 [  4 127   6   2]
 [  0   1  14   1]
 [  0   0   0  26]]
****************************************

# KNN

## Finding Best hyperparameters
The parameters used are "n_neighbors" in the range of 1 to 30, "weights" either entropy or gini. GridsearchCV function takes the classifier and all values of parameters and gives the best value for each parameter after a certain no. of iterations which is in our case taken as 10 (cv = 10). The best paramters with the best score on training data are printed after the following code. 

In [216]:
clf = KNeighborsClassifier()
p_grid = {'weights':["uniform", "distance"], 'n_neighbors' : list(range(1,30))}
grid = GridSearchCV(estimator = clf, param_grid = p_grid, cv = 10)
grid.fit(X_train_scaled, Y_train)
finalparam = grid.best_params_
print('\033[1m' + 'Best Hyperparamters:' + "\033[0;0m", grid.best_params_ )
print('\033[1m' + 'Best Score:' + "\033[0;0m", grid.best_score_)

[1mBest Hyperparamters:[0;0m {'n_neighbors': 9, 'weights': 'distance'}
[1mBest Score:[0;0m 0.9127052722558341


## Using the best hyperparameters in building the final model.
The final model is build by using the best parameters generated using gridsearch. Below is the summary of the model.

In [217]:
classifier_k = KNeighborsClassifier(n_neighbors= finalparam['n_neighbors'], weights= finalparam['weights'])
classifier_k.fit(X_train_scaled, Y_train)
print('\033[1m' + "GOODNESS OF MODEL" + "\033[0;0m")
print('\033[1m' + "Accuracy :" + "\033[0;0m", accuracy_score(Y_test, classifier_k.predict(X_test_scaled)))
print('*' * 50)
print(cross_val_score(classifier_k, X_train_scaled, Y_train, cv=10, scoring = 'accuracy'))
print('*' * 50)
print('\033[1m' + "cv Score:" + "\033[0;0m"" %0.2f (+/- %0.2f)" % (cross_val_score(classifier_k,
                                                                                   X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').mean(),
                                       cross_val_score(classifier_k, X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').std() * 2))
print('*' * 50)
print(classification_report(Y_test, classifier_k.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Confusion Matrix\n' + "\033[0;0m", confusion_matrix(Y_test, classifier_k.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Matthews Corrcoef' + "\033[0;0m", matthews_corrcoef(Y_test, classifier_k.predict(X_test_scaled)))

[1mGOODNESS OF MODEL[0;0m
[1mAccuracy :[0;0m 0.9054290718038529
**************************************************
[0.91452991 0.98290598 0.8974359  0.88793103 0.92241379 0.90434783
 0.89565217 0.91304348 0.90434783 0.90350877]
**************************************************
[1mcv Score:[0;0m 0.91 (+/- 0.05)
**************************************************
              precision    recall  f1-score   support

           0       0.94      0.99      0.96       390
           1       0.83      0.80      0.82       139
           2       0.50      0.38      0.43        16
           3       1.00      0.54      0.70        26

    accuracy                           0.91       571
   macro avg       0.82      0.68      0.73       571
weighted avg       0.90      0.91      0.90       571

**************************************************
[1mConfusion Matrix
[0;0m [[386   4   0   0]
 [ 24 111   4   0]
 [  1   9   6   0]
 [  1   9   2  14]]
***************************************

# Logistic Regression

## Finding Best hyperparameters
The parameters used are "C" in the range of 1 to 30, "penalty" either l1 or l2. GridsearchCV function takes the classifier and all values of parameters and gives the best value for each parameter after a certain no. of iterations which is in our case taken as 10 (cv = 10). The best paramters with the best score on training data are printed after the following code.

In [218]:
clf = LogisticRegression()
p_grid = {'penalty':["l1", "l2"], 'C' : list(range(1,30))}
grid = GridSearchCV(estimator = clf, param_grid = p_grid, cv = 10)
grid.fit(X_train_scaled, Y_train)
finalparam = grid.best_params_
print('\033[1m' + 'Best Hyperparamters:' + "\033[0;0m", grid.best_params_ )
print('\033[1m' + 'Best Score:' + "\033[0;0m", grid.best_score_)

[1mBest Hyperparamters:[0;0m {'C': 24, 'penalty': 'l1'}
[1mBest Score:[0;0m 0.8980121002592912


## Using the best hyperparameters in building the final model.
The final model is build by using the best parameters generated using gridsearch. Below is the summary of the model.

In [219]:
classifier_l = LogisticRegression(C= finalparam['C'], penalty = finalparam['penalty'])
classifier_l.fit(X_train_scaled, Y_train)
print('\033[1m' + "GOODNESS OF MODEL" + "\033[0;0m")
print('\033[1m' + "Accuracy :" + "\033[0;0m", accuracy_score(Y_test, classifier_l.predict(X_test_scaled)))
print('*' * 50)
print(cross_val_score(classifier_l, X_train_scaled, Y_train, cv=10, scoring = 'accuracy'))
print('*' * 50)
print('\033[1m' + "cv Score:" + "\033[0;0m"" %0.2f (+/- %0.2f)" % (cross_val_score(classifier_l,
                                                                                   X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').mean(),
                                       cross_val_score(classifier_l, X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').std() * 2))
print('*' * 50)
print(classification_report(Y_test, classifier_l.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Confusion Matrix\n' + "\033[0;0m", confusion_matrix(Y_test, classifier_l.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Matthews Corrcoef' + "\033[0;0m", matthews_corrcoef(Y_test, classifier_l.predict(X_test_scaled)))

[1mGOODNESS OF MODEL[0;0m
[1mAccuracy :[0;0m 0.8861646234676007
**************************************************
[0.87179487 0.94871795 0.88034188 0.88793103 0.9137931  0.90434783
 0.90434783 0.89565217 0.86086957 0.9122807 ]
**************************************************
[1mcv Score:[0;0m 0.90 (+/- 0.05)
**************************************************
              precision    recall  f1-score   support

           0       0.94      0.96      0.95       390
           1       0.77      0.78      0.77       139
           2       0.36      0.31      0.33        16
           3       0.91      0.77      0.83        26

    accuracy                           0.89       571
   macro avg       0.75      0.70      0.72       571
weighted avg       0.88      0.89      0.88       571

**************************************************
[1mConfusion Matrix
[0;0m [[373  16   1   0]
 [ 22 108   8   1]
 [  0  10   5   1]
 [  0   6   0  20]]
***************************************

# SVM

## Finding Best hyperparameters
The parameters used are "C" in the range of 1 to 30, "gamma" either auto or scale, 'degree' in the range of 1 to 30 and 'kernal' either linear or poly or rbf or sigmoid. GridsearchCV function takes the classifier and all values of parameters and gives the best value for each parameter after a certain no. of iterations which is in our case taken as 10 (cv = 10). The best paramters with the best score on training data are printed after the following code. 

In [220]:
clf = SVC()
p_grid = {'kernel':['linear', 'poly', 'rbf', 'sigmoid' ],
          'C' : list(range(1,30)), 'degree': list(range(3,8)), 'gamma': ['auto', 'scale']}
grid = GridSearchCV(estimator = clf, param_grid = p_grid, cv = 10)
grid.fit(X_train_scaled, Y_train)
finalparam = grid.best_params_
print('\033[1m' + 'Best Hyperparamters:' + "\033[0;0m", grid.best_params_ )
print('\033[1m' + 'Best Score:' + "\033[0;0m", grid.best_score_)

[1mBest Hyperparamters:[0;0m {'C': 2, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'}
[1mBest Score:[0;0m 0.996542783059637


## Using the best hyperparameters in building the final model.
The final model is build by using the best parameters generated using gridsearch. Below is the summary of the model.

In [221]:
classifier_S = SVC(kernel= finalparam['kernel'], C = finalparam['C'], degree = finalparam['degree'],
                   gamma = finalparam['gamma'], probability = True)
classifier_S.fit(X_train_scaled, Y_train)
print('\033[1m' + "GOODNESS OF MODEL" + "\033[0;0m")
print('\033[1m' + "Accuracy :" + "\033[0;0m", accuracy_score(Y_test, classifier_S.predict(X_test_scaled)))
print('*' * 50)
print(cross_val_score(classifier_S, X_train_scaled, Y_train, cv=10, scoring='accuracy'))
print('*' * 50)
print('\033[1m' + "CV Score:" + "\033[0;0m"" %0.2f (+/- %0.2f)" % (cross_val_score(classifier_S,
                                                                                   X_train_scaled, Y_train,
                                                       cv=10, scoring='accuracy').mean(),
                                       cross_val_score(classifier_S, X_train_scaled, Y_train,
                                                       cv=10, scoring='accuracy').std() * 2))
print('*' * 50)
print(classification_report(Y_test, classifier_S.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Confusion Matrix\n' + "\033[0;0m", confusion_matrix(Y_test, classifier_S.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Matthews Corrcoef' + "\033[0;0m", matthews_corrcoef(Y_test, classifier_S.predict(X_test_scaled)))

[1mGOODNESS OF MODEL[0;0m
[1mAccuracy :[0;0m 0.9929947460595446
**************************************************
[1.         1.         0.99145299 0.99137931 1.         1.
 0.99130435 1.         1.         0.99122807]
**************************************************
[1mCV Score:[0;0m 1.00 (+/- 0.01)
**************************************************
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       390
           1       1.00      0.97      0.99       139
           2       0.80      1.00      0.89        16
           3       1.00      1.00      1.00        26

    accuracy                           0.99       571
   macro avg       0.95      0.99      0.97       571
weighted avg       0.99      0.99      0.99       571

**************************************************
[1mConfusion Matrix
[0;0m [[390   0   0   0]
 [  0 135   4   0]
 [  0   0  16   0]
 [  0   0   0  26]]
***********************************************

# Multinomial NB

## Finding Best hyperparameters
The parameter used is 'alpha' either 1 or 1e-1 or 1e-2 . GridsearchCV function takes the classifier and all values of parameter and gives the best value for parameter after a certain no. of iterations which is in our case taken as 10 (cv = 10). The best paramters with the best score on training data are printed after the following code. 

In [222]:
clf = MultinomialNB()
p_grid = {'alpha': [1, 1e-1, 1e-2]}
grid = GridSearchCV(estimator = clf, param_grid = p_grid, cv = 10)
grid.fit(X_train_scaled, Y_train)
finalparam = grid.best_params_
print('\033[1m' + 'Best Hyperparamters:' + "\033[0;0m", grid.best_params_ )
print('\033[1m' + 'Best Score:' + "\033[0;0m", grid.best_score_)

[1mBest Hyperparamters:[0;0m {'alpha': 1}
[1mBest Score:[0;0m 0.8504753673293


## Using the best hyperparameters in building the final model.
The final model is build by using the best parameters generated using gridsearch. Below is the summary of the model.

In [223]:
classifier_N = MultinomialNB(alpha = finalparam['alpha'])
classifier_N.fit(X_train_scaled, Y_train)
print('\033[1m' + "GOODNESS OF MODEL" + "\033[0;0m")
print('\033[1m' + "Accuracy :" + "\033[0;0m", accuracy_score(Y_test, classifier_N.predict(X_test_scaled)))
print('*' * 50)
print(cross_val_score(classifier_N, X_train_scaled, Y_train, cv=10, scoring = 'accuracy'))
print('*' * 50)
print('\033[1m' + "cv Score:" + "\033[0;0m"" %0.2f (+/- %0.2f)" % (cross_val_score(classifier_N,
                                                                                   X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').mean(),
                                       cross_val_score(classifier_N, X_train_scaled, Y_train,
                                                       cv=10, scoring = 'accuracy').std() * 2))
print('*' * 50)
print(classification_report(Y_test, classifier_N.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Confusion Matrix\n' + "\033[0;0m", confusion_matrix(Y_test, classifier_N.predict(X_test_scaled)))
print('*' * 50)
print('\033[1m' + 'Matthews Corrcoef' + "\033[0;0m", matthews_corrcoef(Y_test, classifier_N.predict(X_test_scaled)))

[1mGOODNESS OF MODEL[0;0m
[1mAccuracy :[0;0m 0.830122591943958
**************************************************
[0.81196581 0.88034188 0.83760684 0.85344828 0.87068966 0.86086957
 0.86086957 0.82608696 0.82608696 0.87719298]
**************************************************
[1mcv Score:[0;0m 0.85 (+/- 0.05)
**************************************************
              precision    recall  f1-score   support

           0       0.88      0.97      0.92       390
           1       0.68      0.62      0.65       139
           2       0.45      0.31      0.37        16
           3       0.86      0.23      0.36        26

    accuracy                           0.83       571
   macro avg       0.72      0.53      0.58       571
weighted avg       0.82      0.83      0.82       571

**************************************************
[1mConfusion Matrix
[0;0m [[377  12   1   0]
 [ 49  86   4   0]
 [  0  10   5   1]
 [  0  19   1   6]]
****************************************

# Comparing Accuracies for Numeric Data and Categorical Data for all Classifiers

In [231]:
df1 = pd.DataFrame({"Accuracy for Numeric data":[0.9859,0.9562,0.8371,0.9877,0.7180],
                    "Accuracy for Categorical data":[0.9632,0.9054,0.8861,0.9929,0.8301]})
df1.index = ['Decision Tree', 'KNN', 'Logistic Regression', 'SVM', 'Multinomial NB']
df1

Unnamed: 0,Accuracy for Numeric data,Accuracy for Categorical data
Decision Tree,0.9859,0.9632
KNN,0.9562,0.9054
Logistic Regression,0.8371,0.8861
SVM,0.9877,0.9929
Multinomial NB,0.718,0.8301


# Model Evalutaion - All Models
The above table describes the "Accuracy for Numeric data" and "Accuracy for Categorical data" for all models i.e Decision Tree, KNN, Logistic Regression, SVM and Multinomial NB respectively. It cas be observed that for Numeric Data Multinomial NB and Logistic Regression has the lowest accuracy amongst all models. On the other hand Decision Tree, KNN and SVM has good accuracy with SVM having the highest accuracy. Thus SVM should be finalzed for the analysis.
For Categorical Data Multinomial NB, Logistic Regression and KNN has the lowest accuracy amongst all models. On the other hand Decision Tree and SVM has good accuracy with SVM having the highest accuracy. Thus SVM should be finalzed for the analysis.
Between Numeric Data and Categorial Data, Categorical Data should be opted while considering multiclass problem since it divides the data into categories rather considering value of one category higher than value of other category which happen in case of Numeric Data. It can also be observed that SVM when applied to Categorical Data gives the highest accuracy of 0.9929 which is very good for any prediction. 