In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, precision_score,recall_score,f1_score, accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier, AdaBoostClassifier, StackingClassifier,BaggingClassifier
from sklearn.svm import SVC

In [13]:
df = pd.read_csv('CAR_EVALUATION.csv')
df.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,Target
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [14]:
df.isnull().sum()

buying      0
maint       0
doors       0
persons     0
lug_boot    0
safety      0
Target      0
dtype: int64

In [15]:
df.shape

(1728, 7)

In [16]:
df['Target'].value_counts()# balancing the data in the data set

unacc    1210
acc       384
good       69
vgood      65
Name: Target, dtype: int64

In [17]:
from sklearn.utils import resample
df1 = df[df['Target'] == 'unacc']
df2 = df[df['Target']== 'acc']
df3 = df[df['Target']== 'good']
df4 = df[df['Target']== 'vgood']

df2_sam = resample(df2,n_samples=1210)
df3_sam = resample(df3,n_samples=1210)
df4_sam = resample(df4,n_samples=1210)

df = pd.concat([df1,df2_sam,df3_sam,df4_sam])
df = df.sample(frac=1,ignore_index=True)
df

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,Target
0,low,low,2,4,med,high,good
1,low,high,4,more,med,high,vgood
2,med,low,4,more,small,high,good
3,high,high,4,more,small,med,unacc
4,med,low,5more,2,small,low,unacc
...,...,...,...,...,...,...,...
4835,low,med,3,4,small,high,good
4836,med,low,2,4,med,low,unacc
4837,high,low,4,more,med,high,acc
4838,low,low,3,4,small,high,good


In [18]:
cart =df.columns
enc =LabelEncoder()
for x in cart:
    df[x] = enc.fit_transform(df[x])
df    

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,Target
0,1,1,0,1,1,0,1
1,1,0,2,2,1,0,3
2,2,1,2,2,2,0,1
3,0,0,2,2,2,2,2
4,2,1,3,0,2,1,2
...,...,...,...,...,...,...,...
4835,1,2,1,1,2,0,1
4836,2,1,0,1,1,1,2
4837,0,1,2,2,1,0,0
4838,1,1,1,1,2,0,1


In [19]:
x= df.drop('Target', axis=1)
y= df['Target']
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size=0.20)

In [21]:
model1=ExtraTreesClassifier()
model1.fit(xtrain,ytrain)
pred1 = model1.predict(xtest)
print(classification_report(ytest,pred1))
print(confusion_matrix(ytest,pred1))
print('Accuracy:', accuracy_score(ytest,pred1))

              precision    recall  f1-score   support

           0       0.99      1.00      1.00       237
           1       1.00      1.00      1.00       237
           2       1.00      0.99      1.00       239
           3       1.00      1.00      1.00       255

    accuracy                           1.00       968
   macro avg       1.00      1.00      1.00       968
weighted avg       1.00      1.00      1.00       968

[[237   0   0   0]
 [  0 237   0   0]
 [  2   0 237   0]
 [  0   0   0 255]]
Accuracy: 0.9979338842975206


In [22]:
model1=GradientBoostingClassifier()
model1.fit(xtrain,ytrain)
pred1 = model1.predict(xtest)
print(classification_report(ytest,pred1))
print(confusion_matrix(ytest,pred1))
print('Accuracy:', accuracy_score(ytest,pred1))

              precision    recall  f1-score   support

           0       0.97      0.98      0.97       237
           1       0.98      1.00      0.99       237
           2       1.00      0.97      0.98       239
           3       1.00      1.00      1.00       255

    accuracy                           0.99       968
   macro avg       0.99      0.99      0.99       968
weighted avg       0.99      0.99      0.99       968

[[232   5   0   0]
 [  0 237   0   0]
 [  8   0 231   0]
 [  0   0   0 255]]
Accuracy: 0.9865702479338843


In [23]:
model1=AdaBoostClassifier()
model1.fit(xtrain,ytrain)
pred1 = model1.predict(xtest)
print(classification_report(ytest,pred1))
print(confusion_matrix(ytest,pred1))
print('Accuracy:', accuracy_score(ytest,pred1))

              precision    recall  f1-score   support

           0       0.90      0.11      0.20       237
           1       0.61      0.87      0.72       237
           2       0.74      0.86      0.80       239
           3       0.79      1.00      0.88       255

    accuracy                           0.72       968
   macro avg       0.76      0.71      0.65       968
weighted avg       0.76      0.72      0.65       968

[[ 26 103  71  37]
 [  0 207   0  30]
 [  3  30 206   0]
 [  0   0   0 255]]
Accuracy: 0.7169421487603306


In [24]:
model1=BaggingClassifier()
model1.fit(xtrain,ytrain)
pred1 = model1.predict(xtest)
print(classification_report(ytest,pred1))
print(confusion_matrix(ytest,pred1))
print('Accuracy:', accuracy_score(ytest,pred1))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99       237
           1       1.00      1.00      1.00       237
           2       1.00      0.99      0.99       239
           3       1.00      1.00      1.00       255

    accuracy                           1.00       968
   macro avg       1.00      1.00      1.00       968
weighted avg       1.00      1.00      1.00       968

[[237   0   0   0]
 [  0 237   0   0]
 [  3   0 236   0]
 [  0   0   0 255]]
Accuracy: 0.996900826446281
