# Car Evaluation

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import CategoricalNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay, confusion_matrix
from sklearn.compose import ColumnTransformer
from sklearn.linear_model._logistic import LogisticRegression

In [6]:
# Load the car-evaluation table. Later cells read the columns 'buying', 'maint',
# 'doors', 'persons', 'lug_boot', 'safety' and 'label' from this frame.
# NOTE(review): the path is relative to the notebook's working directory.
car_data = pd.read_csv('data files/car.csv')

In [7]:
# (rows, columns) of the raw table.
car_data.shape

In [8]:
# Summary statistics for the non-numeric columns only.
car_data.describe(exclude='number')

In [9]:
# Column dtypes and non-null counts.
car_data.info()

In [10]:
# Class distribution of the raw target, before any relabelling.
car_data.label.value_counts()

In [11]:
# Collapse the target into two classes: 'acc', 'good' and 'vgood' all become
# 'acc'; every other value becomes 'unacc'.
acceptable_grades = ('good', 'vgood', 'acc')
car_data.label = ['acc' if grade in acceptable_grades else 'unacc'
                  for grade in car_data.label]

In [12]:
# Balance the two classes by down-sampling 'unacc' to the size of 'acc'.
# The sample size is derived from the data (it was previously hard-coded as 518)
# and the draw is seeded so every Restart & Run All sees the same rows.
acc_rows = car_data[car_data.label == 'acc']
unacc_rows = car_data[car_data.label == 'unacc']
car_data = pd.concat([
    acc_rows,
    # min() keeps the cell from raising if 'unacc' is ever the smaller group.
    unacc_rows.sample(n=min(len(acc_rows), len(unacc_rows)), random_state=0),
])

In [13]:
# Inspect the rebalanced frame ('acc' rows followed by the sampled 'unacc' rows).
car_data

In [14]:
# Class distribution after relabelling and down-sampling.
car_data.label.value_counts()

In [15]:
# Replace the string target with integer codes. `le` is kept so the codes can
# be mapped back to class names later via le.classes_ / le.inverse_transform.
# NOTE: this overwrites the 'label' column, so re-running the cell would fit
# the encoder on the integer codes instead of the original strings.
le = LabelEncoder()
car_data['label'] = le.fit(car_data['label']).transform(car_data['label'])

In [16]:
# Class distribution again, now shown with the integer-encoded labels.
car_data.label.value_counts()

In [17]:
# Hold out 20% of the rows for evaluation; the split is seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    car_data.drop('label', axis=1),  # feature columns
    car_data.label,                  # encoded target
    test_size=0.2,
    random_state=0,
)

In [18]:
# Training features as a DataFrame, before encoding.
X_train

In [19]:
# Learn the category list of every categorical feature; the lists are reused
# below as the explicit `categories` of the ColumnTransformer's encoder.
# NOTE(review): despite its name, `ohe` is an OrdinalEncoder, and it is fitted
# on the full frame (train and test rows) — only category values are learned.
categorical_columns = ['buying', 'maint', 'lug_boot', 'safety', 'persons', 'doors']
ohe = OrdinalEncoder()
ohe.fit(car_data[categorical_columns])

In [20]:
# Ordinal-encode the six categorical columns with the category lists learned by
# `ohe`; any remaining column is passed through unchanged.
#
# NOTE(review): `ohe` was fitted without explicit categories, so the integer
# codes presumably follow sorted string order, not a semantic order. An earlier
# draft of this cell intended these orders (confirm whether they are needed for
# the linear / MLP models):
#   buying, maint : low < med < high < vhigh
#   lug_boot      : small < med < big
#   safety        : low < med < high
#   persons       : 2 < 4 < more
#   doors         : 2 < 3 < 4 < 5more
encoded_columns = ['buying', 'maint', 'lug_boot', 'safety', 'persons', 'doors']
ordinal_step = ('tf1', OrdinalEncoder(categories=ohe.categories_), encoded_columns)
columTransformer = ColumnTransformer(transformers=[ordinal_step],
                                     remainder='passthrough')

In [21]:
# Held-out target values (integer-encoded).
y_test

In [22]:
# min_max_trans = ColumnTransformer([
#     ('tf5', MinMaxScaler(),slice(0,10))
# ], remainder='passthrough')

In [23]:
# Fit the column transformer on the training features and encode them.
# NOTE: this rebinds X_train from a DataFrame to the transformer's output, so
# re-running the cell without re-running the split is expected to fail (the
# transformer selects its columns by name).
X_train = columTransformer.fit_transform(X_train)

In [24]:
# Training features after ordinal encoding.
X_train

In [25]:
# Encode the held-out features with the transformer already fitted on the
# training split. Use transform(), not fit_transform(): preprocessing must never
# be re-fitted on test data, otherwise train and test can end up encoded
# differently and information from the test set leaks into the pipeline.
X_test = columTransformer.transform(X_test)

In [26]:
# Decision Tree Classifier
# Seeded so the fitted tree is identical on every run (tie-breaking between
# equally good splits is otherwise random).
# NOTE(review): the same model is created and fitted again in a later cell,
# where it is also evaluated.
tree = DecisionTreeClassifier(random_state=0)
tree.fit(X_train, y_train)

In [27]:
# Random Forest Classifier
# random_state pins the bootstrap / feature sampling so scores are repeatable.
rf = RandomForestClassifier(random_state=0)
# fit(X, y) only: a third positional argument is taken as `sample_weight`, not
# as a number of trees (the number of trees is the `n_estimators` constructor
# argument).
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
# scikit-learn metrics take (y_true, y_pred). With the arguments reversed the
# confusion matrix is transposed and precision/recall are swapped in the report.
print("RandomForestClassifier\n", accuracy_score(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
print(classification_report(y_test, y_pred))

In [28]:
# Plot the random-forest confusion matrix held in `cm`.
# Tick labels are decoded back to the original class names via `le`.
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=le.inverse_transform(rf.classes_))
disp.plot()
# Name the model in the title: setting the x-label instead would overwrite the
# "Predicted label" axis caption drawn by disp.plot().
plt.title('RandomForestClassifier')
plt.show()

In [29]:
# Logistic Regression
lr = LogisticRegression()
# fit(X, y) only: a third positional argument is taken as `sample_weight`.
# A uniform weight of 100 scales the data term relative to the L2 penalty
# (acting like a larger C); it is not an iteration count, which would be the
# `max_iter` constructor argument.
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
# scikit-learn metrics take (y_true, y_pred). With the arguments reversed the
# confusion matrix is transposed and precision/recall are swapped in the report.
print("LogisticRegression\n", accuracy_score(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
print(classification_report(y_test, y_pred))

In [30]:
# Plot the logistic-regression confusion matrix held in `cm`.
# Tick labels are decoded back to the original class names via `le`.
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=le.inverse_transform(lr.classes_))
disp.plot()
# Title the figure so it can be told apart from the other models' plots.
plt.title('LogisticRegression')
plt.show()

In [31]:
# Decision Tree Classifier
# Seeded so the fitted tree, and therefore the reported scores, are repeatable.
tree = DecisionTreeClassifier(random_state=0)
tree.fit(X_train, y_train)
y_pred = tree.predict(X_test)
# scikit-learn metrics take (y_true, y_pred). With the arguments reversed the
# confusion matrix is transposed and precision/recall are swapped in the report.
print("DecisionTreeClassifier\n", accuracy_score(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
print(classification_report(y_test, y_pred))

In [32]:
# Plot the decision-tree confusion matrix held in `cm`.
# Tick labels are decoded back to the original class names via `le`.
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=le.inverse_transform(tree.classes_))
disp.plot()
# Title the figure so it can be told apart from the other models' plots.
plt.title('DecisionTreeClassifier')
plt.show()

In [33]:
# Multi-layer Perceptron Classifier
# random_state pins weight initialisation and batch shuffling so the scores are
# repeatable across runs.
mlp = MLPClassifier(random_state=0)
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)
# scikit-learn metrics take (y_true, y_pred). With the arguments reversed the
# confusion matrix is transposed and precision/recall are swapped in the report.
print("MLPClassifier\n", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

In [34]:
# Categorical Naive Bayes
# The ordinal codes produced by the column transformer are used as the
# category indices of each feature.
nb = CategoricalNB()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)
# scikit-learn metrics take (y_true, y_pred). With the arguments reversed the
# confusion matrix is transposed and precision/recall are swapped in the report.
print("CategoricalNB\n", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

In [35]:
# Accuracy of the most recently evaluated model (`y_pred` comes from the cell
# run last). Arguments follow scikit-learn's (y_true, y_pred) convention.
accuracy_score(y_test, y_pred)

In [36]:
# Confusion matrix of the most recently evaluated model. Rows are true classes
# and columns are predictions, which requires the (y_true, y_pred) order.
confusion_matrix(y_test, y_pred)

In [37]:
# Per-class precision / recall / F1 of the most recently evaluated model.
# (y_true, y_pred) order keeps precision and recall from being swapped.
# print() renders the report as a table instead of a string full of "\n".
print(classification_report(y_test, y_pred))