# Car Evaluation

In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import CategoricalNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay, confusion_matrix
from sklearn.compose import ColumnTransformer
from sklearn.linear_model._logistic import LogisticRegression

In [40]:
car_data = pd.read_csv('data files/car.csv')

In [41]:
car_data.shape

(1728, 7)

In [42]:
car_data.describe(exclude='number')

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,label
count,1728,1728,1728,1728,1728,1728,1728
unique,4,4,4,3,3,3,4
top,vhigh,vhigh,2,2,small,low,unacc
freq,432,432,432,576,576,576,1210


In [43]:
car_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   buying    1728 non-null   object
 1   maint     1728 non-null   object
 2   doors     1728 non-null   object
 3   persons   1728 non-null   object
 4   lug_boot  1728 non-null   object
 5   safety    1728 non-null   object
 6   label     1728 non-null   object
dtypes: object(7)
memory usage: 94.6+ KB


In [44]:
car_data.label.value_counts()

unacc    1210
acc       384
good       69
vgood      65
Name: label, dtype: int64

In [45]:
car_data.label = ['acc' if((l=='good') | (l=='vgood') | (l=='acc')) else 'unacc' for l in car_data.label]

In [46]:
car_data = pd.concat([
    car_data[car_data.label == 'acc'],
    car_data[car_data.label == 'unacc'].sample(n=518)
])

In [47]:
car_data

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,label
227,vhigh,med,2,4,small,high,acc
230,vhigh,med,2,4,med,high,acc
232,vhigh,med,2,4,big,med,acc
233,vhigh,med,2,4,big,high,acc
239,vhigh,med,2,more,med,high,acc
...,...,...,...,...,...,...,...
1573,low,med,4,2,big,med,unacc
1381,low,vhigh,5more,2,med,med,unacc
906,med,vhigh,3,4,big,low,unacc
729,high,med,5more,2,small,low,unacc


In [48]:
car_data.label.value_counts()

acc      518
unacc    518
Name: label, dtype: int64

In [49]:
le = LabelEncoder()
car_data.label  = le.fit_transform(car_data.label )

In [50]:
car_data.label.value_counts()

0    518
1    518
Name: label, dtype: int64

In [80]:
X_train,X_test,y_train,y_test = train_test_split(car_data.drop(columns=['label']),
                                                 car_data['label'],
                                                 test_size=0.2,
                                                random_state=0)

In [104]:
X_train

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
1632,low,low,2,4,med,low
1637,low,low,2,4,big,high
767,high,low,2,4,small,high
246,vhigh,med,3,2,med,low
555,high,high,2,4,big,low
...,...,...,...,...,...,...
1148,med,med,4,4,med,high
1330,low,vhigh,3,2,big,med
694,high,med,3,more,small,med
490,high,vhigh,4,2,med,med


In [90]:
#transformer 01

columTransformer = ColumnTransformer(transformers=
    [
        ('tf1', OrdinalEncoder(categories=ohe.categories_),['buying', 'maint', 'lug_boot', 'safety', 'persons', 'doors'])
 
 ],
 remainder='passthrough')

In [105]:
#transformer 02
x_transform = columTransformer.fit_transform(X_train)

In [108]:
#transformer 03
xtest_transform = columTransformer.fit_transform(X_test)

In [92]:
#transformer 04
#Decision Tree Classifier

tree = DecisionTreeClassifier()
#tree.fit(X_train, y_train)

In [113]:
pipe = Pipeline([
    ('1', columTransformer),
    ('2',x_transform),
    ('4', tree)
]
)


In [111]:
pipe.fit(X_train,y_train)

  if t is None or t == "passthrough":


TypeError: All intermediate steps should be transformers and implement fit and transform or be the string 'passthrough' '[[1. 1. 1. 1. 1. 0.]
 [1. 1. 0. 0. 1. 0.]
 [0. 1. 2. 0. 1. 0.]
 ...
 [0. 2. 2. 2. 2. 1.]
 [0. 3. 1. 2. 0. 2.]
 [0. 3. 2. 2. 0. 0.]]' (type <class 'numpy.ndarray'>) doesn't

In [96]:
prediction = pipe.predict(X_test)

In [97]:
accuracy_score(y_test,prediction)

0.9759615384615384

## Exporting the pipeline

In [98]:
# export 
import pickle
pickle.dump(pipe,open('careval.pkl','wb'))