In [19]:
#importing various libraries we will be using
import sklearn
import numpy as np #working with arrays. 
import pandas as pd #data manipulation library that is necessary for every aspect of data analysis or machine learning.

In [20]:
# Load the car-evaluation dataset.
# `header=None` tells pandas the file has no header row, so the column labels
# are supplied explicitly through `names`. `read_csv` already returns a
# DataFrame, so no additional `pd.DataFrame(...)` wrapper is required.
column_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
df = pd.read_csv('car_evaluation.csv', names=column_names, header=None, index_col=False)


In [21]:
# Check the dataset shape as (number of rows, number of columns)
df.shape

(1728, 7)

In [22]:
# Preview the first five rows together with the column labels
df.head(5)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [23]:
# Inspect the dtype of every column to see which ones will need encoding
df.dtypes

buying      object
maint       object
doors       object
persons     object
lug_boot    object
safety      object
class       object
dtype: object

Since the dataset is made up of string-valued columns, we will carry out feature engineering using one-hot encoding, which converts each string column into numeric indicator columns so that the model can use them and make better predictions.

In [24]:
# List the column labels; the next cell splits them into input features and the target
df.columns 

Index(['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class'], dtype='object')

In [25]:
# Split the column labels: every column except the last one is a model input,
# and the last column is the prediction target.
input_features = df.columns[:-1]
target_feature = df.columns[-1]

# Work on independent copies (DataFrame.copy is deep by default) so that later
# changes to the inputs or the target never modify `df` itself.
df_inputs = df[input_features].copy()
df_target = df[target_feature].copy()

In [26]:
# Number of rows for each value of the 'safety' feature
# NOTE(review): this groups by 'safety', not by the target column 'class' -
# confirm which distribution was meant to be inspected here.
df.groupby('safety').size()

safety
high    576
low     576
med     576
dtype: int64

In [27]:
# import train_test_split
from sklearn.model_selection import train_test_split

In [28]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    df_inputs,
    df_target,
    test_size=0.30,
    random_state=42,
)

# Report how many rows and columns ended up in each partition.
print('Train dataset size:', X_train.shape)
print('Test dataset size:', X_test.shape)

Train dataset size: (1209, 6)
Test dataset size: (519, 6)


In [29]:
# import OneHotEncoder
from sklearn.preprocessing import OneHotEncoder

In [30]:
# Instantiate the OneHotEncoder so that it returns a dense array and encodes
# categories unseen during fitting as all-zero rows (handle_unknown='ignore').
# The `sparse` argument was renamed to `sparse_output` in scikit-learn 1.2 and
# removed in 1.4, so try the new name first and fall back for older releases.
try:
    encoder = OneHotEncoder(sparse_output = False, handle_unknown = 'ignore')
except TypeError:
    encoder = OneHotEncoder(sparse = False, handle_unknown = 'ignore')

# Learn the categories from the training split only.
encoder.fit(X_train)
X_train.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
1178,med,med,5more,4,big,high
585,high,high,3,more,small,low
1552,low,med,3,4,med,med
1169,med,med,5more,2,big,high
1033,med,high,4,2,big,med


In [31]:
# Names of the one-hot columns produced by the fitted encoder, built from the
# original column names passed in (displayed below as '<column>_<category>')
features_encoded = encoder.get_feature_names_out(X_train.columns)
features_encoded

array(['buying_high', 'buying_low', 'buying_med', 'buying_vhigh',
       'maint_high', 'maint_low', 'maint_med', 'maint_vhigh', 'doors_2',
       'doors_3', 'doors_4', 'doors_5more', 'persons_2', 'persons_4',
       'persons_more', 'lug_boot_big', 'lug_boot_med', 'lug_boot_small',
       'safety_high', 'safety_low', 'safety_med'], dtype=object)

In [32]:
# Append the one-hot encoded columns to both splits.
# Only the original input columns are passed to the encoder, so this cell can
# be re-run safely: once the encoded columns exist, transforming the whole
# frame would fail because it no longer has the columns the encoder was fitted on.
X_train[features_encoded] = encoder.transform(X_train[input_features])
X_test[features_encoded] = encoder.transform(X_test[input_features])

In [33]:
# Remove the original string columns, keeping only their one-hot encodings.
# Reassigning (instead of inplace=True) together with errors='ignore' makes the
# cell idempotent: running it a second time no longer raises a KeyError for
# columns that have already been dropped.
X_train = X_train.drop(columns = input_features, errors = 'ignore')
X_test = X_test.drop(columns = input_features, errors = 'ignore')

print('Train dataset size:', X_train.shape)
print('Test dataset size:', X_test.shape)

Train dataset size: (1209, 21)
Test dataset size: (519, 21)


In [34]:
# KNN predictions
# Fitting the classifier to the training set
# Loading libraries
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import cross_val_score

# `plot_confusion_matrix` was removed in scikit-learn 1.2. Import it only when
# it is still available so this cell also runs on current releases; on newer
# versions use `ConfusionMatrixDisplay.from_estimator` instead.
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    plot_confusion_matrix = None

# Instantiate learning model (k = 4)
classifier = KNeighborsClassifier(n_neighbors=4)

# Fitting the model on the one-hot encoded training data
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

In [35]:
# Confusion matrix of true labels (y_test) against predicted labels (y_pred).
# In scikit-learn, rows correspond to the true class and columns to the predicted class.
# NOTE(review): presumably the classes appear in sorted label order - confirm before labelling rows/columns.
cm = confusion_matrix(y_test, y_pred)
cm

array([[ 94,   4,  20,   0],
       [ 11,   5,   3,   0],
       [ 23,   1, 334,   0],
       [ 11,   4,   2,   7]])

In [18]:
# Model accuracy on the held-out test set, expressed as a percentage
accuracy = 100 * accuracy_score(y_test, y_pred)

# Round to two decimals for the printed report
accuracy_text = str(round(accuracy, 2))
print('Accuracy of our model is equal ' + accuracy_text + ' %.')

Accuracy of our model is equal 84.78 %.
