In [1]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import matplotlib.pyplot as plt
from sklearn import neighbors, metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
dataset = pd.read_csv('car.data')

In [3]:
dataset['class'].describe

<bound method NDFrame.describe of 0       unacc
1       unacc
2       unacc
3       unacc
4       unacc
        ...  
1723     good
1724    vgood
1725    unacc
1726     good
1727    vgood
Name: class, Length: 1728, dtype: object>

In [4]:
X = dataset[['buying','maint','safety']].values #FEATURES
y = dataset[['class']] #labels
print(X.shape)
print(y.shape)

(1728, 3)
(1728, 1)


In [5]:
#converting data features to be numerical
label_encoder = LabelEncoder()
for i in range(len(X[0])):
    X[ : ,i] = label_encoder.fit_transform(X[:,i]) #X[:,i] means looping all rows column by column , len(X[0]) to select num of cols

print(X) #X transformed to numbers 


[[3 3 1]
 [3 3 2]
 [3 3 0]
 ...
 [1 1 1]
 [1 1 2]
 [1 1 0]]


In [6]:
#mapping labels to numbers by creating a dictionary that holds map
label_mapping = {
    'unacc':0,
    'acc':1,
    'good':2,
    'vgood':3
}
y['class'] = y['class'].map(label_mapping)
y = np.array(y)
print(y.shape)

(1728, 1)


In [7]:
#train test split
X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.2)
print(X.shape)
print(y.shape)

(1728, 3)
(1728, 1)


In [8]:
#building the model( KNN --K nearest neighbour Classifier )
model = neighbors.KNeighborsClassifier(n_neighbors=50,weights='distance')
model.fit(X_train,y_train.ravel()) #.ravel is used to change labels from column to row bc error was raised
prediction = model.predict(X_test)
accuracy = metrics.accuracy_score(y_test, prediction)

In [9]:
print("predictions: \n", prediction)
print("accuracy:",accuracy)

predictions: 
 [0 0 0 1 1 0 0 0 1 1 0 0 0 0 2 1 0 0 0 0 0 3 1 0 0 0 0 0 1 0 0 3 0 1 0 1 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 2 0 0 0 0 0 0 0 0 1 1 1 0 2 0 0 0 1 0 0 1
 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 2 0 1 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 1 0 2 0 2 0 0 0 0 0
 1 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 3 1 1 0 0 1 0 0 1
 0 1 1 0 3 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 2 0 1 0 1 1 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 2
 2 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0
 0 0 0 1 0 0 0 0 1 0 0 1 1 0 0 0 3 1 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 3
 0 1 0 0 0 1 0 0 0 3 0 0 0]
accuracy: 0.7658959537572254


In [10]:
#testing against custom inputs
actual_value = y[[1400]]
predicted_value = model.predict(X)[1400]
print(actual_value)
print(predicted_value)

[[1]]
1


In [13]:
#saving model
import joblib
filename = 'model.sav' 
joblib.dump(model,filename)

['model.sav']