In [4]:
import os
import pandas as pd
import numpy as np
import streamlit as st 

import sklearn
from sklearn.utils import shuffle
from sklearn.neighbors import KNeighborsClassifier 
from sklearn import linear_model, preprocessing

import matplotlib.pyplot as pyplot
import pickle
from matplotlib import style

from icecream import ic

# configure logging
import logging


In [5]:

file_root = 'k-means'

# configure logging
logger = logging.getLogger(__name__)
logging.basicConfig(
    filename=f"./logs/{file_root}.log",
    format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S')
    
def log_this(arr, msg):
    # logger.info(f"in {log_this.__name__}")
    # arr = np.arange(0,20)
    # msg = "TEST MSG"
    logger.info(f"{msg}: {arr}")
    # logger.info(f"arr.shape: {arr.shape}")  
    

In [7]:
data = pd.read_csv(("data/CarDataSet/car.data"))

data.head()

Unnamed: 0,buying,maint,door,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [8]:
# Get column names
col_names = data.columns
log_this(col_names, "col_names")

In [9]:
col_names

Index(['buying', 'maint', 'door', 'persons', 'lug_boot', 'safety', 'class'], dtype='object')

In [10]:
# encode text labels into integer values
le = preprocessing.LabelEncoder()

buying = le.fit_transform(list(data["buying"]))
# log_this(buying, "buying")
# st.subheader("buying")
# st.write(f"buying: {buying}")

maint = le.fit_transform(list(data["maint"]))
# log_this(maint, "maint")
# st.subheader("maint")
# st.write(f"maint: {maint}")

door = le.fit_transform(list(data["door"]))
# log_this(door, "door")
# st.subheader("door")
# st.write(f"door: {door}")

persons = le.fit_transform(list(data["persons"]))
# log_this(persons, "persons")

safety = le.fit_transform(list(data["safety"]))
# log_this(safety, "safety")

lug_boot = le.fit_transform(list(data["lug_boot"]))
# log_this(lug_boot, "lug_boot")

cls = le.fit_transform(list(data["class"]))
# log_this(cls, "class")


In [12]:
predict = "class"

In [13]:
# features
X = list(zip(buying, maint, door, persons, lug_boot, safety, cls))

In [14]:
# labels
y = list(cls)

In [15]:
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size = 0.1)

model = KNeighborsClassifier(n_neighbors=5)

model.fit(x_train, y_train)
acc = model.score(x_test, y_test)

acc


1.0

In [16]:
names = list(set(data["class"]))
names

['good', 'acc', 'unacc', 'vgood']

In [17]:
predicted = model.predict(x_test) 
predicted

array([2, 2, 2, 2, 2, 3, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2,
       2, 0, 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
       0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 0, 2, 1, 2, 0, 2, 2,
       2, 2, 1, 2, 0, 2, 3, 2, 2, 2, 1, 0, 2, 2, 2, 2, 2, 0, 2, 3, 0, 2,
       2, 3, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0,
       2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 1, 3, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2,
       2, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [28]:
attribute_lst = ["class", "lug_boot", "safety", "persons", "door", "maint", "buying"]

sample_data = x_test[0]
sample_data

res_dict = {}
ind = 0
for attr in attribute_lst:
    res_dict.update({attr: sample_data[ind]})
    ind += 1

print(f"res_dict: {res_dict}")


res_dict: {'class': 1, 'lug_boot': 0, 'safety': 2, 'persons': 0, 'door': 2, 'maint': 1, 'buying': 2}
