# Predict cylinders based on other characteristics

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

%matplotlib inline

# Read the mtcars table with the 'model' column as the row index, and recast
# the cylinder count as a categorical label in the same expression.
mtcars = (
    pd.read_csv('mtcars.csv', index_col='model')
    .assign(cyl=lambda frame: frame['cyl'].astype('category'))
)
# Preview the first rows (shown as the cell output).
mtcars.head()

In [None]:
# Standardise every predictor (all columns except the 'cyl' target) so that
# no single feature dominates the distance computation used by KNN.
# NOTE: the scaler is fitted on all rows, i.e. before the train/test split
# performed later, so test-set statistics influence the scaling.
features = mtcars.drop('cyl', axis=1)
scaler = StandardScaler()
scaler.fit(features)
scaled_features = scaler.transform(features)
# Take the column labels from the very frame that was scaled. Slicing
# mtcars.columns[:-1] only lines up when 'cyl' happens to be the last
# column; otherwise the labels are shifted and include 'cyl' itself.
df_feat = pd.DataFrame(scaled_features, columns=features.columns)
df_feat.head()

In [None]:
# Hold out 30% of the rows for evaluation. random_state pins the shuffle so
# the reported metrics are reproducible from run to run, and stratify keeps
# the cylinder classes in similar proportions in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    scaled_features,
    mtcars['cyl'],
    test_size=0.30,
    random_state=42,
    stratify=mtcars['cyl'],
)

# Baseline classifier: vote among the 5 nearest training points.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)

pred

In [None]:
# Cross-tabulate the true test labels against the KNN predictions.
conf_mat = confusion_matrix(y_test, pred)
print(conf_mat)

In [None]:
# Text summary of the per-class metrics for the test-set predictions.
report = classification_report(y_test, pred)
print(report)

In [None]:
# Test-set misclassification rate for each neighbourhood size k.
# k cannot exceed the number of training samples, so the upper bound is
# read from X_train instead of being hard-coded.
max_k = len(X_train)

error_rate = []
for k in range(1, max_k + 1):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    pred_i = knn.predict(X_test)
    # Fraction of test rows whose predicted class differs from the truth.
    error_rate.append(np.mean(pred_i != y_test))

error_rate[0:2]

In [None]:
# Plot the test error against k. The x values are derived from the number
# of recorded error rates so both sequences always have the same length
# (error_rate[0] corresponds to k=1).
k_values = range(1, len(error_rate) + 1)

plt.figure(figsize=(10, 6))
plt.plot(k_values, error_rate, color='blue', linestyle='dashed', marker='o',
         markerfacecolor='red', markersize=10)
plt.title('Error Rate vs. K Value')
plt.xlabel('K')
plt.ylabel('Error Rate')