Prepare a model for glass classification using KNN

In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('glass.csv')

In [3]:
df.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 214 entries, 0 to 213
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   RI      214 non-null    float64
 1   Na      214 non-null    float64
 2   Mg      214 non-null    float64
 3   Al      214 non-null    float64
 4   Si      214 non-null    float64
 5   K       214 non-null    float64
 6   Ca      214 non-null    float64
 7   Ba      214 non-null    float64
 8   Fe      214 non-null    float64
 9   Type    214 non-null    int64  
dtypes: float64(9), int64(1)
memory usage: 16.8 KB


In [5]:
X = df.iloc[:,:8].values
y = df.iloc[:, -1].values

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 1)

In [7]:
X_train

array([[ 1.52369, 13.44   ,  0.     , ...,  0.32   , 12.24   ,  0.     ],
       [ 1.51841, 12.93   ,  3.74   , ...,  0.64   ,  8.96   ,  0.     ],
       [ 1.51831, 14.39   ,  0.     , ...,  1.41   ,  6.47   ,  2.88   ],
       ...,
       [ 1.51593, 13.09   ,  3.59   , ...,  0.67   ,  7.83   ,  0.     ],
       [ 1.5169 , 13.33   ,  3.54   , ...,  0.68   ,  8.11   ,  0.     ],
       [ 1.51797, 12.74   ,  3.48   , ...,  0.64   ,  8.68   ,  0.     ]])

In [8]:
X_test

array([[1.52222e+00, 1.44300e+01, 0.00000e+00, 1.00000e+00, 7.26700e+01,
        1.00000e-01, 1.15200e+01, 0.00000e+00],
       [1.51645e+00, 1.49400e+01, 0.00000e+00, 1.87000e+00, 7.31100e+01,
        0.00000e+00, 8.67000e+00, 1.38000e+00],
       [1.53125e+00, 1.07300e+01, 0.00000e+00, 2.10000e+00, 6.98100e+01,
        5.80000e-01, 1.33000e+01, 3.15000e+00],
       [1.53393e+00, 1.23000e+01, 0.00000e+00, 1.00000e+00, 7.01600e+01,
        1.20000e-01, 1.61900e+01, 0.00000e+00],
       [1.51926e+00, 1.32000e+01, 3.33000e+00, 1.28000e+00, 7.23600e+01,
        6.00000e-01, 9.14000e+00, 0.00000e+00],
       [1.51753e+00, 1.25700e+01, 3.47000e+00, 1.38000e+00, 7.33900e+01,
        6.00000e-01, 8.55000e+00, 0.00000e+00],
       [1.51754e+00, 1.33900e+01, 3.66000e+00, 1.19000e+00, 7.27900e+01,
        5.70000e-01, 8.27000e+00, 0.00000e+00],
       [1.51779e+00, 1.36400e+01, 3.65000e+00, 6.50000e-01, 7.30000e+01,
        6.00000e-02, 8.93000e+00, 0.00000e+00],
       [1.51742e+00, 1.32700e+01

In [9]:
y_train

array([5, 2, 7, 1, 7, 7, 2, 2, 2, 2, 1, 7, 5, 1, 6, 1, 2, 1, 2, 3, 2, 2,
       1, 1, 1, 7, 7, 1, 5, 1, 7, 2, 1, 2, 2, 2, 2, 6, 7, 1, 6, 2, 7, 3,
       5, 3, 2, 7, 7, 1, 7, 2, 7, 7, 1, 2, 2, 1, 5, 7, 5, 2, 2, 7, 5, 7,
       2, 1, 2, 5, 1, 2, 2, 3, 1, 6, 5, 2, 2, 1, 1, 2, 2, 3, 2, 3, 1, 1,
       2, 2, 5, 2, 2, 1, 2, 1, 6, 2, 5, 2, 2, 7, 1, 1, 1, 6, 2, 2, 1, 1,
       2, 1, 1, 1, 6, 1, 3, 2, 7, 1, 2, 5, 2, 7, 1, 1, 2, 1, 1, 1, 1, 1,
       1, 7, 2, 2, 2, 1, 1, 2, 3, 3, 2, 3, 2, 1, 6, 1, 2, 2, 2, 2, 7, 2,
       2, 7, 2, 2, 2, 1], dtype=int64)

In [10]:
y_test

array([2, 7, 2, 2, 1, 1, 1, 3, 1, 1, 2, 1, 2, 1, 3, 2, 2, 2, 3, 1, 1, 7,
       2, 1, 3, 1, 2, 2, 1, 1, 5, 6, 1, 1, 1, 1, 3, 3, 2, 1, 1, 7, 7, 1,
       7, 1, 1, 1, 2, 7, 3, 2, 2, 2], dtype=int64)

In [11]:
classifier = KNeighborsClassifier(n_neighbors=5, metric = 'minkowski', p=2)
classifier.fit(X_train,y_train)

KNeighborsClassifier()

In [12]:
y_pred = classifier.predict(X_test)

In [13]:
y_pred

array([5, 7, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 7,
       2, 1, 1, 1, 2, 2, 1, 1, 5, 6, 1, 1, 1, 2, 1, 1, 5, 1, 1, 7, 7, 1,
       7, 1, 2, 1, 2, 7, 2, 2, 1, 2], dtype=int64)

In [14]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[20  3  0  0  0  0]
 [ 3 11  0  2  0  0]
 [ 6  1  0  0  0  0]
 [ 0  0  0  1  0  0]
 [ 0  0  0  0  1  0]
 [ 0  0  0  0  0  6]]
              precision    recall  f1-score   support

           1       0.69      0.87      0.77        23
           2       0.73      0.69      0.71        16
           3       0.00      0.00      0.00         7
           5       0.33      1.00      0.50         1
           6       1.00      1.00      1.00         1
           7       1.00      1.00      1.00         6

    accuracy                           0.72        54
   macro avg       0.63      0.76      0.66        54
weighted avg       0.65      0.72      0.68        54

