

Prepare a model for glass classification using KNN, SVM and Naive Bayes.
Also calculate Accuracy by confusion matrix, Calculate accuracy score, precision score, recall score, f1 score.
Data Description:
RI : refractive index
Na: Sodium (unit measurement: weight percent in corresponding oxide, as are attributes 4-10)
Mg: Magnesium
AI: Aluminum
Si: Silicon
K:Potassium
Ca: Calcium
Ba: Barium
Fe: Iron
Type: Type of glass: (class attribute)
1 -- building_windows_float_processed
 2 --building_windows_non_float_processed
 3 --vehicle_windows_float_processed
 4 --vehicle_windows_non_float_processed (none in this database)
 5 --containers
 6 --tableware
 7 --headlamps

Dataset : Glass.csv

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings(action="ignore")

In [2]:
df = pd.read_csv("glass.csv")
df.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [3]:
df.isna().sum()

RI      0
Na      0
Mg      0
Al      0
Si      0
K       0
Ca      0
Ba      0
Fe      0
Type    0
dtype: int64

In [4]:
df.corr()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
RI,1.0,-0.191885,-0.122274,-0.407326,-0.542052,-0.289833,0.810403,-0.000386,0.14301,-0.164237
Na,-0.191885,1.0,-0.273732,0.156794,-0.069809,-0.266087,-0.275442,0.326603,-0.241346,0.502898
Mg,-0.122274,-0.273732,1.0,-0.481799,-0.165927,0.005396,-0.44375,-0.492262,0.08306,-0.744993
Al,-0.407326,0.156794,-0.481799,1.0,-0.005524,0.325958,-0.259592,0.479404,-0.074402,0.598829
Si,-0.542052,-0.069809,-0.165927,-0.005524,1.0,-0.193331,-0.208732,-0.102151,-0.094201,0.151565
K,-0.289833,-0.266087,0.005396,0.325958,-0.193331,1.0,-0.317836,-0.042618,-0.007719,-0.010054
Ca,0.810403,-0.275442,-0.44375,-0.259592,-0.208732,-0.317836,1.0,-0.112841,0.124968,0.000952
Ba,-0.000386,0.326603,-0.492262,0.479404,-0.102151,-0.042618,-0.112841,1.0,-0.058692,0.575161
Fe,0.14301,-0.241346,0.08306,-0.074402,-0.094201,-0.007719,0.124968,-0.058692,1.0,-0.188278
Type,-0.164237,0.502898,-0.744993,0.598829,0.151565,-0.010054,0.000952,0.575161,-0.188278,1.0


In [5]:
df = df.drop(['Si', 'K', 'Ca', 'Fe'], axis=1)

In [6]:
df['Type'].value_counts()

Type
2    76
1    70
7    29
3    17
5    13
6     9
Name: count, dtype: int64

In [7]:
x=df.drop('Type', axis=1)
y=df['Type']

from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,train_size=0.7,random_state=123456)



In [8]:
from sklearn.naive_bayes import GaussianNB

def create_naive_bayes_model():
    model = GaussianNB()
    model.fit(x_train,y_train)
    return model

In [9]:
from sklearn.neighbors import KNeighborsClassifier

def create_knn_model():
    model = KNeighborsClassifier()
    model.fit(x_train,y_train)
    return model

In [10]:
from sklearn.svm import SVC

def create_svm_model():
    model=SVC()
    model.fit(x_train,y_train)
    return model

In [15]:
from sklearn.metrics import confusion_matrix, accuracy_score,precision_score,recall_score,f1_score

def evaluate_model(model_name, model):
    y_pred = model.predict(x_test)
    y_true = y_test

    cm = confusion_matrix(y_true,y_pred)
    accuracy = accuracy_score(y_true,y_pred)
    precision = precision_score(y_true,y_pred,average='macro')
    recall = recall_score(y_true,y_pred,average='macro')
    f1 = f1_score(y_true,y_pred,average='macro')

    return model_name,cm,accuracy,precision,recall,f1


In [17]:
models = pd.DataFrame([
    evaluate_model("Naive Bayes", create_naive_bayes_model()),
    evaluate_model("KNN", create_knn_model()),
    evaluate_model("SVM", create_svm_model())
], columns=['Model Name', 'cm', 'accuracy', 'precision', 'recall', 'f1'])
models

Unnamed: 0,Model Name,cm,accuracy,precision,recall,f1
0,Naive Bayes,"[[13, 0, 0, 0, 5, 0], [4, 0, 0, 2, 17, 0], [2,...",0.384615,0.346061,0.456734,0.327302
1,KNN,"[[15, 3, 0, 0, 0, 0], [4, 17, 0, 0, 2, 0], [4,...",0.661538,0.609105,0.601471,0.552487
2,SVM,"[[18, 0, 0, 0, 0, 0], [18, 5, 0, 0, 0, 0], [6,...",0.461538,0.260606,0.308959,0.256905
