# Importing packages

In [1]:
import csv
import numpy as np
import math
from random import shuffle

import glass_classification as GC

# Loading the data from file

In [2]:
dataset_file = "glass.csv"
# Fixed seed so the shuffle below (and therefore the train/test split made from
# the shuffled order) is identical on every Restart & Run All.
SHUFFLE_SEED = 42

# The first CSV row holds the column names; every remaining row is parsed as floats.
with open(dataset_file, newline='') as csvfile:
    csv_reader = csv.reader(csvfile, delimiter=",", quotechar="|")
    headers = next(csv_reader, None)
    # csv.reader yields [] for blank lines; skipping them keeps the array rectangular.
    dataset = np.array([[float(x) for x in row] for row in csv_reader if row])

# Fail early with a clear message instead of an obscure IndexError further down.
if headers is None or dataset.ndim != 2:
    raise ValueError("No data rows could be read from " + dataset_file)

# Every column but the last is a feature; the last column is the integer class label.
examples = dataset[:, :-1]
num_examples, num_features = examples.shape

labels = dataset[:, -1].astype(int)

# Shuffling examples and labels with the same permutation, so that each row
# stays paired with its own label.
rng = np.random.default_rng(SHUFFLE_SEED)
indexes = rng.permutation(num_examples)
examples = examples[indexes]
labels = labels[indexes]

# Duplicating labels for each class for the multiclass classification one-vs-all:
# entry k of labels_by_class is the 0/1 target "label == k + 1", shaped (1, num_examples).
# NOTE(review): this assumes labels are integers in 1..num_classes. A value in that
# range with no examples yields an all-zero target row - confirm against the dataset.
num_classes = int(labels.max())
print("Number of classes : "+str(num_classes))
labels_by_class = np.array([
    (labels == class_id).astype(int).reshape((1, num_examples))
    for class_id in range(1, num_classes + 1)
])

print("Number of lines in the dataset : "+str(num_examples))
print("Number of features per example : "+str(num_features)+"\n")
print("Features : "+str(headers[:-1]))

Number of classes : 7
Number of lines in the dataset : 214
Number of features per example : 9

Features : ['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe']


# Training the model

In [3]:
# Work out the dataset dimensions and the 80/20 train/test split sizes first,
# then report them in one place.
tot_num_examples, num_features = examples.shape
num_training = math.ceil(tot_num_examples * 0.8)
num_testing = tot_num_examples - num_training

print("Number of classes : "+str(num_classes))
print("Number of features : "+str(num_features))
print("Total number of examples : "+str(tot_num_examples))
print("Number of training examples : "+str(num_training))
print("Number of testing examples : "+str(num_testing))

# Presumably the layer sizes (input, two hidden layers, single output unit)
# expected by GC.nn_model - confirm against that module.
n_l = [num_features, 7, 7, 1]

# The first num_training rows are used for training, the remainder for testing.
training_examples = examples[:num_training]
testing_examples = examples[num_training:]

# One binary (one-vs-all) model per class; the result at position i belongs to
# class index i, i.e. the target row labels_by_class[i].
parameters_by_class = []

for i in range(num_classes):
    # Keep the (1, num_training) row layout of the 0/1 targets for this class.
    training_labels = labels_by_class[i, :, :num_training]

    print("\n\nTraining model for class "+str(i)+"...")
    parameters = GC.nn_model(
        training_examples.T,
        training_labels,
        n_l,
        learning_rate=0.9,
        num_iterations=10000,
        print_cost=False,
    )
    parameters_by_class.append(parameters)

Number of classes : 7
Number of features : 9
Total number of examples : 214
Number of training examples : 172
Number of testing examples : 42


Training model for class 0...
Cost after all iterations : 0.6176039195048715



Training model for class 1...
Cost after all iterations : 0.6630294559865032



Training model for class 2...
Cost after all iterations : 0.2960328391321774



Training model for class 3...
Cost after all iterations : 1.4730460028128412e-05



Training model for class 4...
Cost after all iterations : 0.20530664714974145



Training model for class 5...
Cost after all iterations : 0.18811283169298096



Training model for class 6...
Cost after all iterations : 0.3823884760214366



# Making predictions

In [16]:
# One-vs-all evaluation: accuracy_by_class[i] is the percentage of held-out
# examples on which the binary model for class index i agrees with its 0/1 target.
accuracy_by_class = []

for i in range(num_classes):
    predictions = GC.predict(parameters_by_class[i], testing_examples.T)
    Y = labels_by_class[i][:, num_training:]
    # The first dot product counts examples where target and prediction are both 1,
    # the second counts examples where both are 0.
    # assumes predictions is a 0/1 array laid out like Y - TODO confirm against GC.predict
    num_agreements = np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)
    # Extract the single element explicitly: calling float() on an array with
    # ndim > 0 is deprecated since NumPy 1.25.
    accuracy = float(np.asarray(num_agreements).item() / float(Y.size) * 100)
    accuracy_by_class.append(accuracy)

print(accuracy_by_class)

[59.523809523809526, 73.80952380952381, 95.23809523809523, 100.0, 90.47619047619048, 97.61904761904762, 83.33333333333334]
