In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report

In [2]:
def load_data(path):
    x_train = np.loadtxt(path.format('x_tr'), delimiter=',', skiprows=1)
    x_test = np.loadtxt(path.format('x_tst'), delimiter=',', skiprows=1)
    y_train = np.loadtxt(path.format('y_tr'), delimiter=',', skiprows=1)
    y_test = np.loadtxt(path.format('y_tst'), delimiter=',', skiprows=1)
    
    return x_train, x_test, y_train, y_test

# Task 1
## Multiclass SVM

In [3]:
class MulticlassSVM:
    def fit(self, x, y):
        y = y.astype(int)
        class_cnt = y.max() - y.min() + 1
        one_hot = np.eye(class_cnt)[y]
        # treat SVC as if it were for binary classification only
        self.learners = [SVC().fit(x, one_hot[:, c]) for c in range(class_cnt)]
    
    def predict(self, x):
        return np.array([l.predict(x) for l in self.learners]).argmax(axis=0).astype(float)


Try on dataset 3

In [4]:
x_train, x_test, y_train, y_test = load_data('csv/data3_{}.csv')

scaler = MinMaxScaler()
x_train_norm = scaler.fit_transform(x_train)
x_test_norm = scaler.fit_transform(x_test)

In [5]:
mult_svm = MulticlassSVM()
mult_svm.fit(x_train_norm, y_train)

y_pred = mult_svm.predict(x_test_norm)

report = classification_report(y_test, y_pred, output_dict=True)
print(report['accuracy'])
print(report['macro avg'])

0.9548611111111112
{'precision': 0.9685987509758002, 'recall': 0.9549004999554617, 'f1-score': 0.9584702714410118, 'support': 576}


### Compare different models/implementations

In [6]:
classifiers = [MulticlassSVM(), SVC(gamma='auto'), MultinomialNB()]

results = {}
for clf in classifiers:
    name = type(clf).__name__

    clf.fit(x_train_norm, y_train)
    y_pred = clf.predict(x_test_norm)

    report = classification_report(y_test, y_pred, output_dict=True)
    results[name] = {}
    results[name]['accuracy'] = report['accuracy']
    results[name].update(report['macro avg'])    
    del results[name]['support']

In [7]:
cols = ['accuracy', 'precision', 'recall', 'f1-score']

print("{:<12}\t{:<10}\t{:<10}\t{:<10}\t{:<10}".format('model', *cols))
print()
for k, v in results.items():
    row = [results[k][m] for m in ['accuracy', 'precision', 'recall', 'f1-score']]
    print("{:<12}\t{:.7f}\t{:.7f}\t{:.7f}\t{:.7f}".format(k, *row))

model       	accuracy  	precision 	recall    	f1-score  

MulticlassSVM	0.9548611	0.9685988	0.9549005	0.9584703
SVC         	0.9774306	0.9798941	0.9777066	0.9782037
MultinomialNB	0.9166667	0.9197667	0.9186393	0.9156886
