In [1]:
import os

import numpy as np
import matplotlib.pyplot as plt
from functions import *


# Load the colour histograms and their class labels from disk.
# `os` is used explicitly here; it must not be picked up implicitly through
# the star import of `functions`.
# NOTE(review): np.load is applied to '.dat' files, so they are presumably
# stored in NumPy's own binary format -- confirm against the code that wrote them.
DATA_DIR = 'colour_histograms'
histograms = np.load(os.path.join(DATA_DIR, 'hsv.dat'))
labels = np.load(os.path.join(DATA_DIR, 'labels.dat'))

In [2]:
# Shuffle the samples reproducibly, rescale the histograms and split them
# into a training and a testing set.
#
# NOTE(review): this cell rebinds `histograms` and `labels`, so it is not
# idempotent -- running it again without first re-running the loading cell
# permutes the data a second time and multiplies the histograms by 100 again.

# fixed seed so that the train/test split (and therefore every accuracy
# reported further down) is the same on each Restart & Run All
SEED = 42

# generate a random permutation of our data samples
data_size = len(labels)
perm = np.random.RandomState(SEED).permutation(data_size)

# shuffle the data; the same permutation is applied to both arrays so that
# every histogram stays aligned with its label
histograms = histograms[perm]
labels = labels[perm]

# cast the histograms to float32 and scale them by a factor of 100
histograms = 100 * histograms.astype(np.float32)

# split into training and testing data
training_percentage = 0.8  # a fraction in [0, 1], despite the name
split_index = int(training_percentage * data_size)  # first index of the test set

histograms_train = histograms[:split_index]
labels_train = labels[:split_index]
histograms_test = histograms[split_index:]
labels_test = labels[split_index:]

# Support Vector Machine for Classification

## Linear SVM

In [3]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
    
# Linear-kernel SVM: build the classifier and fit it on the training split
# (fit() returns the fitted estimator, so both steps are chained).
lin_clf = SVC(kernel='linear').fit(histograms_train, labels_train)

# class predictions for the training samples and for the held-out test samples
train_predictions = lin_clf.predict(histograms_train)
test_predictions = lin_clf.predict(histograms_test)

# fraction of correctly classified samples on each split
train_score = accuracy_score(labels_train, train_predictions)
test_score = accuracy_score(labels_test, test_predictions)

# report both accuracies as percentages with two decimals
print('Training Accuracy : {0:.2f}%'.format(100 * train_score))
print('Testing  Accuracy : {0:.2f}%'.format(100 * test_score))

Training Accuracy : 54.71%
Testing  Accuracy : 44.86%


### Confusion Matrix (for Linear SVM)

In [4]:
# Confusion matrix of the linear SVM on the test split.
# The predictions are taken from `lin_clf` directly rather than from the shared
# `test_predictions` variable, which the kernel-SVM cell rebinds; this keeps the
# table tied to the linear model even if the cells are re-run out of order.
confusion_matrix(labels_test, lin_clf.predict(histograms_test))

          |     daisy | dandelion |      rose | sunflower |     tulip
    daisy |        51 |        68 |        15 |         9 |        10
dandelion |        44 |       125 |        17 |        22 |        13
     rose |        13 |        25 |        52 |         6 |        44
sunflower |        13 |        53 |        13 |        71 |         6
    tulip |        20 |        31 |        39 |        16 |        89


## Kernel SVM

In [5]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
    
# RBF-kernel SVM: build the classifier and fit it on the training split
# (fit() returns the fitted estimator, so both steps are chained).
# NOTE(review): the output recorded for this cell shows a very large gap between
# training and testing accuracy, which points to overfitting; the default kernel
# parameters may be ill-suited to the histograms after the x100 scaling --
# consider tuning `gamma` / `C`.
rbf_clf = SVC(kernel='rbf').fit(histograms_train, labels_train)

# class predictions for the training samples and for the held-out test samples
train_predictions = rbf_clf.predict(histograms_train)
test_predictions = rbf_clf.predict(histograms_test)

# fraction of correctly classified samples on each split
train_score = accuracy_score(labels_train, train_predictions)
test_score = accuracy_score(labels_test, test_predictions)

# report both accuracies as percentages with two decimals
print('Training Accuracy : {0:.2f}%'.format(100 * train_score))
print('Testing  Accuracy : {0:.2f}%'.format(100 * test_score))

Training Accuracy : 99.74%
Testing  Accuracy : 31.56%


### Confusion Matrix (for Kernel SVM)

In [6]:
# Confusion matrix of the RBF-kernel SVM on the test split.
# The predictions are taken from `rbf_clf` directly rather than from the shared
# `test_predictions` variable, which the linear-SVM cell also assigns; this keeps
# the table tied to the kernel model even if the cells are re-run out of order.
confusion_matrix(labels_test, rbf_clf.predict(histograms_test))

          |     daisy | dandelion |      rose | sunflower |     tulip
    daisy |         7 |       137 |         3 |         0 |         6
dandelion |         0 |       214 |         0 |         0 |         7
     rose |         0 |       116 |         5 |         0 |        19
sunflower |         2 |       145 |         0 |         2 |         7
    tulip |         0 |       149 |         1 |         0 |        45
