In [1]:
# Standard scientific Python imports
import matplotlib.pyplot as plt
import numpy as np
import time
import datetime as dt
import os

# Import datasets, classifiers and performance metrics
from sklearn import svm, metrics

In [4]:
# Import whole N-MNIST Dataset
def load_NMNIST(path):
    """
    """
    xs_train = []
    ys_train = []
    xs_test = []
    ys_test = []

    for class_index in range(0, 10):
        for (root, dirs, dat_files) in os.walk('{0}/n_Train/{1}'.format(path, str(class_index))):
            for file in dat_files:
                single_X = np.fromfile('{0}/n_Train/{1}/{2}'.format(path, str(class_index), file), dtype=np.int32)
                xs_train.append(single_X)
                ys_train.append(class_index)

        for (root, dirs, dat_files) in os.walk('{0}/n_Test/{1}'.format(path, str(class_index))):
            for file in dat_files:
                xs_test.append(np.fromfile('{0}/n_Test/{1}/{2}'.format(path, str(class_index), file), dtype=np.int32))
                ys_test.append(class_index)

    Xtr = np.array(xs_train)
    Ytr = np.array(ys_train)
    Xte = np.array(xs_test)
    Yte = np.array(ys_test)

    return Xtr, Ytr, Xte, Yte

In [5]:
dataset_class_path = '/Users/bcaloger/Desktop/LowPowerActionRecognition/CNN/datasets'
X_train, Y_train, X_test, Y_test = load_NMNIST(dataset_class_path)

In [6]:
# As a sanity check, we print out the size of the training and test data.
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', Y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', Y_test.shape)

('Training data shape: ', (60000, 1156))
('Training labels shape: ', (60000,))
('Test data shape: ', (10000, 1156))
('Test labels shape: ', (10000,))


We have our full dataset, now we would like to do some dev work, let's reduce some sizes

In [22]:
num_training = 60000
num_dev = 10000
num_test = 10000
num_test_dev = 200

# We will also make a development set, which is a small subset of
# the training set.
mask = np.random.choice(num_training, num_dev, replace=False)
X_dev = X_train[mask]
Y_dev = Y_train[mask]


mask = np.random.choice(num_test, num_test_dev, replace=False)
X_test_small = X_test[mask]
Y_test_small = Y_test[mask]


print('Training data shape: ', X_dev.shape)
print('Training labels shape: ', Y_dev.shape)
print('Test data shape: ', X_test_small.shape)
print('Test labels shape: ', Y_test_small.shape)


('Training data shape: ', (10000, 1156))
('Training labels shape: ', (10000,))
('Test data shape: ', (200, 1156))
('Test labels shape: ', (200,))


In [23]:
param_C = 5
param_gamma = 0.05
classifier = svm.SVC(C=param_C,gamma=param_gamma)

start_time = dt.datetime.now()
print('Start learning at {}'.format(str(start_time)))

classifier.fit(X_train, Y_train)

end_time = dt.datetime.now() 
print('Stop learning {}'.format(str(end_time)))
elapsed_time= end_time - start_time
print('Elapsed learning {}'.format(str(elapsed_time)))

Start learning at 2018-11-26 22:23:56.465114
Stop learning 2018-11-27 07:56:47.432521
Elapsed learning 9:32:50.967407


In [24]:
# Now predict the value of the test
expected = Y_test
predicted = classifier.predict(X_test)


print("Classification report for classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(expected, predicted)))
      
cm = metrics.confusion_matrix(expected, predicted)
print("Confusion matrix:\n%s" % cm)

# plot_confusion_matrix(cm)

print("Accuracy={}".format(metrics.accuracy_score(expected, predicted)))

Classification report for classifier SVC(C=5, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.05, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       980
           1       0.11      1.00      0.20      1135
           2       0.00      0.00      0.00      1032
           3       0.00      0.00      0.00      1010
           4       0.00      0.00      0.00       982
           5       0.00      0.00      0.00       892
           6       0.00      0.00      0.00       958
           7       0.00      0.00      0.00      1028
           8       0.00      0.00      0.00       974
           9       0.00      0.00      0.00      1009

   micro avg       0.11      0.11      0.11     10000
   macro avg       0.01      0.10      0.02     10000
weighted avg       0.01      0.11    

In [25]:
classifier = svm.SVC(C=200,kernel='rbf',gamma=0.01,cache_size=8000,probability=False)

start_time = dt.datetime.now()
print('Start learning at {}'.format(str(start_time)))

classifier.fit(X_train, Y_train)

end_time = dt.datetime.now() 
print('Stop learning {}'.format(str(end_time)))
elapsed_time= end_time - start_time
print('Elapsed learning {}'.format(str(elapsed_time)))

Start learning at 2018-11-27 08:09:05.785822
Stop learning 2018-11-27 10:56:08.291935
Elapsed learning 2:47:02.506113


In [26]:
# Now predict the value of the test
expected = Y_test
predicted = classifier.predict(X_test)


print("Classification report for classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(expected, predicted)))
      
cm = metrics.confusion_matrix(expected, predicted)
print("Confusion matrix:\n%s" % cm)

# plot_confusion_matrix(cm)

print("Accuracy={}".format(metrics.accuracy_score(expected, predicted)))

Classification report for classifier SVC(C=200, cache_size=8000, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       980
           1       0.99      0.68      0.81      1135
           2       0.00      0.00      0.00      1032
           3       0.00      0.00      0.00      1010
           4       0.00      0.00      0.00       982
           5       0.00      0.00      0.00       892
           6       0.00      0.00      0.00       958
           7       0.11      1.00      0.20      1028
           8       0.00      0.00      0.00       974
           9       0.00      0.00      0.00      1009

   micro avg       0.18      0.18      0.18     10000
   macro avg       0.11      0.17      0.10     10000
weighted avg       0.12      0.18 