In [58]:
import cv2
from sklearn.neural_network import MLPClassifier  # MLP is an NN
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.model_selection import train_test_split
import numpy as np
import os
from tqdm import tqdm
import random
import pickle

In [38]:
def load_dataset():
    """
    Load every image under ``path_to_dataset`` together with its class label.

    Each sub-directory of ``path_to_dataset`` is treated as one class: the
    directory name is used as the label and every file inside it is read with
    ``cv2.imread``. Entries that are not directories (e.g. ``.DS_Store``) are
    reported and skipped. Files that OpenCV cannot decode are skipped as well
    and counted in a per-class message.

    Returns:
        (imgs, labels): two parallel lists -- the decoded images and, for each
        image, the name of the class folder it came from.
    """
    imgs = []
    labels = []
    for cf in os.listdir(path_to_dataset):
        class_dir = os.path.join(path_to_dataset, cf)
        # Check for a directory explicitly instead of wrapping the loop in a
        # bare ``except``, which would also swallow genuine errors and Ctrl-C.
        if not os.path.isdir(class_dir):
            print('\r'+cf+' Probably not a folder ....')
            continue

        skipped = 0
        for fn in tqdm(os.listdir(class_dir), desc=cf):
            img = cv2.imread(os.path.join(class_dir, fn))
            if img is None:
                # cv2.imread reports an unreadable/non-image file by returning
                # None; keeping it would only fail later during resizing.
                skipped += 1
                continue
            imgs.append(img)
            labels.append(cf)
        if skipped:
            print(cf + ': skipped ' + str(skipped) + ' unreadable file(s)')

    return imgs, labels

In [39]:
# Read the whole dataset into memory. load_dataset() reads the global
# `path_to_dataset`, so the configuration cell that defines it must have been
# executed before this cell.
imgs, labels = load_dataset()

augmentation_dot: 100%|██████████| 400/400 [00:00<00:00, 2094.63it/s]
eighth_note: 100%|██████████| 800/800 [00:00<00:00, 2140.31it/s]
treple_clef: 100%|██████████| 400/400 [00:00<00:00, 1718.74it/s]
time_signature_2_4: 100%|██████████| 400/400 [00:00<00:00, 3010.92it/s]
.DS_Store Probably not a folder ....
whole_note: 100%|██████████| 400/400 [00:00<00:00, 1465.48it/s]
flat_accidental: 100%|██████████| 400/400 [00:00<00:00, 3427.83it/s]
natural_accidental: 100%|██████████| 400/400 [00:00<00:00, 2801.39it/s]
sharp_accidental: 100%|██████████| 400/400 [00:00<00:00, 2364.39it/s]
thirty_second_note: 100%|██████████| 800/800 [00:00<00:00, 2111.89it/s]
quarter_note: 100%|██████████| 800/800 [00:00<00:00, 3023.81it/s]
half_note: 100%|██████████| 800/800 [00:00<00:00, 4118.13it/s]
time_signature_4_4: 100%|██████████| 400/400 [00:00<00:00, 1420.63it/s]
sixtheenth_note: 100%|██████████| 800/800 [00:00<00:00, 3352.61it/s]
double_sharp_accidental: 100%|██████████| 400/400 [00:00<00:00, 4542.76it/

In [50]:
# Root folder of the dataset; load_dataset() treats each sub-folder as a class.
path_to_dataset = r'hand_written_symbols_dataset'
# Size handed to cv2.resize before any features are extracted.
target_img_size = (32, 32)

# Seed both random number generators used in this notebook for repeatable runs.
random_seed = 42
np.random.seed(random_seed)
random.seed(random_seed)

In [41]:
def extract_raw_pixels(img):
    """
    Turn an image into a flat vector of its raw pixel values.

    The general-purpose classifiers used in this notebook expect one vector per
    sample rather than a 2-D matrix (unlike CNNs, which exploit the 2-D
    arrangement of pixels). So the image is simply resized to
    ``target_img_size`` and flattened.
    """
    resized = cv2.resize(img, target_img_size)
    flat_pixels = resized.flatten()
    return flat_pixels

In [51]:
def extract_hog_features(img):
    """
    Compute a HOG descriptor for an image and return it as a flat vector.

    The image is first resized to ``target_img_size``. The descriptor window is
    set to that same size, so the window stays in sync with the resized image
    if the configuration is changed.

    Descriptor parameters passed to ``cv2.HOGDescriptor``:
        win_size: ``target_img_size`` (the whole resized image).
        cell_size: (4, 4).
        block_size: 2 x 2 cells, i.e. (8, 8) with the cell size above.
        block_stride: one cell, i.e. (4, 4).
        nbins: 9 orientation bins.

    NOTE(review): OpenCV places divisibility constraints between window,
    block, stride and cell sizes -- confirm them before choosing a
    ``target_img_size`` that is not a multiple of the cell size.

    Returns:
        The output of ``HOGDescriptor.compute`` flattened to one dimension.
    """
    img = cv2.resize(img, target_img_size)
    # Derive the window from the configured image size instead of repeating
    # the literal (32, 32), which would silently go stale if the config changes.
    win_size = tuple(target_img_size)
    cell_size = (4, 4)
    block_size_in_cells = (2, 2)

    block_size = (block_size_in_cells[1] * cell_size[1], block_size_in_cells[0] * cell_size[0])
    block_stride = (cell_size[1], cell_size[0])
    nbins = 9  # Number of orientation bins
    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)
    # A single flatten is enough to obtain the 1-D feature vector.
    return hog.compute(img).flatten()

In [52]:
def extract_features(img, feature_set='hog'):
    """
    Extract the requested kind of features from a single image.

    Args:
        img: the image to describe.
        feature_set: ``'hog'`` to use ``extract_hog_features`` or ``'raw'`` to
            use ``extract_raw_pixels``.

    Returns:
        Whatever the selected extractor returns for ``img``.

    Raises:
        ValueError: if ``feature_set`` is neither ``'hog'`` nor ``'raw'``.
    """
    if feature_set == 'hog':
        return extract_hog_features(img)
    if feature_set == 'raw':
        return extract_raw_pixels(img)
    # Fail loudly: silently returning None here would only surface later as a
    # confusing error when the feature list is handed to a classifier.
    raise ValueError(
        "Unknown feature_set " + repr(feature_set) + "; expected 'hog' or 'raw'")

In [53]:
def extract_features_for_all(imgs, feature_set='hog'):
    """
    Apply ``extract_features`` to every image, showing a tqdm progress bar.

    Returns a list with one feature entry per image, in the same order as
    ``imgs``.
    """
    return [extract_features(one_img, feature_set) for one_img in tqdm(imgs)]

In [54]:
# Candidate models, looked up by a short name. The seeded ones share
# `random_seed` so that repeated runs behave the same way.
classifiers = dict(
    SVM=svm.LinearSVC(random_state=random_seed),
    KNN=KNeighborsClassifier(n_neighbors=7),
    NN=MLPClassifier(
        solver='sgd',
        random_state=random_seed,
        hidden_layer_sizes=(500,),
        max_iter=20,
        verbose=1,
    ),
)

In [55]:
# Describe every loaded image with its HOG feature vector.
features = extract_features_for_all(imgs, feature_set='hog')

100%|██████████| 8000/8000 [00:01<00:00, 4536.14it/s]


In [59]:
# Train one classifier on a feature set and report its accuracy on held-out data.
def train(features, labels, model):
    """
    Fit ``model`` on a training split and print its accuracy on a test split.

    The data is split with ``train_test_split`` (20% held out for testing,
    seeded with ``random_seed``), so the reported accuracy is measured on
    samples the model has not seen during fitting.

    Args:
        features: one feature entry per sample.
        labels: one label per sample, parallel to ``features``.
        model: an estimator exposing ``fit(X, y)`` and ``score(X, y)``.

    Returns:
        The fitted ``model`` (the same object that was passed in), so callers
        can write ``model = train(...)`` and, for example, pickle the result.
    """
    # Withhold some samples so performance is not measured on data the
    # classifier has already seen.
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.2, random_state=random_seed)

    print('Training .........')

    # Train the model only on the training features
    model.fit(train_features, train_labels)

    # Test the model on samples it hasn't seen before
    accuracy = model.score(test_features, test_labels)

    print('accuracy:', accuracy*100, '%')

    return model

In [60]:
# train() fits the estimator in place (it calls model.fit), so keep our own
# reference to it here rather than depending on train()'s return value --
# that value is None unless train() explicitly returns the model, and a None
# `model` would then be what gets pickled further down.
model = classifiers['SVM']
train(features, labels, model)

Training .........
accuracy: 82.75 %


In [64]:
# Serialise `model` to svm.pkl in the current working directory.
with open('svm.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [61]:
# NOTE(review): interactive help lookup left over from development; it has no
# effect on the results and can be removed from the finished notebook.
?pickle.dumps

Signature: pickle.dumps(obj, protocol=None, *, fix_imports=True)
Docstring:
Return the pickled representation of the object as a bytes object.

The optional *protocol* argument tells the pickler to use the given
protocol; supported protocols are 0, 1, 2, 3 and 4.  The default
protocol is 3; a backward-incompatible protocol designed for Python 3.

Specifying a negative protocol version selects the highest protocol
version supported.  The higher the protocol used, the more recent the
version of Python needed to read the pickle produced.

If *fix_imports* is True and *protocol* is less than 3, pickle will
try to map the new Python 3 names to the old module names used in
Python 2, so that the pickle data stream is readable with Python 2.
[0;31mType:[0m      builtin_function_or_metho