# Classifier

In [161]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import imageio
import glob

In [162]:
#All functions here receive a binarized image.
def bw_ratio(img):
    pixel_count = img.size
    white_pixels = img[img == 255].size
    #print(pixel_count)
    #print(img)
    #print(img[img == 255].size)
    return white_pixels/pixel_count

def wh_ratio(img): #Width over height
    y, x = img.shape
    return x/y

def center_mass_ratio(img):
    height = img.shape[0]
    width = img.shape[1]
    
    #Okay, first let us compute center of mass of bin image
    white_pixels = np.where(img==0)
    #This gives us two lists, containing the coordinates.
    y_mean = white_pixels[0].sum()/white_pixels[0].size
    x_mean = white_pixels[1].sum()/white_pixels[1].size
    #Since image is already at origin, we can just use height and width.
    return (x_mean/width, y_mean/height)
    

In [163]:
#For binarizing...
def otsu_threshold(im):

    # Compute histogram and probabilities of each intensity level
    pixel_counts = [np.sum(im == i) for i in range(256)]
    n, m = im.shape[0:2]
    # Initialization
    s_max = (0,0)

    for threshold in range(256):

        # update
        w_0 = sum(pixel_counts[:threshold])
        w_1 = sum(pixel_counts[threshold:])

        mu_0 = sum([i * pixel_counts[i] for i in range(0,threshold)])\
                        / w_0 if w_0 > 0 else 0
        mu_1 = sum([i * pixel_counts[i] for i in range(threshold, 256)])\
                        / w_1 if w_1 > 0 else 0

        # calculate - inter class variance
        s = w_0 * w_1 * (mu_0 - mu_1) ** 2

        if s > s_max[1]:
            s_max = (threshold, s)
    return s_max[0]

def threshold(pic, threshold):
    return ((pic < threshold) * 255).astype('uint8')

In [164]:
#This function returns the above features.
def extract_features(img):
    center_mass = center_mass_ratio(img)
    return (bw_ratio(img), wh_ratio(img), center_mass[0], center_mass[1])

### The feaures we want to analyse are:
- Black/White pixel ratio.
- Width/Height bounding box ratio.
- 'Center of mass' proportions.

#### Example images:

In [None]:
img_1f = "cropped_num1.0.png"
img_7f = "cropped_num7.0.png"
img_8f = "cropped_num8.0.png"
img_9f = "cropped_num9.0.png"
#Image 1:
img_1 = imageio.imread(img_1f)
img_1 = threshold(img_1, otsu_threshold(img_1))
img_1

In [None]:
wh_ratio(img_1)

In [None]:
bw_ratio(img_1)

In [None]:
center_mass_ratio(img_1)

In [166]:
X = []
Y = []

def fill_training_sets(path, X, Y):
    note_type = glob.glob(path)
    N = 0
    for note in note_type:
        an_img = imageio.imread(note)
        an_img = threshold(an_img, otsu_threshold(an_img))
        img_features = extract_features(an_img)
        X.append(list(img_features))
        Y.append(N)
        N+=1

#Crotchet
#fill_training_sets("../assets/reference/crotchet/*", X, Y)

#Minim
#fill_training_sets("../datasets/minim/*", X, Y, 2)

#G Clef
#fill_training_sets("../datasets/g_clef/*", X, Y, 3)

In [167]:
#Build a classifier. Receives path to images.
def build_classifier(path):
    X = []
    Y = []
    fill_training_sets(path, X, Y)
    clf = RandomForestClassifier(n_estimators=50, random_state=42)
    clf.fit(X,Y)
    return clf

### Here the classifier...

In [168]:
test_node = imageio.imread(test)
test_node = threshold(test_node, otsu_threshold(test_node))
test_feat = extract_features(test_node)
test_feat

(0.6864864864864865,
 0.40540540540540543,
 0.5486590038314176,
 0.7576887232059646)

In [169]:
#Classifier for crotchet
cr_clf = build_classifier("../assets/reference/crotchet/*")
clef_clf = build_classifier("../assets/reference/clef/*")

In [172]:
print(cr_clf.predict_proba([test_feat]))
print(clef_clf.predict_proba([test_feat]))

[[0.06 0.24 0.   0.06 0.   0.   0.   0.08 0.2  0.2  0.14 0.   0.02 0.  ]]
[[1.]]


In [175]:
cr_clf.predict([test_feat])

array([1])

In [1]:
import symbol_classifier

IndentationError: unexpected indent (symbol_classifier.py, line 38)