# Classifier

In [10]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import imageio

In [39]:
#All functions here receive a binarized image.
def bw_ratio(img):
    pixel_count = np.size
    white_pixels = img[img == 1]
    return white_pixels/pixel_count

def wh_ratio(img): #Width over height
    y, x = img.shape
    return x/y

def center_mass_ratio(img):
    height = img.shape[0]
    width = img.shape[1]
    
    #Okay, first let us compute center of mass of bin image
    white_pixels = np.where[img==1]
    #This gives us two lists, containing the coordinates.
    y_mean = white_pixels[0].sum()/white_pixels[0].size
    x_mean = white_pixels[1].sum()/white_pixels[1].size
    #Since image is already at origin, we can just use height and width.
    return (x_mean/width, y_mean/height)
    

In [41]:
#For binarizing...
def otsu_threshold(im):

    # Compute histogram and probabilities of each intensity level
    pixel_counts = [np.sum(im == i) for i in range(256)]
    n, m = im.shape[0:2]
    # Initialization
    s_max = (0,0)

    for threshold in range(256):

        # update
        w_0 = sum(pixel_counts[:threshold])
        w_1 = sum(pixel_counts[threshold:])

        mu_0 = sum([i * pixel_counts[i] for i in range(0,threshold)])\
                        / w_0 if w_0 > 0 else 0
        mu_1 = sum([i * pixel_counts[i] for i in range(threshold, 256)])\
                        / w_1 if w_1 > 0 else 0

        # calculate - inter class variance
        s = w_0 * w_1 * (mu_0 - mu_1) ** 2

        if s > s_max[1]:
            s_max = (threshold, s)
    return s_max[0]

def threshold(pic, threshold):
    return ((pic < threshold) * 255).astype('uint8')

### The feaures we want to analyse are:
- Black/White pixel ratio.
- Width/Height bounding box ratio.
- 'Center of mass' proportions.

In [35]:
#This function returns the above features.
#def extract_features(img):

#### Example images:

In [45]:
img_1f = "cropped_num1.0.png"
img_7f = "cropped_num7.0.png"
img_8f = "cropped_num8.0.png"
img_9f = "cropped_num9.0.png"
#Image 1:
img_1 = imageio.imread(img_1f)
img_1 = threshold(img_1, otsu_threshold(img_1))
img_1

Array([[255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255,   0],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255,   0],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255,   0],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255,   0],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255,   0],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255,   0],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255,   0],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255,   0],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255,   0],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255,   0],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
       

In [43]:
wh_ratio(img_1)

0.40540540540540543

In [44]:
bw_ratio(img_1)

Array([], dtype=object)

In [31]:
center_mass_ratio(img_1)

TypeError: 'function' object is not subscriptable

### Here the classifier...

In [47]:
classif = RandomForestClassifier(n_estimators=50, random_state=42)