# Digit Recognition Application

Run this application by opening the 'Kernel' menu and selecting 'Restart & Run All'.

### Imports

In [1]:
from os import listdir
from os.path import basename, isfile, join, splitext

import ipywidgets as widget
import pandas as pd

# The star import stays between the two groups on purpose: names imported
# after it (display, joblib) must keep overriding anything SimpleCV exports.
from SimpleCV import *
from IPython.display import display
from sklearn.externals import joblib

  return f(*args, **kwds)


RuntimeError: The current Numpy installation ('/anaconda3/lib/python3.6/site-packages/numpy/__init__.py') fails to pass simple sanity checks. This can be caused for example by incorrect BLAS library being linked in, or by mixing package managers (pip, conda, apt, ...). Search closed numpy issues for similar problems.

In [2]:
# Locations of the trained classifier and of the feature table it was trained on.
CLASSIFIER_PATH = '../classifiers/grid_scale_classifiers.joblib'
FEATURE_CSV_PATH = "../dataset-numpy/feature-selection/v8x8-manual1.csv"

# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code. Only load classifier files from a trusted source.
clf = joblib.load(CLASSIFIER_PATH)

# Column names of the training table, without the saved index column.
columns = (
    pd.read_csv(FEATURE_CSV_PATH)
    .drop('Unnamed: 0', axis=1)
    .columns
)
print(columns)

Index([u'label', u'number_of_holes', u'width', u'aspect_ratio', u'area',
       u'angle', u'radius', u'contour', u'hull_radius', u'weight0_2',
       u'weight1_2', u'weight2_2', u'weight3_2', u'weight4_2', u'weight5_2',
       u'weight6_2', u'weight7_2', u'weight0_3', u'weight1_3', u'weight2_3',
       u'weight3_3', u'weight4_3', u'weight5_3', u'weight6_3', u'weight7_3',
       u'weight0_4', u'weight1_4', u'weight2_4', u'weight3_4', u'weight4_4',
       u'weight5_4', u'weight6_4', u'weight7_4', u'weight0_5', u'weight1_5',
       u'weight2_5', u'weight3_5', u'weight4_5', u'weight5_5', u'weight6_5',
       u'weight7_5'],
      dtype='object')


1. Image Selection: Select one or more 128x32-pixel images containing postal codes whose digits you would like to predict.
2. Image Separation: For each image, separate the digits.
3. Feature Extraction: For each digit, extract the vision-based and pixel-based features.
4. Predict Digit: For each digit, normalize and scale it, then predict the digit using the classifier.
5. Widget: Provides a visual interface for selecting an image and showing the predicted digits.

In [3]:
# Folder that is scanned for the selectable images.
filepath = "../dataset-images/"

# Output widget that hosts the application's user interface.
app = widget.Output()

# SimpleCV display that targets the notebook.
disp = Display(displaytype="notebook")

#### Select image features
For more information, go to file _1. Feature Extraction_.

In [4]:
def get_amount_of_holes(image, max_hole_area=3721):
    """Count the holes in a digit image.

    The image is enlarged with ``embiggen(2)`` and inverted; every blob of the
    inverted image whose area is below ``max_hole_area`` counts as one hole.

    Parameters
    ----------
    image : SimpleCV image
        Image of a single digit.
    max_hole_area : number, optional
        Exclusive upper bound on the area of a blob that counts as a hole.
        The default 3721 (61 * 61) presumably excludes the background region
        surrounding the digit -- TODO confirm.

    Returns
    -------
    int
        Number of holes found; 0 when the inverted image contains no blobs.
    """
    inverted = image.embiggen(2).invert()

    # findBlobs() yields None (not an empty collection) when nothing is found,
    # so guard before iterating.
    inv_blobs = inverted.findBlobs()
    if not inv_blobs:
        return 0

    return sum(1 for blob in inv_blobs if blob.area() < max_hole_area)

def get_features_from_number(digit):
    """Build the feature vector of a single digit image.

    The vector always has the same layout: the number of holes, seven shape
    features of the digit's blob (width, aspect ratio, area, angle, radius,
    contour length, hull radius) and then the grid weights returned by
    ``get_amount_white_pixel_per_number``, flattened column by column.

    Parameters
    ----------
    digit : SimpleCV image
        Image of a single digit.

    Returns
    -------
    list
        The feature values in the order described above.

    Raises
    ------
    ValueError
        If no blob can be found in ``digit``.
    """
    # vision based features
    blobs = digit.findBlobs()
    if not blobs:
        # findBlobs() gives None when nothing is detected.
        raise ValueError("No blob found in the digit image; cannot extract features")

    # Use only the largest blob: appending the features of every blob would
    # make the vector longer whenever noise produces an extra blob.
    blob = max(blobs, key=lambda candidate: candidate.area())

    number = [
        get_amount_of_holes(digit),
        blob.width(),
        blob.aspectRatio(),
        float(blob.area()),
        blob.angle(),
        blob.radius(),
        len(blob.contour()),
        blob.hullRadius(),
    ]

    # pixel based features
    THRESHOLD = 5
    grid_cell_amount = 8

    matrix = get_amount_white_pixel_per_number(digit, grid_cell_amount, THRESHOLD)
    # Column-major walk: all rows of the first column, then the next column.
    number.extend(
        matrix[x, y]
        for y in range(matrix.shape[1])
        for x in range(matrix.shape[0])
    )

    return number

def get_amount_white_pixel_per_number(image, grid_cell_amount, threshold, image_size=32):
    """Count the white pixels of an image per grid cell.

    The image is divided into ``grid_cell_amount`` x ``grid_cell_amount``
    square cells. For every cell the first channel, divided by 255, is summed;
    sums that do not exceed ``threshold`` are replaced by 0.

    Parameters
    ----------
    image : SimpleCV image
        Square digit image that is ``image_size`` pixels wide and high.
    grid_cell_amount : int
        Number of cells along each axis.
    threshold : number
        A cell keeps its sum only when the sum is greater than this value.
    image_size : int, optional
        Edge length of ``image`` in pixels (default 32).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(grid_cell_amount, grid_cell_amount - 4)``: one row
        per grid row, one column per kept horizontal cell position.
    """
    # Floor division keeps the cell size an integer on Python 3 as well,
    # where "/" would produce a float.
    grid_cell_size = image_size // grid_cell_amount

    matrix = np.zeros((grid_cell_amount, grid_cell_amount))

    for y in range(grid_cell_amount):
        for x in range(grid_cell_amount):
            grid_cell = image.crop(x * grid_cell_size, y * grid_cell_size,
                                   grid_cell_size, grid_cell_size)
            pixel_arr = grid_cell.getNumpy() / 255
            white_pixels = pixel_arr[:, :, 0].sum()
            matrix[x][y] = white_pixels if white_pixels > threshold else 0

    # Manual feature selection: discard the two outermost horizontal cell
    # positions on each side (x = 0, 1 and the last two).
    matrix = matrix[2:-2, :]

    return matrix.T

#### Separate the selected image into individual numbers

In [5]:
def process_image(image, filename, digit_size=32):
    """Split a postal code image into digits and extract each digit's features.

    The number of digits equals the number of characters in the file name
    without its directory and extension. Digits are cut out from left to
    right as consecutive ``digit_size`` x ``digit_size`` squares.

    Parameters
    ----------
    image : SimpleCV image
        Image containing the digits next to each other.
    filename : str
        Path or name of the image file.
    digit_size : int, optional
        Width and height of one digit in pixels (default 32).

    Returns
    -------
    list
        One entry per digit; each entry is a one-element list holding the
        feature vector of that digit.
    """
    # str.strip()/rstrip() remove *sets of characters*, not a prefix or suffix,
    # so they give wrong results for other folders or names made of those
    # characters. basename/splitext remove exactly the directory and extension.
    processed_filename = splitext(basename(filename))[0]
    processed_image = image.binarize().dilate().erode()

    numbers = []
    for index in range(len(processed_filename)):
        crop = processed_image.crop(index * digit_size, 0, digit_size, digit_size, centered=False)
        numbers.append([get_features_from_number(crop)])

    return numbers

#### Create a dataframe from the numbers in the selected image

In [6]:
def features_to_df(numbers):
    """Wrap feature vectors in a DataFrame with the expected column names.

    ``numbers`` is a sequence of rows with 40 values each: eight shape
    features followed by the grid weights ``weight0_2`` ... ``weight7_5``.
    """
    shape_columns = [
        'number_of_holes', 'width', 'aspect_ratio', 'area',
        'angle', 'radius', 'contour', 'hull_radius',
    ]
    # weight<first>_<second>: the first index runs 0..7 within each second index 2..5
    weight_columns = [
        'weight%d_%d' % (first, second)
        for second in range(2, 6)
        for first in range(8)
    ]

    return pd.DataFrame(numbers, columns=shape_columns + weight_columns)


#### Predict the label of the numbers from the selected image

In [7]:
def predict(numbers):
    """Return the classifier's predictions for the given feature rows.

    The rows are first converted to a DataFrame by ``features_to_df`` and then
    passed unchanged to ``clf.predict``.
    """
    feature_frame = features_to_df(numbers)
    predictions = clf.predict(feature_frame)
    return predictions

#### Application logic
Initialise the image selector. When an image is selected, `on_value_change` is triggered and predicts the postal code.

In [8]:
def on_value_change(change):
    """Handle a new dropdown selection: show the image and its prediction.

    The app output is cleared and the selector is rendered again. When an
    image name was selected, the image is shown together with a label that
    holds the predicted postal code. Selecting the empty placeholder entry
    only resets the app.

    Parameters
    ----------
    change : dict
        Change notification of the dropdown; ``change['new']`` is the selected
        file name relative to ``filepath``.
    """
    selected_name = change['new']

    # With wait=True the old content is only removed once new output arrives.
    app.clear_output(wait=True)

    # Everything displayed from this callback has to be captured by `app`.
    with app:
        init_select()

        # The empty first dropdown entry means "nothing selected".
        if not selected_name:
            return

        image_path = join(filepath, selected_name)

        # PNG data is binary: read it as bytes and close the file afterwards.
        with open(image_path, 'rb') as image_file:
            image_bytes = image_file.read()
        selected_image = widget.Image(value=image_bytes, format='png', width=128, height=32)

        # Load the image once and reuse it for the pixels and the file name.
        source_image = Image(image_path)
        digits = process_image(source_image, source_image.filename)
        postal = ''.join(str(predict(d)[0]) for d in digits)

        postal_code = widget.Label(value="Predicted postal code: " + postal)
        display(selected_image, postal_code)
    

def init_select():
    """Display a dropdown listing the PNG images found in ``filepath``.

    The dropdown starts with an empty placeholder entry, followed by the
    image names in alphabetical order. Changing the selection calls
    ``on_value_change``.
    """
    # Only offer PNG files: other entries (hidden files, notes, ...) cannot be
    # processed. Sorting gives a stable order; listdir's order is arbitrary.
    images = sorted(
        name for name in listdir(filepath)
        if isfile(join(filepath, name)) and name.lower().endswith('.png')
    )

    dropdown = widget.Dropdown(options=[""] + images, disabled=False)
    dropdown.observe(on_value_change, names='value')

    select_one_image = widget.Box([widget.Label(value="Select an image: "), dropdown])

    display(select_one_image)

# Render the initial image selector inside the `app` output widget.
with app:
    init_select()

# Digit Recognition Application

To predict a postal code using image recognition:
1. Add the images _(128x32 pixels, format .png)_ you want to predict to the folder _"../dataset-images/"_
2. Run this Notebook by clicking "Kernel" --> "Restart & Run All"
3. In the app below, select the image you would like to predict the postal code for.

In [9]:
# Show the `app` output widget; the image selector was rendered into it above.
app

Output()