In [1]:
%matplotlib inline
import os
import sys
import pylab
import random
import numpy as np
import pandas as pd
import cPickle as pkl
from lasagne import layers
from bs4 import BeautifulSoup as bs
from theano.tensor.nnet import softmax
from scipy.misc import imread, imresize
from nolearn.lasagne import NeuralNet, BatchIterator
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Repository location, relative to the current user's home directory.
repo_location = '/workspace/project/project/'

# Every root below is "<home><repo_location><subdir>/"; the trailing slash is
# kept because later cells build file paths by plain string concatenation.
_repo_dir = os.path.expanduser('~') + repo_location
data_root = _repo_dir + 'datasets/'
script_root = _repo_dir + 'scripts/'
model_root = _repo_dir + 'models/'

Using gpu device 0: GeForce GT 740M (CNMeM is disabled)


In [54]:
# Define functions
number = '0123456789'
alphabet = 'abcdefghijklmnopqrstuvwxyz'

def classer(element):
    """Map a raw dataset label to a single-character class label.

    Two input conventions are accepted:

    * an integer class index: 0-9 -> '0'-'9', 10-35 -> 'A'-'Z',
      36-61 -> 'a'-'z';
    * a one-character string that is already a digit or an ASCII letter,
      which is returned unchanged (unicode input comes back as a plain
      ``str``).

    Returns:
        The label as a one-character ``str``, or ``None`` (after printing a
        diagnostic) when the input is not recognized.

    Notes:
        Digits are returned as strings so that every label has the same
        type regardless of which convention produced it.
    """
    import numbers  # stdlib; local import keeps this cell self-contained

    # Position in this string == integer class index.
    classes = number + alphabet.upper() + alphabet
    text_types = (str, type(u''))  # (str, unicode) on Python 2

    if isinstance(element, numbers.Integral):
        if 0 <= element < len(classes):
            return classes[element]
    elif isinstance(element, text_types):
        try:
            label = str(element)  # ASCII-encodes unicode on Python 2
        except UnicodeError:
            label = None  # non-ASCII character: fall through as unrecognized
        # Require exactly one character: a bare ``in`` test is a substring
        # match and would also accept '' or 'ab'.
        if label is not None and len(label) == 1 and label in classes:
            return label

    print('do u recognize this? %r' % (element,))
    return None

In [52]:
# Load dataset
# chars74k: space-separated listing without a header row, so the columns are
# addressed positionally (0 and 1) by later cells.
data1 = pd.read_csv(script_root + 'LISTFILE.txt', sep = ' ', header = None)
print('Loaded chars74k')

# icdar03: char.xml holds <image> elements whose 'file' attribute is a path
# relative to the char/ directory and whose 'tag' attribute is the label.
icdar_dir = data_root + 'icdar03/train/char/'
with open(icdar_dir + 'char.xml') as xml_file:  # handle is closed on exit
    soup = bs(xml_file.read(), 'lxml-xml')

X = []
y = []
skipped = 0
for image in soup('image'):
    try:
        # Resolve both fields before appending so a failure on either one
        # cannot leave X and y with different lengths.
        img = imread(icdar_dir + image['file'])
        tag = image['tag']
    except Exception:
        # Still best-effort (bad entries are skipped), but no longer swallows
        # KeyboardInterrupt/SystemExit, and the skips are counted.
        skipped += 1
        continue
    X.append(img)
    y.append(tag)

if skipped:
    print('Skipped %d unreadable icdar03 entries' % skipped)

data2 = pd.DataFrame({'image' : X, 'label' : y})
# drop extra labels
data2 = data2.loc[~data2['label'].isin([':', '-', '.', '\'', '!', '(', '"', ')', '&', '?', u'\xa3', u'\xc9', u'\xd1', u'\xe9'])]
print('Loaded icdar03')

Loaded chars74k
Loaded icdar03


In [55]:
# Reshape images to 32x32 and convert to grayscale
def to_gray32(img):
    """Resize ``img`` to 32x32 and reduce it to a single 2-D plane.

    Colour input (3-D array) is collapsed with the ITU-R BT.601 luma weights
    (0.299 R + 0.587 G + 0.114 B); an alpha channel, if present, is ignored.
    2-D input is returned as resized, without further conversion.
    """
    img = imresize(img, (32, 32))
    if img.ndim == 3:
        # Only the first three channels carry colour. The weights sum to 1,
        # so the result stays within the input's value range.
        return img[..., :3].dot([0.299, 0.587, 0.114])
    return img

# chars74k
data1_x = np.zeros((data1[0].count(), 1, 32, 32))
data1_y = [classer(label) for label in data1[1]]

for idx, path in enumerate(data1[0]):
    data1_x[idx, ...] = to_gray32(imread(data_root + 'English/' + path))

data1_x = data1_x.astype('float32')
print('chars74k reshaped and grayscaled')

# icdar03
data2_x = np.zeros((data2['image'].count(), 1, 32, 32))
data2_y = [classer(label) for label in data2['label'].values]

for idx, img in enumerate(data2['image']):
    data2_x[idx, ...] = to_gray32(img)

data2_x = data2_x.astype('float32')
print('icdar03 reshaped and grayscaled')

chars74k reshaped and grayscaled
icdar03 reshaped and grayscaled


In [56]:
# Normalize by MuSigma
# Each dataset is standardised separately and in place: scale by the
# standard deviation over all elements, then subtract the mean of the
# scaled values.
for pixels in (data1_x, data2_x):
    pixels /= pixels.std()
    pixels -= pixels.mean()

In [57]:
# concat both datasets
# chars74k samples come first, icdar03 samples second, joined along axis 0.
data_x = np.concatenate((data1_x, data2_x), axis = 0)
data_y = np.concatenate((data1_y, data2_y))

In [58]:
# Sanity check: shapes and container types of the merged arrays.
summary = (data_x.shape, data_y.shape, type(data_x), type(data_y))
print(' '.join(str(item) for item in summary))

(13818, 1, 32, 32) (13818,) <type 'numpy.ndarray'> <type 'numpy.ndarray'>


In [67]:
# setting nn
# Layers in forward order: two conv/pool/dropout stages, a third convolution,
# one dense hidden layer and the output layer.
layer_stack = [
    ('input', layers.InputLayer),
    ('conv1', layers.Conv2DLayer),
    ('pool1', layers.MaxPool2DLayer),
    ('dropout1', layers.DropoutLayer),
    ('conv2', layers.Conv2DLayer),
    ('pool2', layers.MaxPool2DLayer),
    ('dropout2', layers.DropoutLayer),
    ('conv3', layers.Conv2DLayer),
    ('hidden4', layers.DenseLayer),
    ('output', layers.DenseLayer),
]

net = NeuralNet(
    layers=layer_stack,

    # Per-layer settings, written as <layer name>_<argument>.
    input_shape=(None, 1, 32, 32),
    conv1_num_filters=32,
    conv1_filter_size=(5, 5),
    pool1_pool_size=(2, 2),
    dropout1_p=0.2,
    conv2_num_filters=64,
    conv2_filter_size=(5, 5),
    pool2_pool_size=(2, 2),
    dropout2_p=0.2,
    conv3_num_filters=128,
    conv3_filter_size=(5, 5),
    hidden4_num_units=128,
    output_num_units=62,
    output_nonlinearity=softmax,

    # Same batch size for the train and test iterators.
    batch_iterator_train=BatchIterator(batch_size=2500),
    batch_iterator_test=BatchIterator(batch_size=2500),

    # Optimiser settings.
    update_learning_rate=0.01,
    update_momentum=0.9,

    # Classification on encoded labels, 250 epochs, with progress output.
    use_label_encoder=True,
    regression=False,
    max_epochs=250,
    verbose=1,
)

In [66]:
# train nn
# Fits `net` on the merged image array and label array built in the cells
# above. The trailing semicolon keeps the notebook from echoing the call's
# return value.
# NOTE(review): this cell's execution count (66) is lower than that of the
# cell defining `net` (67), so the captured log may come from an earlier
# definition of `net` -- re-run top to bottom to confirm.
net.fit(data_x, data_y);

# Neural Network with 281534 learnable parameters

## Layer information

  #  name      size
---  --------  --------
  0  input     1x32x32
  1  conv1     32x28x28
  2  pool1     32x14x14
  3  dropout1  32x14x14
  4  conv2     64x10x10
  5  pool2     64x5x5
  6  dropout2  64x5x5
  7  conv3     128x1x1
  8  hidden4   128
  9  output    62

  epoch    train loss    valid loss    train/val    valid acc  dur
-------  ------------  ------------  -----------  -----------  ------
      1       [36m4.14672[0m       [32m4.12919[0m      1.00424      0.01145  11.24s
      2       [36m4.14237[0m       [32m4.12485[0m      1.00425      0.01684  11.02s
      3       [36m4.13745[0m       [32m4.11984[0m      1.00427      0.02070  11.02s
      4       [36m4.13313[0m       [32m4.11476[0m      1.00446      0.03168  11.00s
      5       [36m4.12832[0m       [32m4.10984[0m      1.00450      0.04186  10.59s
      6       [36m4.12364[0m       [32m4.10513[0m      1.00451      0.05051  1