In [19]:
%matplotlib inline
import os
import sys
import pylab
import random
import numpy as np
import pandas as pd
import cPickle as pkl
from lasagne import layers
from bs4 import BeautifulSoup as bs
from lasagne import updates
from theano.tensor.nnet import softmax
from scipy.misc import imread, imresize
from nolearn.lasagne import NeuralNet, BatchIterator
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report, accuracy_score

repo_location = '/workspace/project/project/'
data_root = os.path.join(os.path.expanduser('~') + repo_location + 'datasets/')
script_root = os.path.join(os.path.expanduser('~') + repo_location + 'scripts/')
model_root = os.path.join(os.path.expanduser('~') + repo_location + 'models/')

In [4]:
# Load dataset
train_soup = bs(open(data_root + 'icdar03/train/char/char.xml').read(), 'lxml-xml')
test_soup = bs(open(data_root + 'icdar03/test/char/char.xml').read(), 'lxml-xml')

X_train = []
y_train = []
X_test = []
y_test = []

for image in train_soup('image'):
    try:
        img = imread(data_root + 'icdar03/train/char/' + image['file'])
        X_train.append(img)
        y_train.append(image['tag'])
    except:
        pass
    
for image in test_soup('image'):
    try:
        img = imread(data_root + 'icdar03/test/char/' + image['file'])
        X_test.append(img)
        y_test.append(image['tag'])
    except:
        pass

    
data_train = pd.DataFrame({'image' : X_train, 'label' : y_train})
data_test = pd.DataFrame({'image' : X_test, 'label' : y_test})

print 'Loaded icdar03'

Loaded icdar03


In [6]:
# Reshape images to 32x32 and convert to grayscale
data_train_x = np.zeros((data_train['image'].count(), 1, 32, 32))
data_train_y = data_train['label'].values
data_test_x = np.zeros((data_test['image'].count(), 1, 32, 32))
data_test_y = data_test['label'].values

for idx, img in enumerate(data_train['image']):
    img = imresize(img, (32, 32))
    if len(img.shape) == 3:
        data_train_x[idx, ...] = img.dot([0.299, 0.587, 0.144])
    else:
        data_train_x[idx, ...] = img
        
for idx, img in enumerate(data_test['image']):
    img = imresize(img, (32, 32))
    if len(img.shape) == 3:
        data_test_x[idx, ...] = img.dot([0.299, 0.587, 0.144])
    else:
        data_test_x[idx, ...] = img
        
data_train_x = data_train_x.astype('float32')
data_test_x = data_test_x.astype('float32')
print 'icdar03 reshaped and grayscaled'

icdar03 reshaped and grayscaled


In [7]:
# Normalize by MuSigma
data_train_x /= data_train_x.std(axis = None)
data_train_x -= data_train_x.mean()

data_test_x /= data_test_x.std(axis = None)
data_test_x -= data_test_x.mean()

In [13]:
print data_train_x.shape, data_train_y.shape, data_test_x.shape, data_test_y.shape

(6185, 1, 32, 32) (6185,) (5430, 1, 32, 32) (5430,)


In [25]:
# setting nn 
net = NeuralNet(
    layers = [
        ('input', layers.InputLayer),
        ('conv1', layers.Conv2DLayer),
        ('pool1', layers.MaxPool2DLayer),
        ('dropout1', layers.DropoutLayer),
        ('conv2', layers.Conv2DLayer),
        ('pool2', layers.MaxPool2DLayer),
        ('dropout2', layers.DropoutLayer),
        ('conv3', layers.Conv2DLayer),
        ('dropout3', layers.DropoutLayer),
        ('hidden4', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],

    input_shape = (None, 1, 32, 32),
    conv1_num_filters = 32, conv1_filter_size = (5, 5),
    pool1_pool_size = (2, 2),
    dropout1_p = 0.2,
    conv2_num_filters = 64, conv2_filter_size = (5, 5),
    pool2_pool_size = (2, 2),
    dropout2_p = 0.3,
    conv3_num_filters = 128, conv3_filter_size = (5, 5),
    dropout3_p = 0.5,
    hidden4_num_units = 128,
    output_num_units = 75, output_nonlinearity = softmax,

    batch_iterator_train = BatchIterator(batch_size = 2500),
    batch_iterator_test = BatchIterator(batch_size = 2500),

    update = updates.adam,

    use_label_encoder = True,
    regression = False,
    max_epochs = 250,
    verbose = 1,
)

In [26]:
# train nn
net.fit(data_train_x, data_train_y);

# Neural Network with 283211 learnable parameters

## Layer information

  #  name      size
---  --------  --------
  0  input     1x32x32
  1  conv1     32x28x28
  2  pool1     32x14x14
  3  dropout1  32x14x14
  4  conv2     64x10x10
  5  pool2     64x5x5
  6  dropout2  64x5x5
  7  conv3     128x1x1
  8  dropout3  128x1x1
  9  hidden4   128
 10  output    75

  epoch    train loss    valid loss    train/val    valid acc  dur
-------  ------------  ------------  -----------  -----------  -----
      1       [36m4.32183[0m       [32m4.27056[0m      1.01200      0.04413  4.75s
      2       [36m4.22851[0m       [32m4.16485[0m      1.01528      0.04413  4.75s
      3       [36m4.15465[0m       [32m4.09075[0m      1.01562      0.04413  4.75s
      4       [36m4.06992[0m       [32m4.03824[0m      1.00784      0.05280  4.75s
      5       [36m3.99841[0m       [32m3.96425[0m      1.00862      0.05122  4.89s
      6       [36m3.93319[0m       [32m3.92009[0m      1.0033

In [27]:
pred = net.predict(data_test_x)
print accuracy_score(data_test_y, pred)

0.734069981584


In [28]:
print classification_report(data_test_y, pred)

             precision    recall  f1-score   support

          !       0.40      0.25      0.31         8
          "       0.00      0.00      0.00         1
          &       1.00      0.14      0.25         7
          '       0.00      0.00      0.00         8
          (       0.00      0.00      0.00         1
          )       0.00      0.00      0.00         1
          ,       0.00      0.00      0.00         6
          -       1.00      0.25      0.40         4
          .       0.14      0.09      0.11        11
          0       0.00      0.00      0.00        46
          1       0.91      0.43      0.59        46
          2       0.72      0.78      0.75        49
          3       1.00      0.41      0.58        17
          4       0.62      0.33      0.43        24
          5       0.44      0.28      0.34        29
          6       0.89      0.53      0.67        15
          7       0.00      0.00      0.00        10
          8       0.50      0.17      0.25   