In [262]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import os
import os.path as op
import shutil
from zipfile import ZipFile
from keras.preprocessing.image import array_to_img, img_to_array, load_img
from utils import get_image_paths, word_from_image_path, preprocess_image, print_im, TextTransform, N_CHARS, SEQUENCE_LENGTH, IMAGE_DIMENSIONS

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [263]:
from multi_gpu import make_parallel

In [264]:
base_dir = '/mnt/mnt/ramdisk/max/90kDICT32px/'

In [265]:
images_paths = get_image_paths(base_dir)

In [266]:
text_transformer = TextTransform()

### creating a batch

In [None]:
%%time
batch_y = text_transformer.make_batch_labels(images_paths[:])

In [None]:
%%time
ims = [load_img(im, grayscale=True) for im in images_paths[:]]
ims = [preprocess_image(img_to_array(im)) for im in ims]
batch = np.array(ims)

In [None]:
%%time
batch = batch - batch.mean(axis=(1, 2)).reshape((-1, 1, 1))
batch = batch / batch.std(axis=(1, 2)).reshape((-1, 1, 1))

### Model creation

In [230]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Dense, Reshape, Activation
from keras.layers import Flatten

In [231]:
convolutions = [64, 128, 256, 512, 512]
kernels = [5, 5, 3, 3, 3]

model = Sequential()
input_shape = (None,) + IMAGE_DIMENSIONS + (1,)

model.add(Conv2D(nb_filter=64,
                     nb_row=kernels[0],
                     nb_col=kernels[0],
                     activation='relu',
                     border_mode='same',
                     batch_input_shape=input_shape, name="convo" + str(0)))

model.add(MaxPooling2D(pool_size=(2, 2), border_mode='same'))

for i, (kernel, convolution_size) in enumerate(zip(convolutions[1:], kernels[1:])):
    model.add(Conv2D(nb_filter=convolution_size,
                     nb_row=kernel,
                     nb_col=kernel,
                     activation='relu',
                     border_mode='same',
                     name="convo" + str(i + 1)))
    
    if i <= 3 :
        model.add(MaxPooling2D(pool_size=(2, 2), border_mode='same',))

model.add(Flatten())
# model.add(Dense(128, activation='relu'))
model.add(Dense(4096, activation='relu'))
model.add(Dense(4096, activation='relu'))

model.add(Dense(SEQUENCE_LENGTH * N_CHARS))

model.add(Reshape((SEQUENCE_LENGTH, N_CHARS)))
model.add(Activation('softmax'))


model.compile(optimizer='adam', loss='categorical_crossentropy')

In [232]:
p_model = make_parallel(model, 4)

In [233]:
p_model.compile(optimizer='adam', loss='categorical_crossentropy')

In [148]:
%%time
res = p_model.predict(batch[32*100:32*101, :, :].reshape((batch[32*100:32*101, :, :].shape[0],) + IMAGE_DIMENSIONS+ (1,)))

CPU times: user 112 ms, sys: 36 ms, total: 148 ms
Wall time: 474 ms


In [147]:
for i, by in enumerate(batch_y):
    try:
        print(text_transformer.word_from_matrix(res[i]), text_transformer.word_from_matrix(by))
    except:
        pass

sarti                   crustal                
sart                    paths                  
sart                    pace                   
sereiii                 arapahoes              
seeeiiie                retorts                
sereiii                 corrosively            
sarti                   betas                  
seeeiiii                interpenetration       
sarte                   temps                  
sait                    pb                     
seeeriiin               transliteration        
sereii                  buskin                 
sartie                  briton                 
seeeiii                 interacted             
sartie                  chained                
seeeiii                 regularizing           
sereiii                 cadenzas               
seeeiii                 teaspoons              
seeeiiie                
sartie                  overcoats              
sartie                  eulogizes              
seeeriiin      

  


In [None]:
biggest_batches = 32 * (batch.shape[0] // 32)

In [None]:
b = batch[:biggest_batches]
b_y = batch_y[:biggest_batches]

In [247]:
import pickle

In [269]:
import datetime

In [None]:
for epoch in range(10):
    history = p_model.fit(b.reshape((b.shape[0],) + IMAGE_DIMENSIONS + (1,)), b_y, nb_epoch=1)
    
    now = datetime.datetime.now().isoformat().split('.')[0]

    with open("state", 'a+') as f:
        f.write("epoch={}, loss={}, now={}\n".format(epoch, history.history['loss'][0], now))
    
    p_model.save('p_model_{}.h5'.format(epoch))

In [None]:
pickle.dump()