In [1]:
#!/usr/bin/env python
import argparse
import os
import csv
import numpy as np
# try:
#     import better_exceptions
# except ImportError:
#     pass
from tqdm import trange
import tensorflow as tf
from src.model import crnn_fn
from src.data_handler import data_loader
from src.data_handler import preprocess_image_for_prediction

from src.config import Params, Alphabet, import_params_from_json

# Arguments and parameters

In [2]:
# parser = argparse.ArgumentParser()
# parser.add_argument('-ft', '--csv_files_train', required=True, type=str, help='CSV filename for training',
#                     nargs='*', default=None)
# parser.add_argument('-fe', '--csv_files_eval', type=str, help='CSV filename for evaluation',
#                     nargs='*', default=None)
# parser.add_argument('-o', '--output_model_dir', required=True, type=str,
#                     help='Directory for output', default='./estimator')
# parser.add_argument('-n', '--nb-epochs', type=int, default=30, help='Number of epochs')
# parser.add_argument('-g', '--gpu', type=str, help="GPU 0,1 or '' ", default='')
# parser.add_argument('-p', '--params-file', type=str, help='Parameters filename', default=None)
# args = vars(parser.parse_args())

csv_files_train = "./data/train.csv"
csv_files_eval = "./data/valid.csv"
output_model_dir = "./estimator"
n_epochs = 30
gpu = "0"

In [3]:
parameters = Params(train_batch_size=128,
                    eval_batch_size=128,
                    learning_rate=1e-3,  # 1e-3 recommended
                    learning_decay_rate=0.95,
                    learning_decay_steps=5000,
                    evaluate_every_epoch=5,
                    save_interval=5e3,
                    input_shape=(117, 1669),
                    optimizer='adam',
                    digits_only=False,
                    alphabet=Alphabet.MY_ALPHABET,
                    alphabet_decoding='same',
                    csv_delimiter=';',
                    csv_files_eval=csv_files_eval,
                    csv_files_train=csv_files_train,
                    output_model_dir=output_model_dir,
                    n_epochs=n_epochs,
                    gpu=gpu
                    )

# Set up model before running

In [4]:
model_params = {
    'Params': parameters,
}

parameters.export_experiment_params()

os.environ['CUDA_VISIBLE_DEVICES'] = parameters.gpu
config_sess = tf.ConfigProto()
config_sess.gpu_options.per_process_gpu_memory_fraction = 0.8
config_sess.gpu_options.allow_growth = True

# Config estimator
est_config = tf.estimator.RunConfig()
est_config.replace(keep_checkpoint_max=10,
                   save_checkpoints_steps=parameters.save_interval,
                   session_config=config_sess,
                   save_checkpoints_secs=None,
                   save_summary_steps=1000,
                   model_dir=parameters.output_model_dir)

estimator = tf.estimator.Estimator(model_fn=crnn_fn,
                                   params=model_params,
                                   model_dir=parameters.output_model_dir,
                                   config=est_config
                                   )

INFO:tensorflow:Using config: {'_model_dir': './estimator', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001E1E6A409E8>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [5]:
# Count number of image filenames in csv
n_samples = 0
with open(parameters.csv_files_eval, 'r', encoding='utf8') as csvfile:
    reader = csv.reader(csvfile, delimiter=parameters.csv_delimiter)
    n_samples += len(list(reader))

# Run the model

In [6]:
try:
    for e in trange(0, parameters.n_epochs, parameters.evaluate_every_epoch):
        estimator.train(input_fn=data_loader(csv_filename=parameters.csv_files_train,
                                             params=parameters,
                                             batch_size=parameters.train_batch_size,
                                             num_epochs=parameters.evaluate_every_epoch,
                                             data_augmentation=True,
                                             image_summaries=True))
        estimator.evaluate(input_fn=data_loader(csv_filename=parameters.csv_files_eval,
                                                params=parameters,
                                                batch_size=parameters.eval_batch_size,
                                                num_epochs=1),
                           steps=np.floor(n_samples/parameters.eval_batch_size)
                           )

except KeyboardInterrupt:
    print('Interrupted')
    estimator.export_savedmodel(os.path.join(parameters.output_model_dir, 'export'),
                                preprocess_image_for_prediction(min_width=10))
    print('Exported model to {}'.format(os.path.join(parameters.output_model_dir, 'export')))

estimator.export_savedmodel(os.path.join(parameters.output_model_dir, 'export'),
                            preprocess_image_for_prediction(min_width=10))
print('Exported model to {}'.format(os.path.join(parameters.output_model_dir, 'export')))

  0%|                                                            | 0/6 [00:00<?, ?it/s]

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from ./estimator\model.ckpt-0





InvalidArgumentError: Assign requires shapes of both tensors to match. lhs shape= [3840,1024] rhs shape= [768,1024]
	 [[Node: save/Assign_89 = Assign[T=DT_FLOAT, _class=["loc:@stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/basic_lstm_cell/kernel"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/basic_lstm_cell/kernel/Adam_1, save/RestoreV2_89)]]

Caused by op 'save/Assign_89', defined at:
  File "C:\Users\danny\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\danny\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\danny\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\danny\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\danny\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "C:\Users\danny\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\Users\danny\Anaconda3\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\danny\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Users\danny\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\danny\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\danny\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\danny\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\danny\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\danny\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\danny\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\danny\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\danny\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\danny\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-4e0398d94274>", line 8, in <module>
    image_summaries=True))
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 302, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 780, in _train_model
    log_step_count_steps=self._config.log_step_count_steps) as mon_sess:
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 368, in MonitoredTrainingSession
    stop_grace_period_secs=stop_grace_period_secs)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 673, in __init__
    stop_grace_period_secs=stop_grace_period_secs)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 493, in __init__
    self._sess = _RecoverableSession(self._coordinated_creator)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 851, in __init__
    _WrappedSession.__init__(self, self._create_session())
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 856, in _create_session
    return self._sess_creator.create_session()
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 554, in create_session
    self.tf_sess = self._session_creator.create_session()
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 419, in create_session
    self._scaffold.finalize()
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 212, in finalize
    self._saver.build()
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1227, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 1263, in _build
    build_save=build_save, build_restore=build_restore)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 745, in _build_internal
    restore_sequentially, reshape)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 470, in _AddShardedRestoreOps
    name="restore_shard"))
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 439, in _AddRestoreOps
    assign_ops.append(saveable.restore(tensors, shapes))
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py", line 160, in restore
    self.op.get_shape().is_fully_defined())
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\ops\state_ops.py", line 276, in assign
    validate_shape=validate_shape)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_state_ops.py", line 56, in assign
    use_locking=use_locking, name=name)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 2956, in create_op
    op_def=op_def)
  File "C:\Users\danny\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Assign requires shapes of both tensors to match. lhs shape= [3840,1024] rhs shape= [768,1024]
	 [[Node: save/Assign_89 = Assign[T=DT_FLOAT, _class=["loc:@stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/basic_lstm_cell/kernel"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/basic_lstm_cell/kernel/Adam_1, save/RestoreV2_89)]]
