In [1]:
#!/usr/bin/env python
import argparse
import os
import csv
import numpy as np
# try:
#     import better_exceptions
# except ImportError:
#     pass
from tqdm import trange
import tensorflow as tf
from src.model import crnn_fn
from src.data_handler import data_loader
from src.data_handler import preprocess_image_for_prediction

from src.config import Params, Alphabet, import_params_from_json

  from ._conv import register_converters as _register_converters


# Arguments and parameters

In [2]:
# parser = argparse.ArgumentParser()
# parser.add_argument('-ft', '--csv_files_train', required=True, type=str, help='CSV filename for training',
#                     nargs='*', default=None)
# parser.add_argument('-fe', '--csv_files_eval', type=str, help='CSV filename for evaluation',
#                     nargs='*', default=None)
# parser.add_argument('-o', '--output_model_dir', required=True, type=str,
#                     help='Directory for output', default='./estimator')
# parser.add_argument('-n', '--nb-epochs', type=int, default=30, help='Number of epochs')
# parser.add_argument('-g', '--gpu', type=str, help="GPU 0,1 or '' ", default='')
# parser.add_argument('-p', '--params-file', type=str, help='Parameters filename', default=None)
# args = vars(parser.parse_args())

csv_files_train = "./data/train.csv"
csv_files_eval = "./data/valid.csv"
output_model_dir = "./estimator"
n_epochs = 30
gpu = "0"

In [3]:
parameters = Params(train_batch_size=128,
                    eval_batch_size=128,
                    learning_rate=1e-3,  # 1e-3 recommended
                    learning_decay_rate=0.95,
                    learning_decay_steps=5000,
                    evaluate_every_epoch=5,
                    save_interval=5e3,
                    input_shape=(117, 1669),
                    optimizer='adam',
                    digits_only=False,
                    alphabet=Alphabet.MY_ALPHABET,
                    alphabet_decoding='same',
                    csv_delimiter='\t',
                    csv_files_eval=csv_files_eval,
                    csv_files_train=csv_files_train,
                    output_model_dir=output_model_dir,
                    n_epochs=n_epochs,
                    gpu=gpu
                    )

# Set up model before running

In [4]:
model_params = {
    'Params': parameters,
}

parameters.export_experiment_params()

os.environ['CUDA_VISIBLE_DEVICES'] = parameters.gpu
config_sess = tf.ConfigProto()
config_sess.gpu_options.per_process_gpu_memory_fraction = 0.8
config_sess.gpu_options.allow_growth = True

# Config estimator
est_config = tf.estimator.RunConfig()
est_config.replace(keep_checkpoint_max=10,
                   save_checkpoints_steps=parameters.save_interval,
                   session_config=config_sess,
                   save_checkpoints_secs=None,
                   save_summary_steps=1000,
                   model_dir=parameters.output_model_dir)

estimator = tf.estimator.Estimator(model_fn=crnn_fn,
                                   params=model_params,
                                   model_dir=parameters.output_model_dir,
                                   config=est_config
                                   )

INFO:tensorflow:Using config: {'_model_dir': './estimator', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fab44ad04e0>, '_task_type': 'worker', '_task_id': 0, '_global_id': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [5]:
# Count number of image filenames in csv
n_samples = 0
with open(parameters.csv_files_eval, 'r', encoding='utf8') as csvfile:
    reader = csv.reader(csvfile, delimiter=parameters.csv_delimiter)
    n_samples += len(list(reader))

# Run the model

In [6]:
try:
    for e in trange(0, parameters.n_epochs, parameters.evaluate_every_epoch):
        estimator.train(input_fn=data_loader(csv_filename=parameters.csv_files_train,
                                             params=parameters,
                                             batch_size=parameters.train_batch_size,
                                             num_epochs=parameters.evaluate_every_epoch,
                                             data_augmentation=True,
                                             image_summaries=True))
        estimator.evaluate(input_fn=data_loader(csv_filename=parameters.csv_files_eval,
                                                params=parameters,
                                                batch_size=parameters.eval_batch_size,
                                                num_epochs=1),
                           steps=np.floor(n_samples/parameters.eval_batch_size)
                           )

except KeyboardInterrupt:
    print('Interrupted')
    estimator.export_savedmodel(os.path.join(parameters.output_model_dir, 'export'),
                                preprocess_image_for_prediction(min_width=10))
    print('Exported model to {}'.format(os.path.join(parameters.output_model_dir, 'export')))

estimator.export_savedmodel(os.path.join(parameters.output_model_dir, 'export'),
                            preprocess_image_for_prediction(min_width=10))
print('Exported model to {}'.format(os.path.join(parameters.output_model_dir, 'export')))

  0%|          | 0/6 [00:00<?, ?it/s]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.InvalidArgumentError'>, Unquoted fields cannot have quotes/CRLFs inside
	 [[Node: csv_reading_op = DecodeCSV[OUT_TYPE=[DT_STRING, DT_STRING], field_delim="\t", na_value="", use_quote_delim=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](file_reading_op:1, csv_reading_op/record_defaults_0, csv_reading_op/record_defaults_0)]]
INFO:tensorflow:Saving checkpoints for 0 into ./estimator/model.ckpt.





InvalidArgumentError: Unquoted fields cannot have quotes/CRLFs inside
	 [[Node: csv_reading_op = DecodeCSV[OUT_TYPE=[DT_STRING, DT_STRING], field_delim="\t", na_value="", use_quote_delim=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](file_reading_op:1, csv_reading_op/record_defaults_0, csv_reading_op/record_defaults_0)]]