# Training

## Prepare training and validation datasets.

In [1]:
from dataset import MSCOCO, Vocabulary

In [2]:
# Root directory that holds every dataset; the validation cell below reuses it.
dataset_path = '../datasets/'

# MS COCO 2014 training split: image folder and caption annotation file.
ms_coco_train_image_dir = dataset_path + 'ms_coco/train2014/'
ms_coco_train_caption_file = \
    dataset_path + 'ms_coco/annotations/captions_train2014.json'

# Wrap the training split in the project's MSCOCO dataset object.
training_dataset = MSCOCO(
    image_dir=ms_coco_train_image_dir,
    caption_file=ms_coco_train_caption_file,
)

In [3]:
# Sanity check: display the size reported for the training split.
# NOTE(review): presumably this counts captions rather than images — confirm in dataset.MSCOCO.
training_dataset.get_size()

414113

In [4]:
# MS COCO 2014 validation split: image folder and caption annotation file.
# Relies on `dataset_path` defined in the training-dataset cell above.
ms_coco_val_image_dir = dataset_path + 'ms_coco/val2014/'
ms_coco_val_caption_file = \
    dataset_path + 'ms_coco/annotations/captions_val2014.json'

# Wrap the validation split in the project's MSCOCO dataset object.
validation_dataset = MSCOCO(
    image_dir=ms_coco_val_image_dir,
    caption_file=ms_coco_val_caption_file,
)

In [5]:
# Sanity check: display the size reported for the validation split.
# NOTE(review): presumably this counts captions rather than images — confirm in dataset.MSCOCO.
validation_dataset.get_size()

202654

## Build vocabulary from the training dataset.

In [6]:
# Build the vocabulary from the training split only (validation data is not passed in).
# NOTE(review): min_word_count=3 presumably drops words seen fewer than
# three times — confirm against dataset.Vocabulary.
vocabulary = Vocabulary(dataset=training_dataset, min_word_count=3)

In [7]:
# Display the size reported for the vocabulary built above.
vocabulary.get_size()

11498

In [8]:
# Write the vocabulary to disk; the Image2Text cells below pass this same
# file name as `vocabulary_file_path`.
vocabulary.save('ms_coco_vocabulary.json')

## Train the model.

In [9]:
from model import Image2Text

In [21]:
# Create a fresh model from the default configuration.
# The vocabulary file is the one saved in the vocabulary section above.
img2txt = Image2Text(
    config_file_path='default_config.json',
    vocabulary_file_path='ms_coco_vocabulary.json',
    training_dataset=training_dataset,
    validation_dataset=validation_dataset,
)

INFO:tensorflow:Restoring parameters from pretrained/inception_v3.ckpt


In [22]:
# Short smoke-test run of 100 steps; the value returned by train() is kept in `rd`.
rd = img2txt.train(max_num_steps=100)

Training for 100 steps, total training 100 steps (= 0.00772736 epochs).
0% : minibatch_loss = 9.35499
Image predictions
American_black_bear: 0.915226
sloth_bear: 0.0210429
brown_bear: 0.00383691
harmonica: 0.000695948
American_coot: 0.000481433
input: a black bear walking through a forest filled with trees .
output: modular breaking heap perfectly timber hike modular perfectly squatting patrolling searching costumes patrolling creates mattresses unloads pyramids undone hike effect british workspace condiment hike hoody tale part bruised pyramids checking clothed part loosely class checking swans murals searching chariot murals searching chariot murals searching chariot murals searching chariot murals searching chariot murals searching chariot murals searching chariot murals searching chariot


Type is unsupported, or the types of the items don't match field type in CollectionDef.
'TextSummaryPluginAsset' object has no attribute 'name'
checkpoint saved at 0822_021734/img2txt-0
1% : mini

## Load a saved model and continue training for additional steps.

In [23]:
# Rebuild the model from the configuration and checkpoint of the earlier run.
# NOTE(review): '0822_021734' is the run directory printed by the previous
# training cell; it is hard-coded here and must be updated when the notebook
# is re-run and a new directory is created.
img2txt = Image2Text(
    config_file_path='0822_021734/config.json',
    checkpoint_save_path='0822_021734/img2txt-100',
    vocabulary_file_path='ms_coco_vocabulary.json',
    training_dataset=training_dataset,
    validation_dataset=validation_dataset,
)

INFO:tensorflow:Restoring parameters from pretrained/inception_v3.ckpt
INFO:tensorflow:Restoring parameters from 0822_021734/img2txt-100


In [24]:
# Continue training the restored model for 100 more steps on top of the
# steps already recorded in the checkpoint.
rd = img2txt.train(additional_num_steps=100)

Training for 100 steps, total training 201 steps (= 0.015532 epochs).
0% : minibatch_loss = 5.19323
Image predictions
lakeside: 0.443298
paddle: 0.246256
seashore: 0.0399927
breakwater: 0.037806
sandbar: 0.0191409
input: someone surfing waves on their surf board in the ocean
output: a two an are group is are with is a are in a in the a in on a in on a in on a the in a <eos> the a . the a . the a . the a . the a . the a . the a . the a . the a . the a . the a . the a . the a . the a . the


Type is unsupported, or the types of the items don't match field type in CollectionDef.
'TextSummaryPluginAsset' object has no attribute 'name'
checkpoint saved at 0822_022050/img2txt-101
1% : minibatch_loss = 5.62502
Image predictions
steam_locomotive: 0.968069
bib: 0.000701917
marmot: 0.000342524
electric_locomotive: 0.000334331
passenger_car: 0.000294882
input: a man walking by an old fashioned steam train
output: two a the man group people of is in in on and in on and in and with in the and . in 