In [1]:
#import library
import collections
import random
import os
import time
import json
from PIL import Image
import io
import urllib
import uuid
from concurrent.futures import ThreadPoolExecutor
from functools import partial

import numpy as np
from tqdm import tqdm
from datasets import load_dataset
from datasets.utils.file_utils import get_datasets_user_agent
import matplotlib.pyplot as plt
import tensorflow as tf

#import other packages for dataset store
import pandas as pd
from datasets import Dataset, DatasetDict, load_from_disk

In [2]:
tf.__version__

'2.9.1'

In [3]:
# Seed value
seed_value= 1022

# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Set the `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)

# 3. Set the `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)

# 4. Set the `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.random.set_seed(seed_value)

# 5. Configure a new global `tensorflow` session
from keras import backend as K
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)




### Download images and store dataset

In [4]:
# Add the relevant ISO code for the language you want to work with.
#iso639_3_letter_code = "hau"
iso639_3_letter_code = "tha"
#iso639_3_letter_code = "kir"

# Download the language specific dataset from HF.
dataset = load_dataset("sil-ai/bloom-captioning", iso639_3_letter_code, 
                       use_auth_token=True, download_mode='force_redownload')

Downloading builder script:   0%|          | 0.00/41.6k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/15.2k [00:00<?, ?B/s]

Downloading and preparing dataset bloom-captioning/tha (download: 168.28 MiB, generated: 2.08 MiB, post-processed: Unknown size, total: 170.36 MiB) to C:/Users/Zhiwen Yan/.cache/huggingface/datasets/sil-ai___bloom-captioning/tha/0.0.0/8efe15718b4a50170c9add75b453aec13ec1c5216111d21815428536fe5913ca...


Downloading data:   0%|          | 0.00/176M [00:00<?, ?B/s]

Generating test split:   0%|          | 0/58 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/52 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/2913 [00:00<?, ? examples/s]

Dataset bloom-captioning downloaded and prepared to C:/Users/Zhiwen Yan/.cache/huggingface/datasets/sil-ai___bloom-captioning/tha/0.0.0/8efe15718b4a50170c9add75b453aec13ec1c5216111d21815428536fe5913ca. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
# See what is included in the dataset object.
dataset

DatasetDict({
    test: Dataset({
        features: ['image_id', 'image_url', 'caption', 'story_id', 'album_id', 'license', 'original_bloom_language_tag', 'index_in_story'],
        num_rows: 58
    })
    validation: Dataset({
        features: ['image_id', 'image_url', 'caption', 'story_id', 'album_id', 'license', 'original_bloom_language_tag', 'index_in_story'],
        num_rows: 52
    })
    train: Dataset({
        features: ['image_id', 'image_url', 'caption', 'story_id', 'album_id', 'license', 'original_bloom_language_tag', 'index_in_story'],
        num_rows: 2913
    })
})

In [33]:
#actual download image from url
USER_AGENT = get_datasets_user_agent()

def fetch_single_image(image_url, timeout=None, retries=0):
    request = urllib.request.Request(
        image_url,
        data=None,
        headers={"user-agent": USER_AGENT},
    )
    with urllib.request.urlopen(request, timeout=timeout) as req:
        if 'png' in image_url:
          png = Image.open(io.BytesIO(req.read())).convert('RGBA')
          png.load() # required for png.split()
          background = Image.new("RGB", png.size, (255, 255, 255))
          background.paste(png, mask=png.split()[3]) # 3 is the alpha channel
          image_id = str(uuid.uuid4()) # confused about image_id here?
          image_path = "images_kir/" + image_id + ".jpg"
          background.save(image_path, 'JPEG', quality=80)
        else:
          image = Image.open(io.BytesIO(req.read()))
          image_id = str(uuid.uuid4())
          image_path = "images_kir/" + image_id + ".jpg"
          image.save(image_path)
    return image_path

def fetch_images(batch, num_threads, timeout=None, retries=3):
    fetch_single_image_with_args = partial(fetch_single_image, timeout=timeout, retries=retries)
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        batch["image_path"] = list(executor.map(fetch_single_image_with_args, batch["image_url"]))
    return batch

num_threads = 20
dataset = dataset.map(fetch_images, batched=True, batch_size=100, fn_kwargs={"num_threads": num_threads})

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/40 [00:00<?, ?ba/s]

In [34]:
# Check one of the training samples.
dataset['train'][0]
#checked, image, url, image_path is matched

{'image_id': '2888c885-b6ea-485f-bf09-5cbc7d988920',
 'image_url': 'https://bloom-vist.s3.amazonaws.com/%D0%9A%D2%AF%D0%B7%D0%B3%D2%AF/Kuzgu_3_str_Nasynbatova_Svetlana.jpg',
 'caption': '– Кел, бөлүнүп издеп көрөлү. Мен ашканадан издейин. А сен конок бөлмөсүн карачы, – деди Айдай.\nАдилет:\n– Туура айтасың. Экөөбүз тең бир жерден издесек, убакыт көп кетет.\nКонок бөлмөсүндө Адилет телевизордун жанын, дивандын үстүн карады. Ачкыч жок. Килемдин асты менен китеп шкафтын ичин карады. Ал жакта да жок экен.',
 'story_id': '99d18914-ca50-4a80-9d20-8cb510e644a2',
 'album_id': '9278349f-f0cc-4d87-a4b4-0da7992a7552',
 'license': 'cc-by-nc',
 'original_bloom_language_tag': 'ky',
 'index_in_story': 0,
 'image_path': 'images_kir/57cbe5f6-3003-4da0-b5de-22958bfdf749.jpg'}

In [35]:
#save dataset to local
dataset.save_to_disk("dataset_kir")

### Reload data and extract image feature by InceptionV3

In [44]:
#reload dataset
dataset = load_from_disk("dataset_kir")
test_dataset = load_from_disk("dataset_kir_test")

following steps are no longer needed once you generate numpy files

In [45]:
# Retrieve the InceptionV3 model
# Prepare images features with a pre-trained InceptionV3 model
image_model = tf.keras.applications.InceptionV3(include_top=False,
                                                weights='imagenet')
new_input = image_model.input
hidden_layer = image_model.layers[-1].output

image_features_extract_model = tf.keras.Model(new_input, hidden_layer)

In [46]:
# Cache image features
def load_image(image_path):
    img = tf.io.read_file(image_path)
    img = tf.io.decode_jpeg(img, channels=3)
    img = tf.keras.layers.Resizing(299, 299)(img)
    img = tf.keras.applications.inception_v3.preprocess_input(img) #1 299 299 3
    return img, image_path

In [81]:
# Get unique train images
# If we can load dataset from this point
encode_train = sorted(set(dataset['train']['image_path']))

# Feel free to change batch_size according to your system configuration
image_dataset = tf.data.Dataset.from_tensor_slices(encode_train)
image_dataset = image_dataset.map(
  load_image, num_parallel_calls=tf.data.AUTOTUNE).batch(16)

for img, path in tqdm(image_dataset):
  batch_features = image_features_extract_model(img) #16 8 8 2048
  batch_features = tf.reshape(batch_features,
                              (batch_features.shape[0], -1, batch_features.shape[3])) #1 64 2048

  for bf, p in zip(batch_features, path):
    path_of_feature = p.numpy().decode("utf-8")
    np.save(path_of_feature, bf.numpy())
    
# Get unique test images
encode_test = sorted(set(dataset['test']['image_path']))

# Feel free to change batch_size according to your system configuration
image_dataset_test = tf.data.Dataset.from_tensor_slices(encode_test)
image_dataset_test = image_dataset_test.map(
  load_image, num_parallel_calls=tf.data.AUTOTUNE).batch(16)

for img, path in tqdm(image_dataset_test):
  batch_features = image_features_extract_model(img)
  batch_features = tf.reshape(batch_features,
                              (batch_features.shape[0], -1, batch_features.shape[3]))

  for bf, p in zip(batch_features, path):
    path_of_feature = p.numpy().decode("utf-8")
    np.save(path_of_feature, bf.numpy())

100%|██████████| 245/245 [06:45<00:00,  1.66s/it]
100%|██████████| 4/4 [00:07<00:00,  1.92s/it]


### Prepare caption

In [47]:
# Add some special tokens and clean up new line characters.
train_captions = [f"<start> {x} <end>" for x in dataset['train']['caption']]
train_captions = [x.replace('\n', ' ') for x in train_captions]
#test_captions = [f"<start> {x} <end>" for x in dataset['train']['caption']]
#test_captions = [x.replace('\n', ' ') for x in test_captions]

caption_dataset = tf.data.Dataset.from_tensor_slices(train_captions)

In [48]:
train_captions[0]

'<start> – Кел, бөлүнүп издеп көрөлү. Мен ашканадан издейин. А сен конок бөлмөсүн карачы, – деди Айдай. Адилет: – Туура айтасың. Экөөбүз тең бир жерден издесек, убакыт көп кетет. Конок бөлмөсүндө Адилет телевизордун жанын, дивандын үстүн карады. Ачкыч жок. Килемдин асты менен китеп шкафтын ичин карады. Ал жакта да жок экен. <end>'

In [49]:
# We will override the default standardization of TextVectorization to preserve
# "<>" characters, so we preserve the tokens for the <start> and <end>.
def standardize(inputs):
  inputs = tf.strings.lower(inputs)
  return tf.strings.regex_replace(inputs,
                                  r"!\"#$%&\(\)\*\+.,-/:;=?@\[\\\]^_`{|}~", "")

In [50]:
# Max word count for a caption.
max_length = 50
# Use the top 5000 words for a vocabulary.
vocabulary_size = 15000
tokenizer = tf.keras.layers.TextVectorization(
    max_tokens=vocabulary_size, #5000
    standardize=standardize,
    output_sequence_length=max_length)
# Learn the vocabulary from the caption data.
tokenizer.adapt(caption_dataset)

# Create the tokenized vectors
cap_vector = caption_dataset.map(lambda x: tokenizer(x))

# Create mappings for words to indices and indicies to words.
word_to_index = tf.keras.layers.StringLookup(
    mask_token="",
    vocabulary=tokenizer.get_vocabulary())
index_to_word = tf.keras.layers.StringLookup(
    mask_token="",
    vocabulary=tokenizer.get_vocabulary(),
    invert=True)


In [51]:
# more data prepare for traning
# Create some mas between images, vectors, and captions
img_to_cap_vector = collections.defaultdict(list)
for img, cap in zip(dataset['train']['image_path'], cap_vector):
  img_to_cap_vector[img].append(cap)

img_name_train = []
cap_train = []
for imgt in list(img_to_cap_vector.keys()):
  capt_len = len(img_to_cap_vector[imgt])
  img_name_train.extend([imgt] * capt_len)
  cap_train.extend(img_to_cap_vector[imgt])

#delete caption for test data

### Build models

In [52]:
# Feel free to change these parameters according to your system's configuration
BATCH_SIZE = 64
BUFFER_SIZE = 1000
embedding_dim = 256
units = 512
num_steps = len(img_name_train) // BATCH_SIZE

# Shape of the vector extracted from InceptionV3 is (64, 2048)
# These two variables represent that vector shape
features_shape = 2048
attention_features_shape = 64

In [53]:
# Load the numpy files
def map_func(img_name, cap):
  img_tensor = np.load(img_name.decode('utf-8')+'.npy')
  return img_tensor, cap

dataset_tf = tf.data.Dataset.from_tensor_slices((img_name_train, cap_train))

# Use map to load the numpy files in parallel
dataset_tf = dataset_tf.map(lambda item1, item2: tf.numpy_function(
          map_func, [item1, item2], [tf.float32, tf.int64]),
          num_parallel_calls=tf.data.AUTOTUNE)

# Shuffle and batch
dataset_tf = dataset_tf.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
dataset_tf = dataset_tf.prefetch(buffer_size=tf.data.AUTOTUNE)

##### Attention network

In [54]:
# define image captioning model
class BahdanauAttention(tf.keras.Model):
  def __init__(self, units):
    super(BahdanauAttention, self).__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, features, hidden):
    # features(CNN_encoder output) shape == (batch_size, 64, embedding_dim)

    # hidden shape == (batch_size, hidden_size)
    # hidden_with_time_axis shape == (batch_size, 1, hidden_size)
    hidden_with_time_axis = tf.expand_dims(hidden, 1)

    # attention_hidden_layer shape == (batch_size, 64, units)
    attention_hidden_layer = (tf.nn.tanh(self.W1(features) +
                                         self.W2(hidden_with_time_axis)))

    # score shape == (batch_size, 64, 1)
    # This gives you an unnormalized score for each image feature.
    score = self.V(attention_hidden_layer)

    # attention_weights shape == (batch_size, 64, 1)
    attention_weights = tf.nn.softmax(score, axis=1)

    # context_vector shape after sum == (batch_size, hidden_size)
    context_vector = attention_weights * features
    context_vector = tf.reduce_sum(context_vector, axis=1)

    return context_vector, attention_weights

##### CNN

In [55]:
class CNN_Encoder(tf.keras.Model):
    # Since you have already extracted the features and dumped it
    # This encoder passes those features through a Fully connected layer
    def __init__(self, embedding_dim):
        super(CNN_Encoder, self).__init__()
        # shape after fc == (batch_size, 64, embedding_dim)
        self.fc = tf.keras.layers.Dense(embedding_dim)

    def call(self, x):
        x = self.fc(x)
        x = tf.nn.relu(x)
        return x

##### RNN

In [56]:
class RNN_Decoder(tf.keras.Model):
  def __init__(self, embedding_dim, units, vocab_size):
    super(RNN_Decoder, self).__init__()
    self.units = units

    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    self.fc1 = tf.keras.layers.Dense(self.units)
    self.fc2 = tf.keras.layers.Dense(vocab_size)

    self.attention = BahdanauAttention(self.units)

  def call(self, x, features, hidden):
    # defining attention as a separate model
    context_vector, attention_weights = self.attention(features, hidden)

    # x shape after passing through embedding == (batch_size, 1, embedding_dim)
    x = self.embedding(x)

    # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    # passing the concatenated vector to the GRU
    output, state = self.gru(x)

    # shape == (batch_size, max_length, hidden_size)
    x = self.fc1(output)

    # x shape == (batch_size * max_length, hidden_size)
    x = tf.reshape(x, (-1, x.shape[2]))

    # output shape == (batch_size * max_length, vocab)
    x = self.fc2(x)

    return x, state, attention_weights

  def reset_state(self, batch_size):
    return tf.zeros((batch_size, self.units))

##### Training config

In [57]:
# Initialize the encoder and decoder
encoder = CNN_Encoder(embedding_dim)
decoder = RNN_Decoder(embedding_dim, units, tokenizer.vocabulary_size())

# Training config.
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

# Loss function to use during training.
def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask

  return tf.reduce_mean(loss_)


In [58]:
# set up and execute training
# Make sure we save checkpoints during training
checkpoint_path = "./checkpoints/train_kir"
ckpt = tf.train.Checkpoint(encoder=encoder,
                           decoder=decoder,
                           optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)


start_epoch = 0
if ckpt_manager.latest_checkpoint:
  start_epoch = int(ckpt_manager.latest_checkpoint.split('-')[-1])
  
  # restoring the latest checkpoint in checkpoint_path
  ckpt.restore(ckpt_manager.latest_checkpoint)
  
# adding this in a separate cell because if you run the training cell
# many times, the loss_plot array will be reset
loss_plot = []

In [59]:
@tf.function
def train_step(img_tensor, target):
  loss = 0
  accuracy = 0

  # initializing the hidden state for each batch
  # because the captions are not related from image to image
  hidden = decoder.reset_state(batch_size=target.shape[0])

  dec_input = tf.expand_dims([word_to_index('<start>')] * target.shape[0], 1)

  with tf.GradientTape() as tape:
      features = encoder(img_tensor)

      for i in range(1, target.shape[1]):
          # passing the features through the decoder
          predictions, hidden, _ = decoder(dec_input, features, hidden)

          loss += loss_function(target[:, i], predictions)
          accuracy += train_accuracy(target[:, i], predictions)

          # using teacher forcing
          dec_input = tf.expand_dims(target[:, i], 1)

  total_loss = (loss / int(target.shape[1]))
  total_accuracy = (accuracy / int(target.shape[1]))
  
  trainable_variables = encoder.trainable_variables + decoder.trainable_variables
  gradients = tape.gradient(loss, trainable_variables)
  optimizer.apply_gradients(zip(gradients, trainable_variables))

  

  return loss, total_loss, total_accuracy


##### Train the model

In [60]:
# Adjust this depending on how long you want to train
EPOCHS = 20

# Train our model!
for epoch in range(start_epoch, EPOCHS):
    start = time.time()
    total_loss = 0
    total_accuracy = 0

    for (batch, (img_tensor, target)) in enumerate(dataset_tf):
        batch_loss, t_loss, t_accuracy = train_step(img_tensor, target)
        total_loss += t_loss
        total_accuracy += t_accuracy

        if batch % 100 == 0:
            average_batch_loss = batch_loss.numpy()/int(target.shape[1])
            print(f'Epoch {epoch+1} Batch {batch} Loss {average_batch_loss:.4f}')
    # storing the epoch end loss value to plot later
    loss_plot.append(total_loss / num_steps)

    if epoch % 5 == 0:
      ckpt_manager.save()

    print(f'Epoch {epoch+1} Loss {total_loss/num_steps:.4f} Accuracy {total_accuracy/num_steps:.4f}')
    print(f'Time taken for 1 epoch {time.time()-start:.2f} sec\n')

Epoch 1 Batch 0 Loss 4.8282
Epoch 1 Loss 4.4204 Accuracy 0.0546
Time taken for 1 epoch 456.27 sec

Epoch 2 Batch 0 Loss 3.8795
Epoch 2 Loss 4.1657 Accuracy 0.0692
Time taken for 1 epoch 393.91 sec

Epoch 3 Batch 0 Loss 3.5622
Epoch 3 Loss 4.0728 Accuracy 0.0722
Time taken for 1 epoch 363.41 sec

Epoch 4 Batch 0 Loss 3.4693
Epoch 4 Loss 3.9161 Accuracy 0.0752
Time taken for 1 epoch 360.83 sec

Epoch 5 Batch 0 Loss 3.6012
Epoch 5 Loss 3.8238 Accuracy 0.0779
Time taken for 1 epoch 425.09 sec

Epoch 6 Batch 0 Loss 3.6057
Epoch 6 Loss 3.7044 Accuracy 0.0807
Time taken for 1 epoch 365.83 sec

Epoch 7 Batch 0 Loss 3.4711
Epoch 7 Loss 3.6098 Accuracy 0.0832
Time taken for 1 epoch 487.52 sec

Epoch 8 Batch 0 Loss 2.9992
Epoch 8 Loss 3.5212 Accuracy 0.0854
Time taken for 1 epoch 463.03 sec

Epoch 9 Batch 0 Loss 3.0056
Epoch 9 Loss 3.4090 Accuracy 0.0874
Time taken for 1 epoch 445.94 sec

Epoch 10 Batch 0 Loss 2.9207
Epoch 10 Loss 3.2772 Accuracy 0.0894
Time taken for 1 epoch 384.28 sec

Epoch 11

In [None]:
# Visualize our loss
plt.plot(loss_plot)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Loss Plot')
plt.show()

##### Prediction

In [61]:
def predict(image):
    attention_plot = np.zeros((max_length, attention_features_shape))

    hidden = decoder.reset_state(batch_size=1)

    temp_input = tf.expand_dims(load_image(image)[0], 0)
    img_tensor_val = image_features_extract_model(temp_input)
    img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0],
                                                 -1,
                                                 img_tensor_val.shape[3]))

    features = encoder(img_tensor_val)

    dec_input = tf.expand_dims([word_to_index('<start>')], 0)
    result = []

    for i in range(max_length):
        predictions, hidden, attention_weights = decoder(dec_input,
                                                         features,
                                                         hidden)

        attention_plot[i] = tf.reshape(attention_weights, (-1, )).numpy()

        predicted_id = tf.random.categorical(predictions, 1)[0][0].numpy()
        predicted_word = tf.compat.as_text(index_to_word(predicted_id).numpy())
        result.append(predicted_word)

        if predicted_word == '<end>':
            return result, attention_plot

        dec_input = tf.expand_dims([predicted_id], 0)

    attention_plot = attention_plot[:len(result), :]
    return result, attention_plot
    #return result

In [62]:
def plot_attention(image, result, attention_plot):
    temp_image = np.array(Image.open(image))

    fig = plt.figure(figsize=(10, 10))

    len_result = len(result)
    for i in range(len_result):
        temp_att = np.resize(attention_plot[i], (8, 8))
        grid_size = max(int(np.ceil(len_result/2)), 2)
        ax = fig.add_subplot(grid_size, grid_size, i+1)
        ax.set_title(result[i])
        img = ax.imshow(temp_image)
        ax.imshow(temp_att, cmap='gray', alpha=0.6, extent=img.get_extent())

    plt.tight_layout()
    #plt.show()

In [63]:
#load test dataset
test_dataset = load_from_disk("dataset_kir_test")

In [64]:
test_dataset['train'][1]
len(test_dataset['train'])

67

In [65]:
#output the list with first var is image_id second var is predict caption
pred_data = []
for i in range(len(test_dataset['train'])):
    image = test_dataset['train']['image_path'][i]
    idx = test_dataset['train']['image_path'].index(image)
    image_id = test_dataset['train']['Id'][idx]
    result, attention_plot = predict(image)
    predicted_caption = ' '.join(result).replace(' <end>', '')
    """
    for j in predicted_caption:
        if j == "":
            predicted_caption.remove(j)
    """
    pred_data.append([image_id, predicted_caption])

In [67]:
#write to csv file
import csv

header = ["Id","predicted"]

with open('kir_result.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)

    # write the header
    writer.writerow(header)

    # write multiple rows
    writer.writerows(pred_data)

In [40]:
test_dataset['train'][3]

{'Id': '0961231d-c9a7-4589-b5da-cf37b6016ccc_tha',
 'ImageURL': 'https://bloom-vist.s3.amazonaws.com/%E0%B8%AB%E0%B8%A1%E0%B8%B9%E0%B9%88%E0%B8%9A%E0%B9%89%E0%B8%B2%E0%B8%99%E0%B8%AB%E0%B9%89%E0%B8%A7%E0%B8%A2%E0%B9%81%E0%B8%AB%E0%B9%89%E0%B8%87../18.jpg',
 'ISO639-3': 'tha',
 'image_path': 'images_tha_test/aacedbd2-d836-4d05-aca9-30c28c789b06.jpg'}