<a href="https://colab.research.google.com/github/MAbuTalha/Neural-Machine-Translation-NMT-/blob/main/Machine_Translation_Word_level_model_(English_to_Urdu).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import string, json, re, sys, time, os
from string import digits
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from keras.layers import Input, LSTM, Embedding, Dense
from keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger
from config_translator import general

In [2]:
data_path = "/home2/data/"
project_path = "/home/mateusz/projects/Neural-Machine-Translation-NMT-/"
test_path=project_path + "dataset/test.json"
train_path=project_path + "dataset/test.json"
glove = {
    "word_embedings_path": data_path + "images/glove/glove.6B.200d.txt",
    "embedings_dim": 199
}
config = {
    "dataset_name": "coco14",
    "config_name": "coco14",
    "results_dir": project_path + "results/",
    "pickles_dir": "/Pickle",
    "coco-caption_path": "./coco-caption"
}
EMBEDDING_SIZE = glove["embedings_dim"]

In [3]:
def load_dataset(path):
    with open(path, 'r') as f:
        train_dataset = json.load(f)

    bbox_categories_list = []
    output_sentences_list = []
    output_sentences_per_image_list = []
    image_id_list = []
    for pair in train_dataset:
        bbox_categories = pair["bbox_categories"]
        image_id = pair['image_id']
        bbox_categories = ' '.join(map(str, bbox_categories))
        output_sentences = pair["captions"]
        output_sentences_per_image_list.append(output_sentences)
        for sentence in output_sentences:
            output_sentences_list.append(sentence)
            bbox_categories_list.append(bbox_categories)
            image_id_list.append(image_id)
    return image_id_list, bbox_categories_list, output_sentences_list, output_sentences_per_image_list

def clear(lines):
    # Lowercase all characters
    lines.eng=lines.eng.apply(lambda x: x.lower())
    lines.urdu=lines.urdu.apply(lambda x: x.lower())

    # Remove quotes
    lines.eng=lines.eng.apply(lambda x: re.sub("'", '', x))
    lines.urdu=lines.urdu.apply(lambda x: re.sub("'", '', x))

    exclude = set(string.punctuation) # Set of all special characters

    # Remove all the special characters
    lines.eng=lines.eng.apply(lambda x: ''.join(ch for ch in x if ch not in exclude))
    lines.urdu=lines.urdu.apply(lambda x: ''.join(ch for ch in x if ch not in exclude))

    # Remove all numbers from text
    remove_digits = str.maketrans('', '', digits)
    lines.eng=lines.eng.apply(lambda x: x.translate(remove_digits))

    # Remove extra spaces
    lines.eng=lines.eng.apply(lambda x: x.strip())
    lines.urdu=lines.urdu.apply(lambda x: x.strip())
    lines.eng=lines.eng.apply(lambda x: re.sub(" +", " ", x))
    lines.urdu=lines.urdu.apply(lambda x: re.sub(" +", " ", x))

    # Add start and end tokens to target sequences
    lines.urdu = lines.urdu.apply(lambda x : 'START_ '+ x + ' _END')
    print("Sample clean lines")
    print(lines.sample(10))
    return lines

def define_vocabulary(lines):

    # Vocabulary of English
    all_eng_words=set()
    for eng in lines.eng:
        for word in eng.split():
            if word not in all_eng_words:
                all_eng_words.add(word)

    # Vocabulary of Urdu
    all_urdu_words=set()
    for urdu in lines.urdu:
        for word in urdu.split():
            if word not in all_urdu_words:
                all_urdu_words.add(word)

    return all_eng_words, all_urdu_words

# Max Length of source sequence
def max_src_trg_length(lines):
    lenght_list=[]
    for l in lines.eng:
        lenght_list.append(len(l.split(' ')))
    max_length_src = np.max(lenght_list)
    print('Max Source Length:',max_length_src)

    # Max Length of target sequence
    lenght_list=[]
    for l in lines.urdu:
        lenght_list.append(len(l.split(' ')))
    max_length_tar = np.max(lenght_list)
    print('Max Target Lenght:',max_length_tar)
    return max_length_src, max_length_tar

def input_trg_words(all_eng_words, all_urdu_words):
    input_words = sorted(list(all_eng_words))
    target_words = sorted(list(all_urdu_words))
    num_encoder_tokens = len(all_eng_words)+1
    num_decoder_tokens = len(all_urdu_words)+1
    num_encoder_tokens, num_decoder_tokens
    return input_words, target_words, num_encoder_tokens, num_decoder_tokens

def reverse_index(input_words, target_words, lines):
    input_token_index = dict([(word, i+1) for i, word in enumerate(input_words)])
    target_token_index = dict([(word, i+1) for i, word in enumerate(target_words)])

    reverse_input_char_index = dict((i, word) for word, i in input_token_index.items())
    reverse_target_char_index = dict((i, word) for word, i in target_token_index.items())

    lines = shuffle(lines)
    print("Shuffled lines")
    lines.head(10)
    return input_token_index, target_token_index, reverse_input_char_index, reverse_target_char_index

def get_embedding_matrix(vocab_size, wordtoix, word_embedings_path, embedings_dim):
    def isfloat(value):
        try:
            float(value)
            return True
        except ValueError:
            return False

    embeddings_index = {}
    # From the embeddings matrix get coefficients of particular words and store the in dictionarym by key - words
    f = open(word_embedings_path, encoding="utf-8")
    for line in f:
        values = line.split()
        word = values[0]
        import re
        if isfloat(values[1]):
            coefs = np.asarray(values[2:], dtype='float32')
        elif isfloat(values[2]):
            coefs = np.asarray(values[3:], dtype='float32')
        elif isfloat(values[3]):
            coefs = np.asarray(values[4:], dtype='float32')
        elif isfloat(values[4]):
            coefs = np.asarray(values[5:], dtype='float32')
        else:
            coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()
    print('Found %s word vectors.' % len(embeddings_index))
    # Get 200-dim/100 dense vector for each of the 10000 words in out vocabulary
    embedding_matrix = np.zeros((vocab_size, embedings_dim))
    for word, i in wordtoix.items():
        # if i < max_words:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # Words not found in the embedding index will be all zeros
            # 1655,299 199
            embedding_matrix[i] = embedding_vector
    print("Shape of embedding matrix")
    print(embedding_matrix.shape)
    return embedding_matrix

In [5]:
image_id_list, bbox_categories_list_train, output_sentences_list_train, _ = load_dataset(train_path)
lines=pd.DataFrame({'image_id': pd.Series(image_id_list), 'eng': pd.Series(bbox_categories_list_train), 'urdu':pd.Series(output_sentences_list_train)})
print(lines.head)

<bound method NDFrame.head of                            image_id          eng  \
0       COCO_train2014_000000406533  person skis   
1       COCO_train2014_000000406533  person skis   
2       COCO_train2014_000000406533  person skis   
3       COCO_train2014_000000406533  person skis   
4       COCO_train2014_000000406533  person skis   
...                             ...          ...   
561895  COCO_train2014_000000378868          dog   
561896  COCO_train2014_000000378868          dog   
561897  COCO_train2014_000000378868          dog   
561898  COCO_train2014_000000378868          dog   
561899  COCO_train2014_000000378868          dog   

                                                     urdu  
0                Someone is standing in the snow on skies  
1           The skier wearing an orange jacket is waving.  
2       A female skier is waving and smiling for the c...  
3       a female snow skier in an orange jacket waving...  
4           a woman is smiling while on skies

In [6]:
lines = clear(lines)

Sample clean lines
                           image_id  \
311948  COCO_train2014_000000037282   
80303   COCO_train2014_000000163220   
193209  COCO_train2014_000000564339   
515940  COCO_train2014_000000054850   
391976  COCO_train2014_000000441232   
459216  COCO_train2014_000000210570   
155821  COCO_train2014_000000110310   
516551  COCO_train2014_000000310732   
222252    COCO_val2014_000000012413   
277660    COCO_val2014_000000115222   

                                                      eng  \
311948                    tie person person cake fork cup   
80303   potted plant potted plant horse car car car bu...   
193209                                             person   
515940                              person person frisbee   
391976                                               sink   
459216                                       person bench   
155821                                           airplane   
516551                                   person surfboard   
22

In [7]:
all_eng_words, all_urdu_words = define_vocabulary(lines)
max_length_src, max_length_tar = max_src_trg_length(lines)
input_words, target_words, num_encoder_tokens, num_decoder_tokens = input_trg_words(all_eng_words, all_urdu_words)

Max Source Length: 106
Max Target Lenght: 51


In [8]:
num_decoder_tokens += 1 # For zero padding
num_decoder_tokens
input_token_index, target_token_index, reverse_input_char_index,reverse_target_char_index = reverse_index(input_words, target_words, lines)

Shuffled lines


In [9]:
embedding_matrix_input = get_embedding_matrix(num_encoder_tokens, input_token_index,
                                                   glove["word_embedings_path"],
                                                   EMBEDDING_SIZE)

Found 400000 word vectors.
Shape of embedding matrix
(93, 199)


**Creating training and test dataset**


In [10]:
# Train - Test Split
X, y = lines.eng, lines.urdu

In [11]:
#Save the train and test dataframes for reproducing the results later, as they are shuffled.

X.to_pickle('X_train.pkl')

In [12]:
def generate_batch(X, y, input_token_index, target_token_index,max_length_src, max_length_tar, batch_size = 64 ):
    ''' Generate a batch of data '''
    while True:
        for j in range(0, len(X), batch_size):
            encoder_input_data = np.zeros((batch_size, max_length_src),dtype='float32')
            decoder_input_data = np.zeros((batch_size, max_length_tar),dtype='float32')
            decoder_target_data = np.zeros((batch_size, max_length_tar, num_decoder_tokens),dtype='float32')
            for i, (input_text, target_text) in enumerate(zip(X[j:j+batch_size], y[j:j+batch_size])):
                for t, word in enumerate(input_text.split()):
                    encoder_input_data[i, t] = input_token_index[word] # encoder input seq
                for t, word in enumerate(target_text.split()):
                    if t>0:
                        # decoder target sequence (one hot encoded)
                        # does not include the START_ token
                        # Offset by one timestep
                        decoder_target_data[i, t - 1, target_token_index[word]] = 1.
            yield([encoder_input_data, decoder_input_data], decoder_target_data)

In [13]:
train_samples = len(X)
batch_size = 64
epochs = 5
latent_dim = 256
print(train_samples//batch_size)
# Encoder
encoder_inputs =  Input(shape=(max_length_src,))
enc_emb =  Embedding(num_encoder_tokens, EMBEDDING_SIZE, weights=[embedding_matrix_input], input_length=max_length_src,)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(enc_emb)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]
# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(max_length_tar,))
dec_emb_layer = Embedding(num_decoder_tokens, latent_dim)
dec_emb = dec_emb_layer(decoder_inputs)
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_emb,
                                     initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.summary()

8779


2023-07-01 17:25:25.258984: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-01 17:25:25.261000: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [14]:
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])

In [None]:
es = EarlyStopping(monitor='loss', min_delta=0.001, patience=3)
filepath = config["results_dir"] + 'model_Base_3_Batch_Komninos.h5'
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True,
                             mode='min', save_weights_only=False)
callbacks_list = [checkpoint, es, CSVLogger(config["results_dir"] +'logs.csv',
                                            separator=",", append=True),]
batch_generator = generate_batch(X, y, input_token_index, target_token_index, max_length_src, max_length_tar, batch_size = batch_size)
model.fit(batch_generator,
                    steps_per_epoch = train_samples//batch_size,
                    epochs=epochs,
                    callbacks=[callbacks_list], verbose=1
         )

In [15]:
#Always remember to save the weights

model.save_weights('model_Base_3_Batch_Komninos')

In [18]:
#Load the weights, if you close the application
model.load_weights(config["results_dir"] + 'model_Base_3_Batch_Komninos.h5')
#Inference Setup

# Encode the input sequence to get the "thought vectors"
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder setup
# Below tensors will hold the states of the previous time step
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

dec_emb2= dec_emb_layer(decoder_inputs) # Get the embeddings of the decoder sequence

# To predict the next word in the sequence, set the initial states to the states from the previous time step
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=decoder_states_inputs)
decoder_states2 = [state_h2, state_c2]
decoder_outputs2 = decoder_dense(decoder_outputs2) # A dense softmax layer to generate prob dist. over the target vocabulary

# Final decoder model
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs2] + decoder_states2)


In [None]:
**Evaluation on Validation Dataset**

In [19]:
#Decode sample sequeces
def decode_sequence(input_seq, encoder_model, decoder_model, target_token_index, reverse_target_char_index):
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1,1))
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0] = target_token_index['START_']

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)

        # Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += ' '+sampled_char

        # Exit condition: either hit max length
        # or find stop character.
        if (sampled_char == '_END' or
           len(decoded_sentence) > 50):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1,1))
        target_seq[0, 0] = sampled_token_index

        # Update states
        states_value = [h, c]

    return decoded_sentence


In [20]:
def load_dataset_test(path):
    with open(path, 'r') as f:
        train_dataset = json.load(f)
        
    image_id_list = []
    bbox_categories_list = []
    output_sentences_per_image_list = []
    for pair in train_dataset:
        bbox_categories = pair["bbox_categories"]
        bbox_categories = ' '.join(map(str, bbox_categories))
        bbox_categories_list.append(bbox_categories)
        image_id = pair['image_id']
        image_id_list.append(image_id)
        output_sentences = pair["captions"]
        output_sentences_per_image_list.append(output_sentences)
    return image_id_list, bbox_categories_list, output_sentences_per_image_list

def generate_batch_test(X, input_token_index,max_length_src, batch_size = 64 ):
    ''' Generate a batch of data '''
    while True:
        for j in range(0, len(X), batch_size):
            encoder_input_data = np.zeros((batch_size, max_length_src),dtype='float32')
            for i, input_text in enumerate(X[j:j+batch_size]):
                print(input_text)
                for t, word in enumerate(input_text.split()):
                    encoder_input_data[i, t] = input_token_index[word] # encoder input seq
            yield([encoder_input_data])

def prepare_for_evaluation(lines, test_val_gen, encoder_model, decoder_model, target_token_index, reverse_target_char_index):
    img_id, X_test, actual_sentences = lines.image_id, lines.eng, lines.actual_sentences
    # Get all image-ids from test dataset
    expected = dict()
    results = dict()
    print("Preparing for evaluation")
    # calculation of metrics for test images dataset
    k = -1
    index_rows_process = 0
    for pair in range(0, len(lines)):
        k+=1
        image_id = img_id[k:k+1].values[0]
        expected[image_id] = []
        # Put ground truth captions to the structure accepted by evaluation framework.
        for desc in actual_sentences[k:k+1].values[0]:
            expected[image_id].append({"image_id": image_id, "caption": desc})
        # Predict captions

        st = time.time()
        
        (input_seq) = next(test_val_gen)
        decoded_sentence = decode_sequence(input_seq, encoder_model, decoder_model, target_token_index, reverse_target_char_index)
        generated = decoded_sentence.replace(" _END", "")
        input_sequence = X_test[k:k+1].values[0]
        et = time.time()
        # get the execution time
        elapsed_time = et - st

        # get the execution time
        # Put predicted captions to the structure accepted by evaluation framework.
        results[image_id] = [{"image_id": image_id, "caption": generated, "time": elapsed_time}]
        if index_rows_process % 100 == 0:
            print("Processed:")
            print(index_rows_process)
            print('Execution time:', elapsed_time * 1000, 'miliseconds')
            print("input_sequence", input_sequence)
            print("generated", generated)
            print("saved")
        index_rows_process += 1
    return expected, results

def clear_test(lines):
    # Lowercase all characters
    lines.eng=lines.eng.apply(lambda x: x.lower())
    # Remove quotes
    lines.eng=lines.eng.apply(lambda x: re.sub("'", '', x))
    exclude = set(string.punctuation) # Set of all special characters

    # Remove all the special characters
    lines.eng=lines.eng.apply(lambda x: ''.join(ch for ch in x if ch not in exclude))

    # Remove all numbers from text
    remove_digits = str.maketrans('', '', digits)
    lines.eng=lines.eng.apply(lambda x: x.translate(remove_digits))

    # Remove extra spaces
    lines.eng=lines.eng.apply(lambda x: x.strip())
    lines.eng=lines.eng.apply(lambda x: re.sub(" +", " ", x))
    lines.sample(10)
    return lines

In [21]:
image_id_list, bbox_categories_list, output_sentences_per_image_list = load_dataset_test(test_path) 
lines=pd.DataFrame({'image_id': pd.Series(image_id_list),
                    'eng': pd.Series(bbox_categories_list),
                    'actual_sentences': pd.Series(output_sentences_per_image_list)})
lines=lines[0:100]
lines = clear_test(lines)
img_id, X_test, actual_sentences = lines.image_id, lines.eng, lines.actual_sentences
test_val_gen = generate_batch_test(X_test, input_token_index,max_length_src,1)
    
expected, results = prepare_for_evaluation(lines, test_val_gen, encoder_model, decoder_model, target_token_index, reverse_target_char_index)

Preparing for evaluation
cat cat
Processed:
0
Execution time: 3733.4823608398438 miliseconds
input_sequence cat cat
generated  a cat is looking at the camera with a cat
saved
clock
boat boat boat boat boat boat boat boat boat boat person
train
sheep sheep sheep sheep sheep sheep sheep sheep sheep sheep sheep sheep truck sheep sheep
tv couch bowl vase bowl vase vase potted plant
dog car motorcycle motorcycle
horse horse person person horse
surfboard person
person frisbee
person person surfboard surfboard
person
pizza cat person chair dining table
car person person car car bus
cat couch dining table dog bowl bowl chair mouse
bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird person bird bird bird
person person person person surfboard person person
car car car car car car fire hydrant fire hydrant car car car car stop sign car
person kite kite
person person snowboard bottle
person kite
sandwich carrot carrot carrot carrot car

In [24]:
def calculate_results(expected, results, config):
    sys.path.append(config["coco-caption_path"])
    from pycocoevalcap.eval_any import COCOEvalCap
    # Load expected captions(ground truth from dataset) and results(predicted captions for specific image)
    # to the evaluation framework
    cocoEvalObj = COCOEvalCap(expected, results)
    # Evaluate
    cocoEvalObj.evaluate()
    calculated_metrics = {}
    # Store metrics  values in dictionary by metrics names
    for metric, score in cocoEvalObj.eval.items():
        calculated_metrics[metric] = score
    print(calculated_metrics)
    print("Calculating final results")
    imgToEval = cocoEvalObj.imgToEval
    for p in results:
        print(imgToEval)
        image_id, caption = p, results[p][0]['caption']
        imgToEval[image_id]['caption'] = caption
        imgToEval[image_id]['ground_truth_captions'] = [x['caption'] for x in expected[p]]

    evaluation_results_save_path = os.path.join(config["results_dir"], config["config_name"] + '.json')
    print("Results saved to ")
    print(evaluation_results_save_path)
    # Path to save evaluation results
    with open(evaluation_results_save_path, 'w') as outfile:
        json.dump(
            {'overall': calculated_metrics, 'dataset_name': config["results_dir"], 'imgToEval': imgToEval},
            outfile)
    return calculated_metrics

In [25]:
calculate_results(expected, results, config)

tokenization...


PTBTokenizer tokenized 6233 tokens at 41259.68 tokens per second.
PTBTokenizer tokenized 1083 tokens at 12620.42 tokens per second.


setting up scorers...
computing Bleu score...
{'testlen': 984, 'reflen': 976, 'guess': [984, 884, 784, 684], 'correct': [686, 318, 135, 66]}
ratio: 1.0081967213104424
Bleu_1: 0.697
Bleu_2: 0.501
Bleu_3: 0.351
Bleu_4: 0.254
computing METEOR score...
METEOR: 0.232
computing Rouge score...
ROUGE_L: 0.502
computing CIDEr score...
CIDEr: 1.000
computing SPICE score...


Parsing reference captions
Initiating Stanford parsing pipeline
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator tokenize
[main] INFO edu.stanford.nlp.pipeline.TokenizerAnnotator - TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ssplit
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator parse
[main] INFO edu.stanford.nlp.parser.common.ParserGrammar - Loading parser from serialized file edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz ... 
done [0.6 sec].
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator lemma
[main] INFO edu.stanford.nlp.pipeline.StanfordCoreNLP - Adding annotator ner
Loading classifier from edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz ... done [1.1 sec].
Loading classifier from edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz ... done [0.6 sec].
Loading classif

SPICE evaluation took: 15.66 s
SPICE: 0.160
computing WMD score...
WMD: 0.498


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



{'Bleu_1': 0.697154471544007,
 'Bleu_2': 0.5007857196929164,
 'Bleu_3': 0.3508384573128562,
 'Bleu_4': 0.25406939473136414,
 'METEOR': 0.2317892761903464,
 'ROUGE_L': 0.5018448559292853,
 'CIDEr': 1.0002175832894291,
 'SPICE': 0.16029502317110267,
 'WMD': 0.49810717709173324}