In [1]:
import json
import numpy as np
import operator
from unidecode import unidecode
import h5py
import os
import glob
from pathlib import Path


  from ._conv import register_converters as _register_converters


In [3]:
class SIS_DataReader:

    def __init__(self, path_to_file=r'val.story-in-sequence.json'):
        self.path_to_file = path_to_file

    def create_word_frequency_document(self, path_to_json_file=r'word_frequencies.json'):

        data = json.load(open(self.path_to_file))
        annotations = data['annotations']

        frequency = {}
        for annotation in annotations:
            sentence = annotation[0]['text'].split()
            for word in sentence:
                # proverka za brishenje na greski so zborovi vo unicode format(latinski zborovi)
                if any(x.isupper() for x in unidecode(word)) == False:
                    count = frequency.get(word, 0)
                    frequency[word] = count + 1

        sorted_frequency = sorted(frequency.items(), key=operator.itemgetter(1), reverse=True)

        with open(path_to_json_file, 'w') as fp:
            json.dump(sorted_frequency, fp)

    def get_n_most_frequent_words(self, word_frequency_file=r'word_frequencies.json', vocabulary_size=10000):

        data = json.load(open(word_frequency_file))
        return data[0:vocabulary_size]

    def generate_vocabulary(self, vocabulary_file=r'vist2017_vocabulary.json',
                            word_frequency_file=r'word_frequencies.json', vocabulary_size=10000):

        data = self.get_n_most_frequent_words(word_frequency_file, vocabulary_size)

        idx_to_words = []
        idx_to_words.append("<NULL>")
        idx_to_words.append("<START>")
        idx_to_words.append("<END>")
        idx_to_words.append("<UNK>")

        for element in data:
            idx_to_words.append(element[0])

        words_to_idx = {}
        for i in range(len(idx_to_words)):
            words_to_idx[idx_to_words[i]] = i

        vocabulary = {}
        vocabulary["idx_to_words"] = idx_to_words
        vocabulary["words_to_idx"] = words_to_idx

        with open(vocabulary_file, 'w') as fp:
            json.dump(vocabulary, fp)

    def get_max_sentence_length(self):

        data = json.load(open(self.path_to_file))
        annotations = data['annotations']
        save_sent = []
        max_sentence_length = 0

        for annotation in annotations:
            sentence = annotation[0]['text'].split()
            length = len(sentence)
            if (length > max_sentence_length):
                max_sentence_length = length
                save_sent = sentence

        print(save_sent)
        return max_sentence_length

    def get_min_sentence_length(self):

        data = json.load(open(self.path_to_file))
        annotations = data['annotations']
        save_sent = []
        min_sentence_length = 10

        for annotation in annotations:
            sentence = annotation[0]['text'].split()
            length = len(sentence)
            if (length < min_sentence_length):
                min_sentence_length = length
                save_sent = sentence

        print(save_sent)
        return min_sentence_length

    def sentences_to_index(self, vocabulary_file=r'vist2017_vocabulary.json',
                           image_embedding_file=r'myCNN_image_features_val.hdf5',
                           save_file_path=r'stories_to_val_index_myCNN.hdf5',
                           images_directory=r'C:\Users\Nishanth\ai-visual-storytelling-seq2seq\vist_dataset\images\val',
                           max_length=20):

        vocabulary = json.load(open(vocabulary_file))
        
        #print(vocabulary)
        
        self.words_to_idx = vocabulary["words_to_idx"]
        self.max_length = max_length
        
        #print(self.path_to_file)
        
        data = json.load(open(self.path_to_file))
        annotations = data["annotations"]
        
        
        descriptions = self.descriptions_to_index()
        img_hash = self.get_image_features_hash(image_embedding_file)
        print(img_hash)
        
        
        images_path_names = [y for y in Path(r'C:\Users\Nishanth\ai-visual-storytelling-seq2seq\vist_dataset\images\val',
).rglob('*.jpg')]
        images_path_names_png = [y for y in Path(r'C:\Users\Nishanth\ai-visual-storytelling-seq2seq\vist_dataset\images\val',
).rglob('*.png')]
        #print(images_path_names)
        images_path_names = np.append(images_path_names, images_path_names_png)
        print(images_path_names)

        story_ids = []
        story_sentences = []
        story_images = []
        story_images_ids = []
        story_descriptions = []
        story_images_paths = []

        for i in range(0, len(annotations),5):

            var = (int(annotations[i][0]['photo_flickr_id']) in descriptions.keys()) and (
                    int(annotations[i + 1][0]['photo_flickr_id']) in descriptions.keys()) and (
                          int(annotations[i + 2][0]['photo_flickr_id']) in descriptions.keys()) and (
                          int(annotations[i + 3][0]['photo_flickr_id']) in descriptions.keys()) and (
                          int(annotations[i + 4][0]['photo_flickr_id']) in descriptions.keys())

            if not var:
                continue

            story_id = annotations[i][0]["story_id"]
            #print(story_id)
            img_id1, order1 = int(annotations[i][0]["photo_flickr_id"]), annotations[i][0][
                "worker_arranged_photo_order"]
            img_id2, order2 = int(annotations[i + 1][0]["photo_flickr_id"]), annotations[i + 1][0][
                "worker_arranged_photo_order"]
            img_id3, order3 = int(annotations[i + 2][0]["photo_flickr_id"]), annotations[i + 2][0][
                "worker_arranged_photo_order"]
            img_id4, order4 = int(annotations[i + 3][0]["photo_flickr_id"]), annotations[i + 3][0][
                "worker_arranged_photo_order"]
            img_id5, order5 = int(annotations[i + 4][0]["photo_flickr_id"]), annotations[i + 4][0][
                "worker_arranged_photo_order"]
            
            #print(img_id1)

            if not (str(img_id1) in img_hash):
                continue
            else:
                image1 = img_hash[str(img_id1)]

            if not (str(img_id2) in img_hash):
                continue
            else:
                image2 = img_hash[str(img_id2)]

            if not (str(img_id3) in img_hash):
                continue
            else:
                image3 = img_hash[str(img_id3)]

            if not (str(img_id4) in img_hash):
                continue
            else:
                image4 = img_hash[str(img_id4)]

            if not (str(img_id5) in img_hash):
                continue
            else:
                image5 = img_hash[str(img_id5)]

            story1 = self.sentences_to_index_helper(annotations[i][0]["text"], self.words_to_idx, max_length)
            #print(story1)
            story2 = self.sentences_to_index_helper(annotations[i + 1][0]["text"], self.words_to_idx, max_length)
            story3 = self.sentences_to_index_helper(annotations[i + 2][0]["text"], self.words_to_idx, max_length)
            story4 = self.sentences_to_index_helper(annotations[i + 3][0]["text"], self.words_to_idx, max_length)
            story5 = self.sentences_to_index_helper(annotations[i + 4][0]["text"], self.words_to_idx, max_length)

            description1 = descriptions[int(annotations[i][0]['photo_flickr_id'])]
            #print(description1)
            description2 = descriptions[int(annotations[i + 1][0]['photo_flickr_id'])]
            description3 = descriptions[int(annotations[i + 2][0]['photo_flickr_id'])]
            description4 = descriptions[int(annotations[i + 3][0]['photo_flickr_id'])]
            description5 = descriptions[int(annotations[i + 4][0]['photo_flickr_id'])]

            order1 = annotations[i][0]["worker_arranged_photo_order"]
            #print(order1)
            order2 = annotations[i + 1][0]["worker_arranged_photo_order"]
            order3 = annotations[i + 2][0]["worker_arranged_photo_order"]
            order4 = annotations[i + 3][0]["worker_arranged_photo_order"]
            order5 = annotations[i + 4][0]["worker_arranged_photo_order"]

            story_list = [(story1, order1), (story2, order2), (story3, order3), (story4, order4), (story5, order5)]
            story_list = sorted(story_list, key=operator.itemgetter(1))
            #print(story_list)
            image_list = [(image1, order1), (image2, order2), (image3, order3), (image4, order4), (image5, order5)]
            image_list = sorted(image_list, key=operator.itemgetter(1))

            ordered_stories = [story_list[0][0], story_list[1][0], story_list[2][0], story_list[3][0], story_list[4][0]]
            ordered_images = [image_list[0][0], image_list[1][0], image_list[2][0], image_list[3][0], image_list[4][0]]
            ordered_image_ids = [img_id1, img_id2, img_id3, img_id4, img_id5]
            print(ordered_stories)
            print(ordered_images)
            print(ordered_image_ids)

            ordered_descriptions = [description1, description2, description3, description4, description5]

            #ordered_image_path_names = []
            #for file_idx in range(len(images_path_names)):
             #   if images_path_names[file_idx].find(str(img_id1)):
              #      ordered_image_path_names.append(images_path_names[file_idx])
               # elif images_path_names[file_idx].find(str(img_id2)):
               #     ordered_image_path_names.append(images_path_names[file_idx])
               # elif images_path_names[file_idx].find(str(img_id3)):
                #    ordered_image_path_names.append(images_path_names[file_idx])
              #  elif images_path_names[file_idx].find(str(img_id4)):
               #     ordered_image_path_names.append(images_path_names[file_idx])
               # elif images_path_names[file_idx].find(str(img_id5)):
                #    ordered_image_path_names.append(images_path_names[file_idx])
               # else:
                #    ordered_image_path_names.append("None")

            story_ids.append(int(story_id))
            story_sentences.append(ordered_stories)
            story_images.append(ordered_images)
            story_images_ids.append(ordered_image_ids)
            story_descriptions.append(ordered_descriptions)
            # story_images_paths.append(ordered_image_path_names)

        print(story_ids)
        print(story_sentences)
        print(story_images)
        print(story_images_ids)
        print(story_descriptions)
        #print(story_images_paths)
        
        data_file = h5py.File(save_file_path, 'w')
        data_file.create_dataset("story_ids", data=story_ids)
        data_file.create_dataset("story_sentences", data=story_sentences)
        data_file.create_dataset("image_embeddings", data=story_images)
        data_file.create_dataset("image_ids", data=story_images_ids)
        data_file.create_dataset("descriptions", data=story_descriptions)
        data_file.create_dataset("image_paths", data = story_images_paths)

    def descriptions_to_index(self):
        description_data = json.load(
            open(r'val.description-in-isolation.json'))['annotations']

        description_to_index = {}
        for i in range(len(description_data)):
            photo_id = int(description_data[i][0]['photo_flickr_id'])
            if photo_id not in description_to_index.keys():
                description_to_index[photo_id] = self.sentences_to_index_helper(description_data[i][0]['text'],
                                                                                self.words_to_idx, 20)

        return description_to_index

    def sentences_to_index_helper(self, sentence, word_to_idx, max_length):
        words = sentence.split()
        result_sentence = []

        for word in words:
            if len(result_sentence) == max_length:
                break
            else:
                if word in word_to_idx:
                    result_sentence.append(word_to_idx[word])
                else:
                    result_sentence.append(word_to_idx["<UNK>"])

        result_sentence.insert(0, word_to_idx["<START>"])
        result_sentence.append(word_to_idx["<END>"])

        while len(result_sentence) < max_length + 2:
            result_sentence.append(word_to_idx["<NULL>"])

        return result_sentence

    def indecies_to_sentence(self, sentence, idx_to_word):

        result_sentence = ""
        for word in sentence:
            if word == 0:
                result_sentence = result_sentence + " " + idx_to_word[word]

        print(result_sentence)
        return result_sentence

    def get_image_features_hash(self, file_name):
        L = []
        image_features_file = h5py.File(file_name, 'r')
        image_features_ids = image_features_file["image_ids"]
        image_embeddings = image_features_file["embeddings"]
        dictionary = {}
        
        for i in image_features_ids:
            L.append(i.decode('unicode_escape'))

        
        
        for id, em in zip(L, image_embeddings):
            dictionary[str(id)] = em

        return dictionary


In [None]:
object = SIS_DataReader()
object.create_word_frequency_document()
object.generate_vocabulary()
object.sentences_to_index()
#train

In [4]:
filename = r'C:\Users\Nishanth\Desktop\ai-visual-storytelling-seq2seq\stories_to_train_index_myCNN.hdf5'

with h5py.File(filename, 'r') as f:
    # List all groups
    print("Keys: %s" % f.keys())
    print(list(f.keys()))
    print(f['descriptions'])
    a_group_key = list(f.keys())[2] #image_id

    # Get the data
    data1 = list(f[a_group_key])


Keys: KeysView(<HDF5 file "stories_to_train_index_myCNN.hdf5" (mode r)>)
['descriptions', 'image_embeddings', 'image_ids', 'image_paths', 'story_ids', 'story_sentences']
<HDF5 dataset "descriptions": shape (5287, 5, 22), type "<i4">


In [6]:
data1[77]

array([2752715, 2827527, 2827386, 2827303, 2827247], dtype=int64)

In [7]:
import nltk

In [8]:
# original
#1 Four people posing for a picture, there are three females and one male
#2 Group of people dancing in a darkly lit room
#3 man with a headband and glasses standing with a bottle in his hand
#4 Man with headband and glasses sitting atop the shoulders of another man who is sticking his tongue out
#5 A photo of three people with the female in the front sticking out her tongue

# our model
#1 we all had a party at the pub
#2 we invited all of my friends at the party
#3 here we are having a lot of fun playing games 
#4 and then there was some fun playing with pool  
#5 but at the end of the night , we all got together and went home all over it  

# alexnet
#1 we all had a little good family last night
#2 we had to make it all in the night and i hope we do 
#3 we then went back home and had some good conversation
#4 after that dinner , we played some video games 
#5 some of my friends got a little crazy

hypothesis = ['four', 'people', 'posing', 'for', 'a', 'picture','there','are','three','females','and','one','male','group','of','people','dancing','in','a','darkly','lit','room','man','with', 'a','headband','and','glasses','standing','with','a','bottle','in','his','hand','Man','with','headband','and','glasses','sitting','atop','the','shoulders','of','another','man','who','is','sticking','his','tongue','out','A','photo','of','three','people','with','the','female','in','the','front','sticking','out','her','tongue']
reference = ['we','all','had','a','party','at','the','pub','we','invited','all','of','my','friends','at','the','party','here','we','are','having','a','lot','of','fun','playing','games','and','then','there','was','some','fun','playing','with','pool','but','at','the','end','of','the','night','we','all','got','together','and','went','home','all','over','it']
reference_1 = ['we','all','had','a','little','good','family','last','night','we','had','to','make','it','all','in','the','night','and','i','hope','we','do','after','that','dinner','we','played','some','video','games','some','of','my','friends','got','a','little','crazy']
#there may be several references
BLEUscore = nltk.translate.bleu_score.sentence_bleu([reference], hypothesis)
BLEUscore_1 = nltk.translate.bleu_score.sentence_bleu([reference_1], hypothesis)

print(BLEUscore)
print(BLEUscore_1)


0.6612392275852992
0.19049864185045087


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [9]:
#original
#1 A bunch of people at a Christmas party only one woman is sitting down
#2 The counter top has several plates of food on it
#3 A set of plates that are on the kitchen counter
#4 Different people gather at the table during the holidays
#5 Three young adults are socializing in the living room of a home around Christmas time

#our model
#1 the whole family got together for a christmas party
#2 my sister was invited to a $<$UNK$>$ for the barbecue so everyone had fun 
#3 there was quite a lot of food and drinks
#4 and here we are at the party started getting ready and ready for dinner 
#5 then we had a group photo , everyone in the kitchen we set up 

#alexnet
#1 i love to travel
#2 i a good kitchen with fresh tomatoes and different types of food
#3 i have a lot of food i put my best of me i see i look at this well !
#4 here we are $<$UNK$>$ at the kitchen table eating , so !
#5 then we were dancing to the music

hypothesis = ['A','bunch','of','people','at','a','Christmas','party','only','one','woman','is','sitting','down','The','counter','top','has','several','plates','of','food','on','it','A','set','of','plates','that','are','on','the','kitchen','counter','Different','people','gather','at','the','table','during','the','holidays','Three','young','adults','are','socializing','in','the','living','room','of','a','home','around','Christmas','time']
reference = ['the','whole','family','got','together','for','a','christmas','party','my','sister','was','invited','to','a','UNK','for','the','barbecue','so','everyone','had','fun','there','was','quite','a','lot','of','food','and','drinks','and','here','we','are','at','the','party','started','getting','ready','and','ready','for','dinner','then','we','had','a','group','photo','everyone','in','the','kitchen','we','set','up']
reference_1 = ['i','love','to','travel','i','a','good','kitchen','with','fresh','tomatoes','and','different','types','of','food','i','have','a','lot','of','food','i','put','my','best','of','me','i','see','i','look','at','this','well','here','we','are','UNK','at','the','kitchen','table','eating','so','then','we','were','dancing','to','the','music']

BLEUscore = nltk.translate.bleu_score.sentence_bleu([reference], hypothesis)
BLEUscore_1 = nltk.translate.bleu_score.sentence_bleu([reference_1], hypothesis)

print(BLEUscore)
print(BLEUscore_1)


0.35459569487016757
0.3295645115332452


Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [10]:
#original
#1 A group of youngsters are ice skating on the pond
#2 A group of young women on ice skates during winter
#3 a woman is balancing on ice skates on a rink
#4 this ice skater performs a flawless manuever here
#5 The children are wearing hockey skates and skating at an ice rink

#our model
#1 at the ice skating festival , people stood around and spectators
#2 they linked t-shirts and skated around and enjoyed the show
#3 this girl also had a great time with the ball but he could n't really get a little worried
#4 as , $<$UNK$>$ , another with the bridesmaids you tell him know he was n't to help
#5 children and friends were waiting outside the church

#alex
#1 at the local skating park , there are many attractions
#2 they linked each other together
#3 the girl in yellow knew how to do tricks
#4 as white , white hair of white shirt is even a major
#5 here is the main owners arrived to the location 

hypothesis = ['A','group','of','youngsters','are','ice','skating','on','the','pond','A','group','of','young','women','on','ice','skates','during','winter','a','woman','is','balancing','on','ice','skates','on','a','rink','this','ice','skater','performs','a','flawless','manuever','here','The','children','are','wearing','hockey','skates','and','skating','at','an','ice','rink']
reference = ['at','the','ice','skating','festival','people','stood','around','and','spectators','they','linked','t-shirts','and','skated','around','and','enjoyed','the','show','this','girl','also','had','a','great','time','with','the','ball','but','he','could','not','really','get','a','little','worried','as','UNK','another','with','the','bridesmaids','you','tell','him','know','he','was','not','to','help','children','and','friends','were','waiting','outside','the','church']
reference_1 = ['at','the','local','skating','park','there','are','many','attractions','they','linked','each','other','together','the','girl','in','yellow','knew','how','to','do','tricks','as','white','white','hair','of','white','shirt','is','even','a','major','here','is','the','main','owners','arrived','to','the','location']

BLEUscore = nltk.translate.bleu_score.sentence_bleu([reference], hypothesis)
BLEUscore_1 = nltk.translate.bleu_score.sentence_bleu([reference_1], hypothesis)

print(BLEUscore)
print(BLEUscore_1)


0.19365933250318007
0.6324555320336759


Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [11]:
#original
#1 Three children (a girl and two boys) walking together outdoors, holding hands
#2 three boys are sitting together on a green chair
#3 Two children are reaching into a basket for necklaces
#4 The children are wearing necklaces on their neck
#5 Two women with children are sitting by a mailbox

#our model
#1 the man was happy
#2 they take a moment to take a picture with the kids
#3 at the reception , the children decided to spend a day out in the country
#4 everyone had a great time at the reception , everyone was waiting for them
#5 and [female] had a blast and was ready to eat a delicious meal and before the year was going

#alexnet
#1 school company day . a new $<$UNK$>$ of $<$UNK$>$ $<$UNK$>$} 
#2 she was so excited to be there
#3 after the ceremony everyone had her speech himself using the entire bottle
#4 she took a picture with them from the goats
#5 she she walked her down the photographer he would she would

hypothesis = ['Three','children','a','girl','and','two','boys','walking','together','outdoors','holding','hands','three','boys','are','sitting','together','on','a','green','chair','Two','children','are','reaching','into','a','basket','for','necklaces','The','children','are','wearing','necklaces','on','their','neck','Two','women','with','children','are','sitting','by','a','mailbox']
reference = ['the','man','was','happy','they','take','a','moment','to','take','a','picture','with','the','kids','at','the','reception','the','children','decided','to','spend','a','day','out','in','the','country','everyone','had','a','great','time','at','the','reception','everyone','was','waiting','for','them','and','female','had','a','blast','and','was','ready','to','eat','a','delicious','meal','and','before','the','year','was','going']
reference_1 = ['school','company','day','a','new','UNK','of','UNK','UNK','she','was','so','excited','to','be','there','after','the','ceremony','everyone','had','her','speech','himself','using','the','entire','bottle','she','took','a','picture','with','them','from','the','goats','she','she','walked','her','down','the','photographer','he','would','she','would']

BLEUscore = nltk.translate.bleu_score.sentence_bleu([reference], hypothesis)
BLEUscore_1 = nltk.translate.bleu_score.sentence_bleu([reference_1], hypothesis)

print(BLEUscore)
print(BLEUscore_1)


0.4768521208065966
0.492057143452666


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [12]:
#original
#1 A boy that is wearing no clothes that is standing next to the tree and his brother
#2 Two children are having fun building a gingerbread house
#3 A gingerbread house that is on the table
#4 A boy at christmas time posing with a man dressed as santa
#5 Two young boys standing together and holding a Wii video game system, still in the box

#our model
#1 the man in green was pretty well in the morning
#2 after that he met up in the snow covered that was amazing
#3 the tree that contained had been created for the occasion
#4 of course $[$male$]$ was not happy that he had to leave and tell what a great time when it was going
#5 they could n't think the love and do n't wait to do this time , but , all of their memories

#alex
#1 it was christmas night and i had a lot of fun
#2 after that we found a small store that had lots of alcohol
#3 it was bad enough comfortable
#4 of course barb knew he would be seen a few words from her best stories to see
#5 after all the presents had been opened , the boys were talking about making a smile


hypothesis = ['A','boy','that','is','wearing','no','clothes','that','is','standing','next','to','the','tree','and','his','brother','Two','children','are','having','fun','building','a','gingerbread','house','A','gingerbread','house','that','is','on','the','table','A','boy','at','christmas','time','posing','with','a','man','dressed','as','santa','Two','young','boys','standing','together','and','holding','a','Wii','video','game','system','still','in','the','box']
reference = ['the','man','in','green','was','pretty','well','in','the','morning','after','that','he','met','up','in','the','snow','covered','that','was','amazing','the','tree','that','contained','had','been','created','for','the','occasion','of','course','male','was','not','happy','that','he','had','to','leave','and','tell','what','a','great','time','when','it','was','going','they','could','not','think','the','love','and','do','not','wait','to','do','this','time','but','all','of','their','memories']
reference_1 = ['it','was','christmas','night','and','i','had','a','lot','of','fun','after','that','we','found','a','small','store','that','had','lots','of','alcohol','it','was','bad','enough','comfortable','of','course','barb','knew','he','would','be','seen','a','few','words','from','her','best','stories','to','see','after','all','the','presents','had','been','opened','the','boys','were','talking','about','making','a','smile']

BLEUscore = nltk.translate.bleu_score.sentence_bleu([reference], hypothesis)
BLEUscore_1 = nltk.translate.bleu_score.sentence_bleu([reference_1], hypothesis)

print(BLEUscore)
print(BLEUscore_1)


0.2496385375070295
0.6632807437760121


Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [13]:
#original
#1 A bird latches onto a tree trunk to find itself food
#2 Tall pine trees stand tall above the white snow
#3 A mountain lake scene with clouds in the sky
#4 Multiple berries on a branch surrounded by some leaves
#5 A bunch of buildings sit in the middle of a field

#our model
#1 claus the woodpecker was searching for berries
#2 he searches through the trees that sat in the tree
#3 when the sun went over the coliseum , we decided to go inside the hill and really beautiful going at
#4 here i am enjoying a couple who i wish that they were able to take a picture and wish to have with
#5 here i took a picture of a wall with a castle from the east of the town just $<$UNK$>

#alex
#1 claus the woodpecker was searching for secret
#2 he searches through the trees , grass and trees
#3 he found trees the tall trees trees as the sky makes its way to the sky
#4 these grapes even pretty enough to eat the delicious delicious
#5 the woodpecker was sad when i went to the organization

hypothesis = ['A','bird','latches','onto','a','tree','trunk','to','find','itself','food','Tall','pine','trees','stand','tall','above','the','white','snow','A','mountain','lake','scene','with','clouds','in','the','sky','Multiple','berries','on','a','branch','surrounded','by','some','leaves','A','bunch','of','buildings','sit','in','the','middle','of','a','field']
reference = ['claus','the','woodpecker','was','searching','for','berries','he','searches','through','the','trees','that','sat','in','the','tree','when','the','sun','went','over','the','coliseum','we','decided','to','go','inside','the','hill','and','really','beautiful','going','at','here','i','am','enjoying','a','couple','who','i','wish','that','they','were','able','to','take','a','picture','and','wish','to','have','with','here','i','took','a','picture','of','a','wall','with','a','castle','from','the','east','of','the','town','just','UNK']
reference_1 = ['claus','the','woodpecker','was','searching','for','secret','he','searches','through','the','trees','grass','and','trees','he','found','trees','the','tall','trees','trees','as','the','sky','makes','its','way','to','the','sky','these','grapes','even','pretty','enough','to','eat','the','delicious','delicious','the','woodpecker','was','sad','when','i','went','to','the','organization']

BLEUscore = nltk.translate.bleu_score.sentence_bleu([reference], hypothesis)
BLEUscore_1 = nltk.translate.bleu_score.sentence_bleu([reference_1], hypothesis)

print(BLEUscore)
print(BLEUscore_1)


0.18653567503517982
0.2242275002250832


Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
