In [65]:
from nltk.translate.bleu_score import sentence_bleu
import pandas as pd 
from PIL import Image
from torchvision import transforms
import re
import numpy as np
import torch 
import pickle
from model import CNN2RNN

In [66]:
data=pd.read_csv('Flickr8k/caption_test.txt')

In [67]:
list_image=set(data['image'])
data.head(5)

Unnamed: 0,image,caption
0,1000268201_693b08cb0e.jpg,A child in a pink dress is climbing up a set o...
1,1000268201_693b08cb0e.jpg,A girl going into a wooden building .
2,1000268201_693b08cb0e.jpg,A little girl climbing into a wooden playhouse .
3,1000268201_693b08cb0e.jpg,A little girl climbing the stairs to her playh...
4,1000268201_693b08cb0e.jpg,A little girl in a pink dress going into a woo...


In [68]:
print("Number image:",len(list_image))

Number image: 298


In [69]:
def get_list_caption(image_name):
    indxs=np.where(data['image']==image_name)[0]
    result=[]
    for index in indxs:
        s=str(data.iloc[index]['caption'])
        s=re.sub(r'[^\w\s]','',s)
        result.append(s.split())
    return result

In [70]:
t=get_list_caption('1000268201_693b08cb0e.jpg')

In [71]:
t

[['A',
  'child',
  'in',
  'a',
  'pink',
  'dress',
  'is',
  'climbing',
  'up',
  'a',
  'set',
  'of',
  'stairs',
  'in',
  'an',
  'entry',
  'way'],
 ['A', 'girl', 'going', 'into', 'a', 'wooden', 'building'],
 ['A', 'little', 'girl', 'climbing', 'into', 'a', 'wooden', 'playhouse'],
 ['A',
  'little',
  'girl',
  'climbing',
  'the',
  'stairs',
  'to',
  'her',
  'playhouse'],
 ['A',
  'little',
  'girl',
  'in',
  'a',
  'pink',
  'dress',
  'going',
  'into',
  'a',
  'wooden',
  'cabin']]

In [52]:
transform = transforms.Compose([
    transforms.Resize((356,356)),
    transforms.RandomCrop((299,299)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])


with open('vocab.pickle','rb') as file:
    dataset=pickle.load(file)

In [40]:
state_dict=torch.load('model_40.pth',map_location=torch.device('cpu'))
embedding_size=state_dict['embedding_size']
hidden_size=state_dict['hidden_size']
vocab_size=state_dict['vocab_size']
num_layers=state_dict['num_layers']
model=CNN2RNN(embedding_size,hidden_size,vocab_size,num_layers)
model.load_state_dict(state_dict['state_dict'])
model.eval()

CNN2RNN(
  (encoder): Encoder(
    (model): Inception3(
      (Conv2d_1a_3x3): BasicConv2d(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (Conv2d_2a_3x3): BasicConv2d(
        (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (Conv2d_2b_3x3): BasicConv2d(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (Conv2d_3b_1x1): BasicConv2d(
        (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_

In [75]:
bleu_1,bleu_2,bleu_3,bleu_4=[],[],[],[]

for image_name in list_image:
    image=Image.open('Flickr8k/Images/'+image_name)
    image=transform(image)
    caption=str(model.caption_image(image.unsqueeze(0),dataset.vocab))
    caption=re.sub(r'[^\w\s]','',caption).split()
    list_caption=get_list_caption(str(image_name))
    bleu_1.append(sentence_bleu(list_caption,caption,weights=(1,0,0,0)))
    bleu_2.append(sentence_bleu(list_caption,caption,weights=(0,1,0,0)))
    bleu_3.append(sentence_bleu(list_caption,caption,weights=(0,0,1,0)))
    bleu_4.append(sentence_bleu(list_caption,caption,weights=(0,0,0,1)))  



In [76]:
n=len(list(set(data['image'])))
print("BLEU socre with 1-gram:",sum(bleu_1)/n)
print("BLEU socre with 2-gram:",sum(bleu_2)/n)
print("BLEU socre with 3-gram:",sum(bleu_3)/n)
print("BLEU socre with 4-gram:",sum(bleu_4)/n)

BLEU socre with 1-gram: 0.5371122203301513
BLEU socre with 2-gram: 0.2416179753135425
BLEU socre with 3-gram: 0.10162504598317396
BLEU socre with 4-gram: 0.044321401622671217
