#### Imports

In [24]:
import ast
import time
import tqdm
import pickle
import numpy as np
import pandas as pd

import tensorflow as tf
from torchtext.data.metrics import bleu_score

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### Load Data and Word Maps

In [3]:
# Load pre-processed data and word maps
!cp "/content/drive/MyDrive/MS DL NLP Final Project/Dataset/word_idx.pkl" /content/
!cp "/content/drive/MyDrive/MS DL NLP Final Project/Dataset/idx_word.pkl" /content/
!cp "/content/drive/MyDrive/MS DL NLP Final Project/Dataset/data_preprocessed.csv" /content/
# !cp "/content/drive/MyDrive/Backup Project/word_idx.pkl" /content/
# !cp "/content/drive/MyDrive/Backup Project/idx_word.pkl" /content/
# !cp "/content/drive/MyDrive/Backup Project/data_preprocessed.csv" /content/


with open('/content/word_idx.pkl', 'rb') as file:
    word_idx = pickle.load(file)


with open('/content/idx_word.pkl', 'rb') as file:
    idx_word = pickle.load(file)

df = pd.read_csv("/content/data_preprocessed.csv")
df.head()

Unnamed: 0,image_name,comment,comment_lower_no_punc,comment_padded_index
0,1000092795.jpg,Two young guys with shaggy hair look at their...,two young guys with shaggy hair look at their...,"[1, 20, 28, 496, 8, 2086, 92, 161, 24, 63, 128..."
1,10002456.jpg,Several men in hard hats are operating a gian...,several men in hard hats are operating a gian...,"[1, 110, 38, 5, 341, 251, 17, 1581, 4, 806, 34..."
2,1000268201.jpg,A child in a pink dress is climbing up a set ...,a child in a pink dress is climbing up a set ...,"[1, 4, 74, 5, 4, 76, 102, 11, 312, 54, 4, 348,..."
3,1000344755.jpg,Someone in a blue shirt and hat is standing o...,someone in a blue shirt and hat is standing o...,"[1, 282, 5, 4, 23, 18, 7, 55, 11, 29, 9, 3208,..."
4,1000366164.jpg,"Two men , one in a gray shirt , one in a blac...",two men one in a gray shirt one in a black ...,"[1, 20, 38, 30, 5, 4, 91, 18, 30, 5, 4, 19, 18..."


In [4]:
# Fix image_enc and comment_padded_index columns
# df.drop(columns = ["image_enc"])
df["comment_padded_index"] = df["comment_padded_index"].apply(ast.literal_eval)
df["comment_padded_index"] = df["comment_padded_index"].apply(lambda x: np.array(x))

In [7]:
!cp '/content/drive/MyDrive/MS DL NLP Final Project/Dataset/MaxPooledFeatures.zip' /content/
# !cp '/content/drive/MyDrive/Backup Project/MaxPooledFeatures.zip' /content/

!unzip MaxPooledFeatures.zip

Archive:  MaxPooledFeatures.zip
replace content/MaxPooledFeatures/4107795952.npz? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [6]:
IMAGE_ENC_PATH = '/content/content/MaxPooledFeatures/'

# Append Numpy Arrays of Image Encodings to the Dataframe
def read_npz(image_name):
  image_name = image_name.split(".")[0] + '.npz'
  file_path = IMAGE_ENC_PATH + image_name
  image_enc = np.load(file_path)
  image_enc = image_enc['arr_0']
  return image_enc

df["image_enc"] = df["image_name"].apply(read_npz)
df.head()

Unnamed: 0,image_name,comment,comment_lower_no_punc,comment_padded_index,image_enc
0,1000092795.jpg,Two young guys with shaggy hair look at their...,two young guys with shaggy hair look at their...,"[1, 20, 28, 496, 8, 2086, 92, 161, 24, 63, 128...","[3.8488855, 2.4636881, 4.392019, 4.0071864, 0...."
1,10002456.jpg,Several men in hard hats are operating a gian...,several men in hard hats are operating a gian...,"[1, 110, 38, 5, 341, 251, 17, 1581, 4, 806, 34...","[2.0517633, 1.6446168, 2.2789757, 5.1107526, 0..."
2,1000268201.jpg,A child in a pink dress is climbing up a set ...,a child in a pink dress is climbing up a set ...,"[1, 4, 74, 5, 4, 76, 102, 11, 312, 54, 4, 348,...","[3.2866983, 1.7671489, 1.6473161, 2.8538892, 3..."
3,1000344755.jpg,Someone in a blue shirt and hat is standing o...,someone in a blue shirt and hat is standing o...,"[1, 282, 5, 4, 23, 18, 7, 55, 11, 29, 9, 3208,...","[3.0041192, 6.1499543, 0.9428881, 2.926143, 5...."
4,1000366164.jpg,"Two men , one in a gray shirt , one in a blac...",two men one in a gray shirt one in a black ...,"[1, 20, 38, 30, 5, 4, 91, 18, 30, 5, 4, 19, 18...","[3.5979598, 4.130095, 1.7548522, 3.6023533, 3...."


#### Batch Generator

In [8]:
# Split data into train / val / test
train_size = 28603 # First 28603 for train
val_size = 1590 # Next 1590 for val
test_size = 1590 # Remaining 1590 for test

train_start_idx = 0
train_end_idx = train_size

val_start_idx = train_end_idx
val_end_idx = val_start_idx + 1590

test_start_idx = val_end_idx
test_end_idx = test_start_idx + 1590

In [9]:
df_train = df.iloc[train_start_idx:train_end_idx]
df_val = df.iloc[val_start_idx:val_end_idx]
df_test = df.iloc[test_start_idx:test_end_idx]
# df_test["generated_comment"] = "this is a test comment which will be replaced by model generated comment later"
df_test["generated_comment"] = df["comment_lower_no_punc"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test["generated_comment"] = df["comment_lower_no_punc"]


In [38]:
# # Function to generate batch
# def batch_generator(frame, batch_size, shuffle = True):
#   if shuffle:
#     frame = frame.sample(frac = 1).reset_index(drop = True)

#   num_records = len(frame)
#   num_batches = (num_records + batch_size - 1) // batch_size

#   for i in range(num_batches):
#     start_idx = i * batch_size
#     end_idx = (i + 1) * batch_size
#     yield frame.iloc[start_idx:end_idx]

# Commented out as I am processing each test record individually.

In [10]:
# Helper function for OHE y_test
def ohe_dc_op(dc_op, word_idx):
  y_true_ohe = np.zeros((dc_op.shape[0], dc_op.shape[1], len(word_idx.keys())))

  for i, oa in enumerate(dc_op):
    for j, ia in enumerate(oa):
      y_true_ohe[i][j][ia] = 1

  return y_true_ohe

#### Load Trained Model

In [11]:
!unzip /content/saved_model.zip

unzip:  cannot find or open /content/saved_model.zip, /content/saved_model.zip.zip or /content/saved_model.zip.ZIP.


In [112]:
# Model Parameters
# batch_size = 256
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits = False)
optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0001)
max_len = df["comment_padded_index"].apply(lambda x: len(x)).max() - 1 # -1 for i/p -> o/p of decoder

SAVED_MODEL_PATH = "/content/saved_model"
model = tf.keras.models.load_model(SAVED_MODEL_PATH)
model.compile(optimizer = optimizer, loss = loss_fn)
model.summary()

Model: "efficient_decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               multiple                  459008    
                                                                 
 decoder (Decoder)           multiple                  5983232   
                                                                 
 dense_5 (Dense)             multiple                  2571028   
                                                                 
Total params: 9013268 (34.38 MB)
Trainable params: 9013268 (34.38 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


#### Evaluate

In [128]:
# For each record
for iter, (index, data) in enumerate(tqdm.tqdm(df_test.iterrows())):
  # Generated Caption
  generated_caption = ""

  # Form inputs for decoder
  # -> Extract image_encodings, stack them and convert to tensors
  context = data["image_enc"]
  context = np.vstack(context).reshape((1, context.shape[0]))
  context = np.tile(context[:, np.newaxis, :], (1, max_len, 1))
  context = tf.constant(context, dtype = tf.float32)

  x = data["comment_padded_index"]
  x = np.vstack(x).reshape((1, x.shape[0]))
  dc_ip = x[:, :-1]
  dc_op = x[:, 1:]
  y_true_ohe = ohe_dc_op(dc_op, word_idx)

  # For each time step
  for i in range(dc_ip.shape[1]):
    # Extract all input words till ith time step
    ith_ip = dc_ip[:, : i+1]

    # Pad remaining words and convert to tensor
    pad_amount = [(0, 0), (0, dc_ip.shape[1] - ith_ip.shape[1])]
    ith_ip = np.pad(ith_ip, pad_amount, mode = 'constant', constant_values = 0)
    ith_ip = tf.constant(ith_ip, dtype = tf.float32)

    # Forward Pass
    y_pred = model((context, ith_ip)).numpy()[:, :i + 1].argmax(axis = -1)[0][-1]
    word = idx_word[y_pred]

    # If <END> or <PAD> encountered, break, and do not append to output caption
    if y_pred == 2 or y_pred == 3:
      break

    # Append to generated caption
    generated_caption += " " + word

  # Append generated caption to dataframe
  df_test.iloc[iter]["generated_comment"] = generated_caption


0it [00:00, ?it/s]


#### BLEU Score

In [158]:
# TODO:
# Need to calculate B1, B2, B3, B4 for each image individually
# Average over all images

In [27]:
# Ref arrays to store B# scores
b1_l = []; b2_l = []; b3_l = []; b4_l = [];

for item, data in tqdm.tqdm(df_test.iterrows()):
  # Define candidate and references for BLEU score
  candidate = [[i for i in data["generated_comment"].split()]]
  references = [[i for i in data["comment_lower_no_punc"].split()]]

  # Calculate B1-B4 Scores
  b1 = bleu_score(candidate, [references], weights=[1.0, 0, 0, 0])
  b2 = bleu_score(candidate, [references], weights=[0.5, 0.5, 0, 0])
  b3 = bleu_score(candidate, [references], weights=[0.33, 0.33, 0.33, 0])
  b4 = bleu_score(candidate, [references], weights=[0.25, 0.25, 0.25, 0.25])

  #Append to lists
  b1_l.append(b1); b2_l.append(b2); b3_l.append(b3); b4_l.append(b4)

# Average
b1_avg = np.mean(b1_l); b2_avg = np.mean(b2_l); b3_avg = np.mean(b3_l); b4_avg = np.mean(b4_l)

print("\n")
print(f"B1 Avg: {b1_avg}")
print(f"B2 Avg: {b2_avg}")
print(f"B3 Avg: {b3_avg}")
print(f"B4 Avg: {b4_avg}")


1590it [00:10, 150.80it/s]



B1 Avg: 1.0
B2 Avg: 1.0
B3 Avg: 1.0
B4 Avg: 1.0



