In [None]:
import numpy as np
import tensorflow as tf
import glob
import random
from sklearn import model_selection

from keras.preprocessing import image
from keras.models import Model, Sequential
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Activation, TextVectorization, Embedding, Input, LSTM, Bidirectional, Lambda, Add, Multiply, Concatenate
from keras import backend as K
from keras.callbacks import ModelCheckpoint, TensorBoard
from matplotlib import pyplot as plt
from itertools import chain, repeat, cycle
import keras
from sklearn.metrics import f1_score
import cv2
from tensorflow.keras.optimizers import SGD

import os
import shutil
import json
from collections import defaultdict
import pickle
import string

In [None]:
# Mount Google Drive inside the Colab runtime so the data files under
# /content/drive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Embedding matrix stored on Drive; generateModel() uses it as the constant
# initializer of the (non-trainable) Embedding layer.
embMat = np.load("/content/drive/MyDrive/COS529: Project4/emb.npy")

In [None]:
# Vocabulary size (rows) and embedding width (columns) are read off the matrix.
numToks, numDims = embMat.shape[:2]
# Width of the softmax output layer, i.e. number of candidate answers.
numTopAnswers = 1000
# Number of token ids per question expected by the question input.
maxLen = 25

In [268]:
def generateModel():
  """Build the two-input (question tokens + image features) answer classifier.

  Question branch: `maxLen` int64 token ids -> frozen Embedding initialised
  from `embMat` -> LSTM(512, full sequence) -> Dropout(0.5) -> LSTM(512).
  Image branch: a 2048-wide feature vector -> Dense(512, relu).
  The two 512-wide vectors are summed element-wise, passed through two
  Dense(1000, relu) + Dropout(0.5) stages and a softmax over `numTopAnswers`.

  Returns:
    An uncompiled keras `Model` taking `[question_input, image_input]`.
  """
  # ---- question branch ----
  questionTokens = Input(shape=(maxLen, ), dtype="int64")
  frozenEmbedding = Embedding(
      numToks,
      numDims,
      input_length=maxLen,
      embeddings_initializer=keras.initializers.Constant(embMat),
      trainable=False,
  )
  questionSeq = frozenEmbedding(questionTokens)
  # return_state=True makes the layer return (sequence, h, c); only the sequence is used.
  questionSeq, _, _ = LSTM(512, return_sequences=True, return_state=True)(questionSeq)
  questionSeq = Dropout(0.5)(questionSeq)
  questionVec = LSTM(512)(questionSeq)

  # ---- image branch ----
  imageFeatures = Input(shape=(2048, ))
  imageVec = Dense(512, activation="relu")(imageFeatures)

  # Fuse the two modalities by element-wise addition
  # (an outer-product fusion was tried earlier and is no longer used).
  fused = Add()([questionVec, imageVec])

  # ---- classifier head: two identical Dense+Dropout stages, then softmax ----
  hidden = fused
  for _stage in range(2):
    hidden = Dense(1000, activation='relu')(hidden)
    hidden = Dropout(0.5)(hidden)
  answerProbs = Dense(numTopAnswers, activation='softmax')(hidden)

  return Model([questionTokens, imageFeatures], answerProbs)

In [269]:
# Echo the sizes the network is built from, then instantiate the (uncompiled) model.
print(numToks, numDims, maxLen, numTopAnswers)
model = generateModel()

12916 300 25 1000


In [270]:
# Print the layer-by-layer architecture and parameter counts of the model built above.
model.summary()

Model: "model_10"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_25 (InputLayer)          [(None, 25)]         0           []                               
                                                                                                  
 embedding_13 (Embedding)       (None, 25, 300)      3874800     ['input_25[0][0]']               
                                                                                                  
 lstm_28 (LSTM)                 [(None, 25, 512),    1665024     ['embedding_13[0][0]']           
                                 (None, 512),                                                     
                                 (None, 512)]                                                     
                                                                                           

In [196]:
# Folder on the mounted Drive that holds every pre-processed array / lookup table.
dataDir = "/content/drive/MyDrive/COS529: Project4"

# Training split: memory-mapped read-only so the arrays are paged in on demand
# instead of being loaded into RAM up front.
trainAnnotations = np.load(os.path.join(dataDir, "trainAnns.npy"), mmap_mode="r")
trainQuest = np.load(os.path.join(dataDir, "trainQs.npy"), mmap_mode="r")
trainIm = np.load(os.path.join(dataDir, "trainIm.npy"), mmap_mode="r")

# Validation split. The annotations are loaded fully into memory (no mmap_mode),
# exactly as before.
valQuest = np.load(os.path.join(dataDir, "valQuestList.npy"), mmap_mode="r")
valIm = np.load(os.path.join(dataDir, "valIm.npy"), mmap_mode="r")
valAnnotations = np.load(os.path.join(dataDir, "valAnns.npy"))

# pickle5 is a backport of pickle protocol 5 for older interpreters. When it is
# not installed (e.g. fresh kernel, or a Python whose stdlib pickle already
# supports protocol 5) fall back to the standard module instead of failing.
try:
  import pickle5 as pickle
except ImportError:
  import pickle


def _loadPickle(fileName):
  """Unpickle `fileName` from `dataDir` and return the stored object.

  NOTE: pickle.load can execute arbitrary code; only use it on files you created
  or otherwise trust.
  """
  with open(os.path.join(dataDir, fileName), 'rb') as pickle_file:
    return pickle.load(pickle_file)


# Lookup tables keyed by the raw bytes of a question row (see evaluateModel) and
# the answer-index -> answer mapping.
questMapVal = _loadPickle("questMap.pickle")
questMapTr = _loadPickle("questMapTr.pickle")
idxMap = _loadPickle("idxMap.pickle")

In [None]:
# Sanity check: the three training arrays should have the same length, and one
# arbitrary sample should decode to a sensible question / answer pair.
print(len(trainIm), len(trainQuest), len(trainAnnotations))
n = 10003
sampleQuest = trainQuest[n]
sampleAnnot = trainAnnotations[n]
print(trainIm[n])
print(sampleQuest)
print(sampleAnnot)
# Question metadata is keyed by the raw bytes of the encoded question row.
print(questMapTr[sampleQuest.tobytes()])
# The arg-max position of the annotation row is looked up in idxMap.
print(idxMap[sampleAnnot.argmax()])

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras `Sequence` that yields ([questions, image features], labels) batches.

    Samples are addressed through `list_IDs`; each ID is used to index
    `questions`, `imfeat` and `annots`. Only full batches are produced: the
    trailing `len(list_IDs) % batch_size` samples of an epoch are skipped.
    """

    def __init__(self, list_IDs, questions, imfeat, annots, batch_size, dim, n_classes=numTopAnswers, shuffle=True):
        """Store the data sources and build the first epoch's ordering.

        Args:
            list_IDs: indexable collection of sample IDs.
            questions: indexable by ID; each item fills a row of width dim[0].
            imfeat: indexable by ID; each item fills a row of width dim[1].
            annots: indexable by ID; each item fills a label row of width
                `numTopAnswers`.
            batch_size: number of samples per batch.
            dim: pair (question width, image-feature width).
            n_classes: stored on the instance only. NOTE: the label buffer is
                sized from the module-level `numTopAnswers`, not this value.
            shuffle: when truthy, reshuffle the sample order every epoch.
        """
        super().__init__()
        self.questDim = dim[0]
        self.imDim = dim[1]
        self.batch_size = batch_size
        self.questions = questions
        self.imfeat = imfeat
        self.annots = annots
        self.list_IDs = list_IDs
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return batch number `index` as an (X, y) pair."""
        batch_slice = slice(index * self.batch_size, (index + 1) * self.batch_size)
        # Translate positions in the (possibly shuffled) ordering into sample IDs.
        list_IDs_temp = [self.list_IDs[k] for k in self.indexes[batch_slice]]
        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Reset the sample ordering, shuffling it when `shuffle` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Assemble the arrays for the given sample IDs.

        Returns:
            X: list [question array (n, questDim), image array (n, imDim)].
            y: integer label array (n, numTopAnswers).
        """
        # Size the buffers from the IDs actually supplied so that no
        # uninitialised np.empty rows can ever leak into a batch.
        n_samples = len(list_IDs_temp)
        X = [np.empty((n_samples, self.questDim)), np.empty((n_samples, self.imDim))]
        # NOTE(review): dtype=int truncates fractional label values; confirm
        # that `annots` only ever holds whole numbers.
        y = np.empty((n_samples, numTopAnswers), dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            X[0][i] = self.questions[ID]
            X[1][i] = self.imfeat[ID]
            y[i] = self.annots[ID]

        return X, y

In [265]:
top_epochs = 7
# Parameters
print(maxLen)
params = {'dim': (maxLen, 2048),          # (question length, image-feature width)
          'batch_size': 512,
          # Keep in step with the model's softmax width; the previous literal
          # 2000 contradicted numTopAnswers.
          'n_classes': numTopAnswers,
          'shuffle': True}

# Save the model to model.h5 at the end of every epoch. An integer save_freq
# counts batches, so the former save_freq=2 would have written the file every
# second batch had the callback been used.
modelCheckpoint = ModelCheckpoint('model.h5', save_freq='epoch')

training_generator = DataGenerator(range(len(trainQuest)), trainQuest, trainIm, trainAnnotations, **params)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['categorical_accuracy'])
# The checkpoint callback must be handed to fit(); creating it alone saves nothing.
model.fit(training_generator, use_multiprocessing=True, workers=8, epochs=top_epochs, callbacks=[modelCheckpoint])

25
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.callbacks.History at 0x7fa777a1a550>

In [253]:
# Run 4 more epochs on the same `model` object and generator. fit() continues
# from the model's current weights, so this adds to any training already done.
model.fit(training_generator, use_multiprocessing=True, workers=8, epochs=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7fa777a5d8d0>

In [236]:
def evaluateModel(questions=None, images=None, annotations=None, questMap=None,
                  predictor=None, answerMap=None, chunkSize=10000):
  """Run the model over a data split and score its top-1 answers.

  Every argument is optional; called with no arguments the function evaluates
  the notebook-level `model` on the validation split (`valQuest`, `valIm`,
  `valAnnotations`, `questMapVal`) using `idxMap`, as it always did.

  Args:
    questions: array of encoded questions; row bytes are the keys of `questMap`.
    images: array of image features aligned with `questions`.
    annotations: per-sample ground truth; `annotations[j][0]` is read as the
      answer index, with -1 meaning "no usable ground truth".
      NOTE(review): confirm this matches how the annotation file is encoded.
    questMap: maps `questions[j].tobytes()` to a dict with 'question' and
      'image_id' entries.
    predictor: object with a Keras-style `predict([questions, images])`.
    answerMap: maps an answer index to the answer itself.
    chunkSize: number of samples sent to `predict` per call.

  Returns:
    (hits, total, q_a_gt, correctAnswers) where
      hits: number of samples whose predicted answer equals the ground truth,
      total: number of samples evaluated (samples without ground truth count),
      q_a_gt: one (question, predicted answer, real answer or None, image_id)
        tuple per sample, in input order,
      correctAnswers: set of predicted indices that were right at least once.
  """
  # Resolve defaults lazily so callers may override any subset of them.
  if questions is None:
    questions = valQuest
  if images is None:
    images = valIm
  if annotations is None:
    annotations = valAnnotations
  if questMap is None:
    questMap = questMapVal
  if predictor is None:
    predictor = model
  if answerMap is None:
    answerMap = idxMap

  print("predicting...")
  print("creating the json...")
  hits = 0
  total = 0
  q_a_gt = []
  correctAnswers = set()

  numQuestions = len(questions)
  # Step through the data chunk by chunk. `end` is clamped to the full length
  # (not length - 1) so the last sample is scored, and the range never yields
  # an empty chunk when the length is an exact multiple of chunkSize.
  for start in range(0, numQuestions, chunkSize):
    end = min(start + chunkSize, numQuestions)
    print(end)
    pred = predictor.predict([questions[start:end], images[start:end]])

    for j in range(start, end):
      ans = pred[j - start].argmax()
      answer = answerMap[ans]
      # NOTE(review): rows with identical bytes share one questMap entry, so
      # the same question text asked about different images presumably
      # resolves to a single image_id; verify against how questMap was built.
      info = questMap[questions[j].tobytes()]
      question = info['question']
      im_id = info['image_id']

      real = annotations[j][0]
      realAnswer = None
      if real != -1:
        realAnswer = answerMap[real]
        if answer == realAnswer:
          correctAnswers.add(ans)
          hits += 1
      total += 1
      # Always record this sample's own values; a sample without ground truth
      # carries realAnswer=None instead of values left over from an earlier one.
      q_a_gt.append((question, answer, realAnswer, im_id))

  return hits, total, q_a_gt, correctAnswers

In [266]:
# Evaluate the current `model`; keeps the hit count, the number of samples seen,
# the per-sample (question, answer, real answer, image id) records and the set
# of answer indices that were predicted correctly.
hits, total, q_a_gt, correctAnswers = evaluateModel()

predicting...
creating the json...
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
187363


In [None]:
def writeJson(results, path="results.json"):
  """Serialise `results` as JSON into the file at `path`.

  Args:
    results: any object accepted by `json.dump`.
    path: destination file; defaults to "results.json" in the current working
      directory, which is where the file was always written before.
  """
  with open(path, "w") as outfile:
    json.dump(results, outfile)

In [None]:
# No earlier cell defines `results` (evaluateModel() returns the q_a_gt records,
# not a dict), so build the question -> predicted-answer mapping here.
# NOTE(review): records sharing the same question text collapse into one key,
# the last one winning; confirm that is acceptable for the results file.
results = {question: answer for question, answer, _realAnswer, _imId in q_a_gt}
writeJson(results)

In [None]:
!pip3 install pickle5

Collecting pickle5
  Downloading pickle5-0.0.12-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (256 kB)
[?25l[K     |█▎                              | 10 kB 36.2 MB/s eta 0:00:01[K     |██▋                             | 20 kB 44.1 MB/s eta 0:00:01[K     |███▉                            | 30 kB 37.1 MB/s eta 0:00:01[K     |█████▏                          | 40 kB 24.9 MB/s eta 0:00:01[K     |██████▍                         | 51 kB 17.2 MB/s eta 0:00:01[K     |███████▊                        | 61 kB 14.5 MB/s eta 0:00:01[K     |█████████                       | 71 kB 13.9 MB/s eta 0:00:01[K     |██████████▎                     | 81 kB 15.4 MB/s eta 0:00:01[K     |███████████▌                    | 92 kB 14.4 MB/s eta 0:00:01[K     |████████████▉                   | 102 kB 13.1 MB/s eta 0:00:01[K     |██████████████                  | 112 kB 13.1 MB/s eta 0:00:01[K     |███████████████▍                | 122 kB 13.1 MB/s eta 0:00:01[K     |████████████████▋ 

In [None]:
# Display the validation annotation array (NumPy abbreviates the repr of large arrays).
valAnnotations

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])