https://medium.com/@14prakash/transfer-learning-using-keras-d804b2e04ef8
https://towardsdatascience.com/keras-transfer-learning-for-beginners-6c9b8b7143e
https://www.pyimagesearch.com/2019/05/20/transfer-learning-with-keras-and-deep-learning/

# Import Packages

In [1]:
import tensorflow as tf
import tensorflow.keras as keras

print(tf.__version__)
print(tf.keras.__version__)

1.13.1
2.2.4-tf


In [2]:
import json
import pandas as pd
import numpy as np
import glob
import os
import spacy
import h5py


# Mount Google Drive

In [34]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
%pwd

'/content'

In [0]:
%mkdir /content/gdrive/My Drive/42028-DL-CNN/
%mkdir /content/gdrive/My Drive/42028-DL-CNN/Assignment3/

In [3]:
%cd /home/anthony/Projects/42028-assignment3/
#cd /content/gdrive/My Drive/42028-DL-CNN/Assignment3/
%ls

/home/anthony/Projects/42028-assignment3
[0m[01;31mAnnotations_Train_mscoco.zip[0m  Pipfile.lock                [01;31mtrain2014.zip[0m
[01;31mAnnotations_Val_mscoco.zip[0m    [01;34mpredict[0m/                    train_set_index.csv
app.py                        [01;34m__pycache__[0m/                [01;34muploads[0m/
[01;34mdata[0m/                         [01;31mQuestions_Train_mscoco.zip[0m  [01;31mval2014.zip[0m
[01;34mmodels[0m/                       [01;31mQuestions_Val_mscoco.zip[0m    val_set_index.csv
[01;34mmodel_training[0m/               README.md                   [01;34mVQA[0m/
[01;34mnotebooks[0m/                    settings.py
Pipfile                       [01;34mtemplates[0m/


# Download COCO Images

In [0]:
%mkdir "/content/gdrive/My Drive/42028-DL-CNN/Assignment3/data"
%mkdir "/content/gdrive/My Drive/42028-DL-CNN/Assignment3/data/coco"
%mkdir "/content/gdrive/My Drive/42028-DL-CNN/Assignment3/data/coco/images"
%mkdir "/content/gdrive/My Drive/42028-DL-CNN/Assignment3/data/coco/annotations"

In [0]:
%ls

In [0]:
!wget http://images.cocodataset.org/zips/train2014.zip && unzip train2014.zip -d data/coco/images
!wget http://images.cocodataset.org/zips/val2014.zip && unzip val2014.zip -d data/coco/images

Output hidden; open in https://colab.research.google.com to view.

In [0]:
!wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip && unzip annotations_trainval2014.zip -d data/coco/annotations

# Prepare Training and Validation Dataframes

In [0]:
sampling = False

## Prepare Val

### Load Annotations file and extract filename, categories

In [0]:
#with open('data/coco/annotations/instances_val2014.json') as json_file:  
with open('data/coco/annotations/annotations/instances_val2014.json') as json_file:  
    data = json.load(json_file)
    
images_df = pd.DataFrame(data['images'])
annotations_df = pd.DataFrame(data['annotations'])
categories_df = pd.DataFrame(data['categories'])
images_df = images_df[['file_name','height','id','width']]
annotations_df = annotations_df[['category_id','image_id']]
categories_df = categories_df[['id', 'name']]

val_set = annotations_df.merge(images_df, how='left', left_on='image_id', right_on='id').drop('id', axis=1)
val_set = val_set.merge(categories_df, how='left', left_on='category_id', right_on='id').drop('id', axis=1)

### Remove missing images from directory

In [13]:
def list_images_from_dir(path):
  img_list = glob.glob(path + '/*.jpg')
  img_list = [img.replace(path, '') for img in img_list]
  print(len(img_list))
  return img_list

def remove_missing_images(df, col_name, img_list):
  return df[df[col_name].isin(img_list)]

In [87]:
val_dir = "data/coco/images/val2014/"

val_images = list_images_from_dir(val_dir)

40504


In [51]:
val_set = remove_missing_images(val_set, 'file_name', val_images)

val_set.shape, val_set['name'].nunique()

((291875, 6), 80)

### Sampling

In [0]:
cat_list = ['person', 'car', 'book', 'bottle', 'cup']
val_set = val_set[val_set['name'].isin(cat_list)]
val_set.shape

(127148, 6)

In [0]:
if sampling:
    all_val_set = val_set.copy()
    val_set = val_set.sample(50)
    val_set.to_csv("val_set_index.csv")
#else:
#    val_index = pd.read_csv("val_set_index.csv")
#    val_index = val_index.iloc[:, 0]
#    val_set = val_set.iloc[val_index.to_list(),]
val_set.shape, val_set['name'].nunique()

((50, 6), 5)

In [0]:
val_set['name'].nunique(), val_set['name'].unique()

(5, array(['person', 'cup', 'car', 'bottle', 'book'], dtype=object))

## Prepare train

### Load Annotations file and extract filename, categories

In [0]:
#with open('data/coco/annotations/instances_train2014.json') as json_file:  
with open('data/coco/annotations/annotations/instances_train2014.json') as json_file:  
    train_data = json.load(json_file)

images_df = pd.DataFrame(train_data['images'])
annotations_df = pd.DataFrame(train_data['annotations'])
categories_df = pd.DataFrame(train_data['categories'])
images_df = images_df[['file_name','height','id','width']]
annotations_df = annotations_df[['category_id','image_id']]
categories_df = categories_df[['id', 'name']]

train_set = annotations_df.merge(images_df, how='left', left_on='image_id', right_on='id').drop('id', axis=1)
train_set = train_set.merge(categories_df, how='left', left_on='category_id', right_on='id').drop('id', axis=1)

### Remove missing images from directory

In [86]:
train_dir = "data/coco/images/train2014/"

train_images = list_images_from_dir(train_dir)

0


In [0]:
train_set = remove_missing_images(train_set, 'file_name', train_images)

train_set.shape, train_set['name'].nunique()

((604907, 6), 80)

### Sampling

In [0]:
cat_list = ['person', 'car', 'book', 'bottle', 'cup']
train_set = train_set[train_set['name'].isin(cat_list)]
train_set.shape

(264912, 6)

In [0]:
if sampling:
    all_train_set = train_set.copy()
    train_set = train_set.sample(100)
    train_set.to_csv("train_set_index.csv")
#else:
#    train_index = pd.read_csv("train_set_index.csv")
#    train_index = train_index.iloc[:,0]
#    train_set = train_set.iloc[train_index.to_list(),]
train_set.shape, train_set['name'].nunique()

((100, 6), 5)

In [0]:
train_set['name'].nunique(), train_set['name'].unique()

(5, array(['person', 'book', 'bottle', 'car', 'cup'], dtype=object))

# Train Image Model

In [0]:
from keras.applications.resnet50 import ResNet50
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.applications.vgg16 import VGG16
from keras.models import Model,Sequential
from keras.layers import Input, LSTM, Multiply, Dense, Embedding, Flatten, GlobalAveragePooling2D

# Load the ResNet50 model with pretrained weights

base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(150, 150, 3))
#base_model = VGG16(weights='imagenet', include_top=False, input_shape=(150, 150, 3))


# Reconnect the layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(4096, activation='relu', name='aux_output')(x)

if sampling:
  predictions = Dense(5, activation='softmax')(x)
else:
  predictions = Dense(80, activation='softmax')(x)

# This creates a model that includes
# the Input layer and three Dense layers
model = Model(inputs=base_model.input, outputs=predictions)

for layer in model.layers[:-3]:
    layer.trainable = False




In [0]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_10 (InputLayer)           (None, 150, 150, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 156, 156, 3)  0           input_10[0][0]                   
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 75, 75, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 75, 75, 64)   256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [0]:
%mkdir models
%mkdir models/resnet50

In [0]:
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
from keras import backend as K

def get_callbacks(file_path, chkpnt=True, estop=True, red_lr=True, csv_log=True, cp_name="-{epoch:04d}-{val_loss:.2f}.ckpt"):
  
    callbacks = []

    if chkpnt:
      callbacks.append(ModelCheckpoint(file_path + cp_name, monitor="val_loss", mode="min",\
                        save_weights_only=True, save_best_only=True, verbose=1))
    if estop:
      callbacks.append(EarlyStopping(monitor='val_loss', mode='min', min_delta=0, patience=5,\
                        restore_best_weights=True, verbose=1))
    if red_lr:
      callbacks.append(ReduceLROnPlateau(monitor='val_loss', mode='min', factor=0.2, patience=3,\
                        min_delta=0.00001, verbose=True))
    if csv_log:
      callbacks.append(CSVLogger(file_path + '-training.log'))

    return callbacks


In [0]:
callbacks = get_callbacks("models/resnet50")

In [0]:
from keras.preprocessing.image import ImageDataGenerator

# Updated to do image augmentation
train_datagen = ImageDataGenerator(rescale=1./255)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator=train_datagen.flow_from_dataframe(
    dataframe=train_set, 
    directory=train_dir, 
    x_col="file_name", 
    y_col="name", 
    class_mode="categorical", 
    target_size=(150, 150), 
    batch_size=10)


validation_generator=test_datagen.flow_from_dataframe(
    dataframe=val_set, 
    directory=val_dir, 
    x_col="file_name", 
    y_col="name", 
    class_mode="categorical", 
    target_size=(150, 150), 
    batch_size=10)

Found 100 images belonging to 5 classes.
Found 50 images belonging to 5 classes.


In [0]:
from keras.optimizers import RMSprop, Adam

lr_start = 1e-4
rmsprop = RMSprop(lr=lr_start)
adam = Adam(lr=lr_start)

model.compile(loss='binary_crossentropy',
              optimizer=adam,
              metrics=['acc'])

In [0]:
step_size_train=train_generator.n//train_generator.batch_size
step_size_val=validation_generator.n//validation_generator.batch_size

# Train the model
history = model.fit_generator(
      train_generator,
      steps_per_epoch=step_size_train,  # 2000 images = batch_size * steps
      epochs=10,
      callbacks=callbacks,
      validation_data=validation_generator,
      validation_steps=step_size_val,  # 1000 images = batch_size * steps
      verbose=1)

Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.43618, saving model to models/resnet50-0001-0.44.ckpt
Epoch 2/10

Epoch 00002: val_loss improved from 0.43618 to 0.37755, saving model to models/resnet50-0002-0.38.ckpt
Epoch 3/10

Epoch 00003: val_loss did not improve from 0.37755
Epoch 4/10

Epoch 00004: val_loss improved from 0.37755 to 0.35615, saving model to models/resnet50-0004-0.36.ckpt
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.35615
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.35615
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.35615

Epoch 00007: ReduceLROnPlateau reducing learning rate to 1.9999999494757503e-05.
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.35615
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.35615
Restoring model weights from the end of the best epoch
Epoch 00009: early stopping


In [0]:
model.save(f'models/resnet50/resnet50_final.h5')

## Extract Feature Maps Layer

In [0]:
model_feat_extract = Model(inputs=model.input, outputs=model.get_layer("aux_output").output)

model_name = f'models/resnet50/resnet50_feature_extractor.h5'
model_feat_extract.save(model_name)

In [0]:
model_feat_extract.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_12 (InputLayer)           (None, 150, 150, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 156, 156, 3)  0           input_12[0][0]                   
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 75, 75, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 75, 75, 64)   256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [0]:
model_feat_extract = keras.models.load_model(model_name)

Instructions for updating:
`normal` is a deprecated alias for `truncated_normal`
Instructions for updating:
Colocations handled automatically by placer.


In [0]:
train_preds = model_feat_extract.predict_generator(train_generator, max_queue_size=10, workers=-1, use_multiprocessing=True, verbose=0)

In [0]:
val_preds = model_feat_extract.predict_generator(validation_generator, max_queue_size=10, workers=-1, use_multiprocessing=True, verbose=0)

In [0]:
with h5py.File('vqa_image_features.h5','w') as hf:
  hf.create_dataset('train', data=train_preds)
  hf.create_dataset('val', data=val_preds)

OSError: ignored

In [0]:
with h5py.File('vqa_image_features.h5','r') as hf:
  train_img_feats =  np.array(hf.get('train'))
  val_img_feats =  np.array(hf.get('val'))

In [0]:
val_img_feats

array(None, dtype=object)

# Download VQA Dataset

In [11]:
!wget https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Val_mscoco.zip && ! unzip Questions_Val_mscoco.zip -d data/vqa_v1
!wget https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Train_mscoco.zip && ! unzip Questions_Train_mscoco.zip -d data/vqa_v1

--2019-06-07 07:32:43--  https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Val_mscoco.zip
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.232.61
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.232.61|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10594497 (10M) [application/zip]
Saving to: 'Questions_Val_mscoco.zip’


2019-06-07 07:32:57 (755 KB/s) - 'Questions_Val_mscoco.zip’ saved [10594497/10594497]

Archive:  Questions_Val_mscoco.zip
  inflating: data/vqa_v1/OpenEnded_mscoco_val2014_questions.json  
  inflating: data/vqa_v1/MultipleChoice_mscoco_val2014_questions.json  
--2019-06-07 07:32:58--  https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Train_mscoco.zip
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.109.117
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.109.117|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 21985607 (21M) [application/zip]
Saving to: 'Questions_Train_mscoco.

In [12]:
!wget https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Annotations_Train_mscoco.zip && ! unzip Annotations_Train_mscoco.zip -d data/vqa_v1
!wget https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Annotations_Val_mscoco.zip && ! unzip Annotations_Val_mscoco.zip -d data/vqa_v1

--2019-06-07 07:33:05--  https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Annotations_Train_mscoco.zip
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.229.101
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.229.101|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 12167843 (12M) [application/zip]
Saving to: 'Annotations_Train_mscoco.zip’


2019-06-07 07:33:09 (3.44 MB/s) - 'Annotations_Train_mscoco.zip’ saved [12167843/12167843]

Archive:  Annotations_Train_mscoco.zip
  inflating: data/vqa_v1/mscoco_train2014_annotations.json  
--2019-06-07 07:33:10--  https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Annotations_Val_mscoco.zip
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.97.117
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.97.117|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6031604 (5.8M) [application/zip]
Saving to: 'Annotations_Val_mscoco.zip’


2019-06-07 07:33:14 (2.23 MB/s) - 'Annotations_Val_msc

# Prepare VQA Dataframes

## Load JSON Files

In [14]:
data_folder = "data/vqa_v1/"

train_annot_file   = f"{data_folder}/mscoco_train2014_annotations.json"
val_annot_file     = f"{data_folder}/mscoco_val2014_annotations.json"

train_open_quest_file   = f"{data_folder}OpenEnded_mscoco_train2014_questions.json" 
train_mult_quest_file   = f"{data_folder}MultipleChoice_mscoco_train2014_questions.json" 
val_open_quest_file     = f"{data_folder}OpenEnded_mscoco_val2014_questions.json" 
val_mult_quest_file     = f"{data_folder}MultipleChoice_mscoco_val2014_questions.json"  

train_img_folder   = "train2014"
val_img_folder     = "val2014"


In [15]:
def load_json_file(file_path):
  with open(file_path, "r") as read_file:
    json_dict = json.load(read_file)
  return json_dict

def json_2_df(file_path):  
  if 'annot' in file_path:
    key = 'annotations'
  elif 'quest' in file_path:
    key = 'questions'
  else:
    print("[ERROR] JSON file should be annotations or questions")
    return None
  json_file = load_json_file(file_path)
  json_dict = json_file.get(key)
  json_df = pd.DataFrame(json_dict)
  return json_df

def get_ques_annot_json(train_annot_file, val_annot_file, train_open_quest_file, val_open_quest_file):
  train_annot_df      = json_2_df(train_annot_file)
  val_annot_df        = json_2_df(val_annot_file)
  train_open_quest_df = json_2_df(train_open_quest_file)
  val_open_quest_df  = json_2_df(val_open_quest_file)
  
  return train_annot_df, val_annot_df, train_open_quest_df, val_open_quest_df

In [6]:
train_annot_df, val_annot_df, train_open_quest_df, val_open_quest_df = get_ques_annot_json(train_annot_file, val_annot_file, train_open_quest_file, val_open_quest_file)

In [7]:
train_annot_df.head()

Unnamed: 0,answer_type,answers,image_id,multiple_choice_answer,question_id,question_type
0,other,"[{'answer': 'oval', 'answer_confidence': 'yes'...",487025,curved,4870250,what
1,yes/no,"[{'answer': 'yes', 'answer_confidence': 'yes',...",487025,yes,4870251,is there a
2,other,"[{'answer': '1', 'answer_confidence': 'yes', '...",487025,1,4870252,is this
3,yes/no,"[{'answer': 'no', 'answer_confidence': 'yes', ...",78077,no,780770,is this a
4,other,"[{'answer': 'white', 'answer_confidence': 'yes...",78077,white,780771,what color is the


In [8]:
val_annot_df.head()

Unnamed: 0,answer_type,answers,image_id,multiple_choice_answer,question_id,question_type
0,other,"[{'answer': 'wood', 'answer_confidence': 'yes'...",350623,wood,3506232,what is the
1,yes/no,"[{'answer': 'no', 'answer_confidence': 'yes', ...",350623,no,3506230,is the
2,other,"[{'answer': 'kettles', 'answer_confidence': 'y...",350623,kettles,3506231,what
3,yes/no,"[{'answer': 'yes', 'answer_confidence': 'maybe...",8647,no,86472,is this an
4,yes/no,"[{'answer': 'yes', 'answer_confidence': 'yes',...",8647,yes,86470,are there


## Rename Image file

In [16]:
def get_image_filename(img_id, dataSubType):
  return f"COCO_{dataSubType}_{str(img_id).zfill(12)}.jpg"

In [10]:
train_annot_df['image_name'] = train_annot_df['image_id'].apply(lambda img_id: get_image_filename(img_id, dataSubType=train_img_folder))
val_annot_df['image_name'] =   val_annot_df['image_id'].apply(lambda img_id: get_image_filename(img_id, dataSubType=val_img_folder))

In [11]:
train_annot_df.head()

Unnamed: 0,answer_type,answers,image_id,multiple_choice_answer,question_id,question_type,image_name
0,other,"[{'answer': 'oval', 'answer_confidence': 'yes'...",487025,curved,4870250,what,COCO_train2014_000000487025.jpg
1,yes/no,"[{'answer': 'yes', 'answer_confidence': 'yes',...",487025,yes,4870251,is there a,COCO_train2014_000000487025.jpg
2,other,"[{'answer': '1', 'answer_confidence': 'yes', '...",487025,1,4870252,is this,COCO_train2014_000000487025.jpg
3,yes/no,"[{'answer': 'no', 'answer_confidence': 'yes', ...",78077,no,780770,is this a,COCO_train2014_000000078077.jpg
4,other,"[{'answer': 'white', 'answer_confidence': 'yes...",78077,white,780771,what color is the,COCO_train2014_000000078077.jpg


## Remove Missing Images

In [17]:
train_dir = "data/coco/images/train2014/"
train_images = list_images_from_dir(train_dir)
train_annot_df = remove_missing_images(train_annot_df, 'image_name', train_images)
train_annot_df.shape

82783


(248349, 7)

In [18]:
val_dir = "data/coco/images/val2014/"
val_images = list_images_from_dir(val_dir)
val_annot_df = remove_missing_images(val_annot_df, 'image_name', val_images)
val_annot_df.shape

40504


(121512, 7)

## Map Answers

In [19]:
def get_anwer_mappings(df, ans_col='multiple_choice_answer'):
  answer_mapping = df[ans_col].value_counts().reset_index()[0:1000]
  answer_mapping.columns = ['answer_mapping', 'answer_freq']
  answer_mapping = answer_mapping[['answer_mapping']].to_dict()
  answer_mapping = answer_mapping.get('answer_mapping')
  inv_answer_mapping = {v: k for k, v in answer_mapping.items()}
  inv_answer_mapping[np.nan] = -1
  answer_mapping[-1] = np.nan
  
  return answer_mapping, inv_answer_mapping

In [20]:
answer_mapping, inv_answer_mapping = get_anwer_mappings(train_annot_df, ans_col='multiple_choice_answer')

In [None]:
def map_answers(df, inv_answer_mapping, ans_col='multiple_choice_answer'):
  df['answer_id'] = df[ans_col].replace(inv_answer_mapping)

  # Replace all string answers that are not in the top 1000 list
  mask = df['answer_id'].str.contains(r'^[0-9]+$')
  mask = ~(mask.fillna(True))
  df.loc[mask, 'answer_id'] = -1

  df['answer_id'] = df['answer_id'].astype('int')

  # Replace all answers with value over 1000
  df.loc[df['answer_id'] >= (len(inv_answer_mapping)-1), 'answer_id'] = -1

  assert len(inv_answer_mapping) >= df['answer_id'].nunique()
  
  return df['answer_id']

def filter_answer(df, unknown=-1):
  return df[df['answer_id'] > unknown]

In [None]:
train_annot_df['answer_id'] = map_answers(train_annot_df, inv_answer_mapping, ans_col='multiple_choice_answer')
#print(train_annot_df.shape)
train_annot_df = filter_answer(train_annot_df, -1)
#print(train_annot_df.shape)
train_annot_df.head()

In [None]:
val_annot_df['answer_id'] = map_answers(val_annot_df, inv_answer_mapping, ans_col='multiple_choice_answer')
#print(val_annot_df.shape)
val_annot_df = filter_answer(val_annot_df, -1)
#print(val_annot_df.shape)
val_annot_df.head()

## Define questions, answer, image table

In [None]:
def get_image_path(img_id, path):
  return f"{path}{img_id}"

def get_data_table(annot_df, open_quest_df, path):
  df = annot_df[['question_id','question_type', 'answer_id', 'multiple_choice_answer','answer_type', 'image_id', 'image_name']]
  df = df.merge(open_quest_df, how='left', on=['question_id', 'image_id'])

  df['file_name'] = df['image_name'].apply(lambda img_id: get_image_path(img_id, path=path))

  df = df[['question_id', 'question', 'question_type', 'answer_id', 'multiple_choice_answer','answer_type', 'image_id', 'file_name']]
  df = df.rename(index=str, columns={"multiple_choice_answer": "name"})
  
  #img_list = os.listdir(f"{path}")
  #print(len(img_list))
  #mask = df['image_path'].str.replace(f"{path}{dataSubType}/", "").isin(img_list)
  
  return df#[mask]

In [None]:
training_df = get_data_table(train_annot_df, train_open_quest_df, path=train_dir)
valid_df    = get_data_table(val_annot_df,   val_open_quest_df,   path=val_dir)

In [30]:
print(training_df.shape)
training_df.head()

(216081, 8)


Unnamed: 0,question_id,question,question_type,answer_id,name,answer_type,image_id,file_name
0,4870251,Is there a shadow?,is there a,0,yes,yes/no,487025,data/coco/images/train2014/COCO_train2014_0000...
1,4870252,Is this one bench or multiple benches?,is this,3,1,other,487025,data/coco/images/train2014/COCO_train2014_0000...
2,780770,Is this a modern train?,is this a,1,no,yes/no,78077,data/coco/images/train2014/COCO_train2014_0000...
3,780771,What color is the stripe on the train?,what color is the,4,white,other,78077,data/coco/images/train2014/COCO_train2014_0000...
4,780772,What is on the other side of the train?,what is on the,89,trees,other,78077,data/coco/images/train2014/COCO_train2014_0000...


# Train VQA Model

In [39]:
!python -m spacy download en_vectors_web_lg

Collecting en_vectors_web_lg==2.0.0 from https://github.com/explosion/spacy-models/releases/download/en_vectors_web_lg-2.0.0/en_vectors_web_lg-2.0.0.tar.gz#egg=en_vectors_web_lg==2.0.0
[?25l  Downloading https://github.com/explosion/spacy-models/releases/download/en_vectors_web_lg-2.0.0/en_vectors_web_lg-2.0.0.tar.gz (661.8MB)
[K    100% |████████████████████████████████| 661.8MB 2.4MB/s ta 0:00:0111  13% |████▎                           | 89.1MB 3.7MB/s eta 0:02:37    20% |██████▌                         | 134.5MB 1.4MB/s eta 0:06:21    21% |██████▉                         | 142.0MB 1.8MB/s eta 0:04:48    30% |█████████▊                      | 201.6MB 7.8MB/s eta 0:01:00    38% |████████████▏                   | 252.5MB 11.3MB/s eta 0:00:37    41% |█████████████▎                  | 274.6MB 1.4MB/s eta 0:04:30    43% |██████████████                  | 288.1MB 4.1MB/s eta 0:01:33    47% |███████████████                 | 312.4MB 9.7MB/s eta 0:00:36    48% |███████████████▋            

In [35]:
!pip install pandarallel

Collecting pandarallel
  Downloading https://files.pythonhosted.org/packages/d9/b0/eff9587b922a7d1abe52eba7f9410800171f39650c8ec1093c4dcdec3918/pandarallel-1.1.1.tar.gz
Collecting pyarrow>=0.12.1 (from pandarallel)
[?25l  Downloading https://files.pythonhosted.org/packages/3f/6c/91a3d949fe0763e60ac181b7b79e74e848e33e402e5e8274cad455519d76/pyarrow-0.13.0-cp37-cp37m-manylinux1_x86_64.whl (48.5MB)
[K    100% |████████████████████████████████| 48.5MB 1.9MB/s eta 0:00:01    42% |█████████████▋                  | 20.7MB 12.0MB/s eta 0:00:03    64% |████████████████████▌           | 31.0MB 6.4MB/s eta 0:00:03
Building wheels for collected packages: pandarallel
  Building wheel for pandarallel (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/anthony/.cache/pip/wheels/4f/5b/bf/ff7b72bacc2a4ba33ffe5c5c6ea6453b936ad0874e3c97655c
Successfully built pandarallel
Installing collected packages: pyarrow, pandarallel
Successfully installed pandarallel-1.1.1 pyarrow-0.13.0


In [36]:
def get_question_features(question):
    ''' For a given question, a unicode string, returns the time series vector
    with each word (token) transformed into a 300 dimension representation
    calculated using Glove Vector '''
    word_embeddings = spacy.load('en_vectors_web_lg')
    tokens = word_embeddings(question)
    question_tensor = np.zeros((1, 26, 300))
    for j in range(len(tokens)):
        question_tensor[0,j,:] = tokens[j].vector
    return question_tensor

In [None]:
from pandarallel import pandarallel

# Initialization
pandarallel.initialize(progress_bar=False, nb_workers=8)

In [None]:
%%timeit
emb_question = training_df.head()['question'].parallel_apply(get_question_features)

In [89]:
import dask.dataframe as dd

In [91]:
training_dask = dd.from_pandas(training_df, npartitions=30)

In [94]:
%%timeit
training_dask.head()['question'].apply(lambda question: get_question_features(question))

51.6 s ± 147 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [99]:
training_df['emb_question'] = training_df['question'].apply(lambda question: get_question_features(question))

KeyboardInterrupt: 

In [None]:
with h5py.File('vqa_question_features.h5','w') as hf:
  hf.create_dataset('train', data=training_df[['question_id','question','answer_id','name','image_id','emb_question']])
  #hf.create_dataset('val', data=val_preds)

In [None]:
valid_df['emb_question'] = valid_df['question'].apply(lambda question: get_question_features(question))

In [None]:
with h5py.File('vqa_question_features.h5','w') as hf:
  #hf.create_dataset('train', data=training_df)
  hf.create_dataset('val', data=valid_df)

list

In [56]:
training_df.head(1)["question"].parallel_apply(get_question_features)

0    [[[-0.08496099710464478, 0.5019999742507935, 0...
Name: question, dtype: object

In [42]:
question = training_df.loc[training_df['question_id'] == 4355530, 'question'].values[0]
question

'What kind of animal is this?'

In [44]:
get_question_features(question)

array([[[-0.038548  ,  0.54251999, -0.21843   , ...,  0.11798   ,
          0.24590001,  0.22872999],
        [-0.15097   ,  0.29793999, -0.093572  , ..., -0.091308  ,
          0.03936   ,  0.32585001],
        [ 0.060216  ,  0.21799   , -0.04249   , ...,  0.11709   ,
         -0.16692001, -0.094085  ],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]]])

In [0]:
from __future__ import print_function
import numpy as np
import h5py
import json
import pandas as pd

  
def get_data(args, split='train'):
    
    data = {}
  
    img_norm = args['img_normalize'] 
    
    # Load json file
    dataset = load_input_json(args)
    
    # load image feature
    img_feature = load_img_feature(args, split)
    
    # load h5 file
    data = load_qa_feature(args, split)
    
    if img_norm:
        print('(get_data) Normalizing image feature')
        tem = np.sqrt(np.sum(np.multiply(img_feature, img_feature)))
        img_feature = np.divide(img_feature, np.tile(tem,(1,args['img_vec_dim'])))
    
    if split == "train":
      val_answers = None
    else:
      val_answers = get_val_answers(args, data, dataset)

    return dataset, img_feature[data['img_list']], data, val_answers
    
    
def load_input_json(args):
    dataset = {}
    
    with open(args['input_json']) as data_file:
        print('(get_data) Loading input json file...')
        data = json.load(data_file)
    for key in data.keys():
        dataset[key] = data[key]
        
    return dataset
  
def load_img_feature(args, split): 
    img_feature = None
    
    img_split = f'images_{split}'
  
    with h5py.File(args['input_img_h5'],'r') as hf:
        print('(get_data) Loading image feature...')
        # -----0~82459------
        tem = hf.get(img_split)
        img_feature =  np.array(tem)
        
    return img_feature
  
def load_qa_feature(args, split):
    data = {}
    
    ques_split = f"ques_{split}"
    ques_length_split = f"ques_length_{split}"
    img_pos_split = f"img_pos_{split}"
    question_id_split = f"question_id_{split}"
    
    if split == "train":
      answers_split = f"answers"
    else:
      answers_split = f"MC_ans_test"
    
    
    with h5py.File(args['input_ques_h5'],'r') as hf:
        print('(get_data) Loading h5 file...')
        
        # total number of training data is 215375
        # question is (26, )
        tem = hf.get(ques_split)
        data['question'] = np.array(tem)
        print(f"(get_data - {split}) Nb questions: {len(data[u'question'])}")
        
        # max length is 23
        tem = hf.get(ques_length_split)
        data['length_q'] = np.array(tem)
        
        # total 82460 img
        #-----1~82460-----
        tem = hf.get(img_pos_split)
        # convert into 0~82459
        data['img_list'] = np.array(tem)-1
        print(f"(get_data - {split}) Nb images: {len(data[u'img_list'])}")
        
        # quiestion id
        tem = hf.get(question_id_split)
        data['ques_id'] = np.array(tem)
        
        # answer is 1~1000
        tem = hf.get(answers_split)
        data['answers'] = np.array(tem)-1
        print(f"(get_data - {split}) Nb answers: {len(data[u'answers'])}")
        
    return data

def get_val_answers(args, data, dataset):
  
    def most_common(lst):
      return max(set(lst), key=lst.count)

    # Added by Adi, make sure the ans_file is provided
    nb_data_test = len(data[u'question'])
    val_all_answers_dict = json.load(open(args['ans_file']))
    
    val_answers = np.zeros(nb_data_test, dtype=np.int32)
    ans_to_ix = {v: k for k, v in dataset[u'ix_to_ans'].items()}
    count_of_not_found = 0

    for i in range(nb_data_test):
        qid = data[u'ques_id'][i]
        try : 
            val_ans_ix =int(ans_to_ix[most_common(val_all_answers_dict[str(qid)])]) -1
        except KeyError:
            count_of_not_found += 1
            val_ans_ix = 480
        val_answers[i] = val_ans_ix
    print("(get_data - test) Beware: " + str(count_of_not_found) + " number of val answers not found")
    
    return val_answers

In [0]:
def load_datasets(args):
    
    dataset, train_img_feature, train_data, _          = get_data(args, split="train") # get_train_data(args)
    dataset, test_img_feature,  test_data, val_answers = get_data(args, split="test") # get_test_data(args)

    train_X = [train_data[u'question'], train_img_feature]
    train_Y = np_utils.to_categorical(train_data[u'answers'], args['nb_classes'])
    test_X  = [test_data[u'question'], test_img_feature]
    test_Y  = np_utils.to_categorical(val_answers           , args['nb_classes'])

    print(f"\nTrain data: {len(train_X[0])} & {len(train_X[1])} - {len(train_Y)}\n")
    print(f"\nTest data: {len(test_X[0])} & {len(test_X[1])} - {len(test_Y)}\n")
    
    return dataset, train_X, train_Y, test_X, test_Y 

In [0]:
flow(x, y=None, batch_size=32, shuffle=True, sample_weight=None, seed=None, save_to_dir=None, save_prefix='', save_format='png', subset=None)


In [0]:
train_datagen = ImageDataGenerator(rescale=1./255)

train_generator=train_datagen.flow_from_dataframe(
    dataframe=train_set, 
    directory=train_dir, 
    x_col="file_name", 
    y_col="name", 
    class_mode="categorical", 
    target_size=(150, 150), 
    batch_size=10)

In [0]:
def custom_generator(df, path,  X1, X2, y, batch_size):
  img_datagen = ImageDataGenerator(rescale=1./255)

  img_generator = img_datagen.flow_from_dataframe(
      dataframe=df, 
      directory=path, 
      x_col="file_name", 
      y_col="name", 
      class_mode="categorical", 
      target_size=(150, 150), 
      batch_size=batch_size)
  

  genX2 = gen.flow(X2, y, batch_size=batch_size, seed=1)
  while True:
    X1i = img_generator.next()
    X2i = genX2.next()
    yield [X1i[0], X2i[0]], X1i[1]

In [0]:
custom_generator(x_train, x_train_landmark, y_train, batch_size)

In [0]:
from keras.models import Sequential, Model
from keras.layers.core import Reshape, Activation, Dropout
from keras.layers import Input, LSTM, Multiply, Dense, Embedding, Flatten
from keras.layers import concatenate

def get_cnn(args):
    model_image_in = Input(shape=(args['img_vec_dim'],))
    X1 = Dense(args['num_hidden_units_mlp'])(model_image_in)
    X1 = Activation(args['activation_1'])(X1)
    model_image_out = Dropout(args['dropout'])(X1)
    
    model_image = Model(model_image_in, model_image_out)
    
    return model_image, model_image_in, model_image_out
  
def get_lstm(args):
    model_language_in = Input(shape=(26,))
    X2 = Embedding(12603, 300, input_length=26)(model_language_in)
    X2 = LSTM(args['num_hidden_units_lstm'], return_sequences=True, input_shape=(26, 300))(X2)
    X2 = LSTM(args['num_hidden_units_lstm'], return_sequences=True)(X2)
    X2 = LSTM(args['num_hidden_units_lstm'], return_sequences=False)(X2)
    X2 = Dense(args['num_hidden_units_mlp'])(X2)
    X2 = Activation(args['activation_1'])(X2)
    model_language_out = Dropout(args['dropout'])(X2)
                              
    model_language = Model(model_language_in, model_language_out)
    
    return model_language, model_language_in, model_language_out

def create_DeeperLSTM(args):
  
    # Image model   
    model_image, model_image_in, model_image_out = get_cnn(args)
    
    # Language Model
    model_language, model_language_in, model_language_out = get_lstm(args)    
    
    # Merge models
    merged_in = concatenate([model_language_out, model_image_out])

    for i in range(args['num_hidden_units_mlp']):
        X = Dense(args['num_hidden_units_mlp'])(merged_in)
        X = Activation(args['activation_1'])(X)
        X = Dropout(args['dropout'])(X)

    X = Dense(args['nb_classes'])(X)
    merged_out = Activation(args['class_activation'])(X)

    model = Model([model_language_in, model_image_in], merged_out)
    
    return model


In [0]:
from keras.layers import Input, Embedding, LSTM, Dense
from keras.models import Model

# Headline input: meant to receive sequences of 100 integers, between 1 and 10000.
# Note that we can name any layer by passing it a "name" argument.
main_input = Input(shape=(100,), dtype='int32', name='main_input')

# This embedding layer will encode the input sequence
# into a sequence of dense 512-dimensional vectors.
x = Embedding(output_dim=512, input_dim=10000, input_length=100)(main_input)

# A LSTM will transform the vector sequence into a single vector,
# containing information about the entire sequence
lstm_out = LSTM(32)(x)

auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(lstm_out)

auxiliary_input = Input(shape=(5,), name='aux_input')
x = keras.layers.concatenate([lstm_out, auxiliary_input])

# We stack a deep densely-connected network on top
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)

# And finally we add the main logistic regression layer
main_output = Dense(1, activation='sigmoid', name='main_output')(x)

model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output, auxiliary_output])