<h1 style="padding-top: 25px;padding-bottom: 25px;text-align: left; padding-left: 10px; background-color: #DDDDDD; 
    color: black;"> <img style="float: left; padding-right: 10px; width: 45px" src="https://raw.githubusercontent.com/Harvard-IACS/2018-CS109A/master/content/styles/iacs.png"> AC295: Advanced Practical Data Science </h1>

## Practicum 2: Visual Question Answering

**Harvard University, Fall 2020**  
**Instructors**: Pavlos Protopapas  

### **Team: $\alpha\beta normal$ $Distri\beta ution$**
#### **Roht Beri, Eduardo Peynetti, Jessica Wijaya, Stuart Neilson**

## Creating Pipeline for Training

### Imports

In [2]:
import os
import requests
import tempfile
import zipfile
import shutil
import json
import time
import sys
import cv2
import numpy as np
import pandas as pd
from collections import Counter
from glob import glob
from google.colab import drive
from tqdm.notebook import trange, tqdm
import subprocess
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.python.keras import backend as K
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras import layers
from tensorflow.keras import activations
from tensorflow.keras import optimizers
from tensorflow.keras import losses
from tensorflow.keras import metrics
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras.utils import to_categorical
from keras.utils.layer_utils import count_params
from tensorflow_addons.metrics import F1Score

from sklearn.model_selection import train_test_split

### Utils

In [3]:
# Paths to the training and validation TFRecord files
PATHS_TRAIN = '/content/data/vqa_raw_train2014.tfrecords'
# File name spelled 'val2014' so it matches the validation file read by the
# pipeline cell (it was previously misspelled as 'vl2014').
PATHS_VAL = '/content/data/vqa_raw_val2014.tfrecords'

# Image geometry produced by read_and_resize
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = 224, 224, 3

# Number of top answers to keep (passed to get_top_K_answers).
# NOTE(review): the imports cell binds `K` to the Keras backend
# (`from tensorflow.python.keras import backend as K`); this assignment
# rebinds it to an int, so the backend is no longer reachable as `K`.
K = 10

# Let tf.data choose the level of parallelism at runtime
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Pipeline variables
batch_size = 64          # examples per batch
train_buffer_size = 800  # shuffle buffer size for the training set
val_buffer_size = 200    # shuffle buffer size for the validation set
prefetch = 32            # number of batches to prefetch

In [4]:
# Get Top K answers
def get_top_K_answers(k, answers_path="/content/data/answers.csv"):
    """Return the first ``k`` answers listed in the answers CSV.

    The answers are taken from the first column of the CSV (read as the
    index), in file order.
    NOTE(review): presumably the file is sorted by descending answer
    frequency, so the first ``k`` rows are the "top" answers -- confirm
    against the code that writes answers.csv.

    Args:
        k: Number of answers to return. If ``k`` exceeds the number of
            rows, all answers are returned.
        answers_path: Location of the answers CSV. Defaults to the path
            used on the Colab instance.

    Returns:
        A list with at most ``k`` answers.

    Raises:
        ValueError: If ``k`` is negative (a negative slice bound would
            silently drop answers from the end instead).
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")
    answers = pd.read_csv(answers_path, index_col=0)
    return list(answers.index[:k])

# Top-K answers as a constant tensor; filter_fn compares each example's
# answer against it.
TOP_ANSWERS = tf.constant(get_top_K_answers(K))

In [5]:


# Decode one serialized record into its string features
def _parse_features_function(example):
    """Parse a serialized ``tf.train.Example`` into a dict of scalar strings.

    Args:
        example: A serialized ``tf.train.Example`` proto.

    Returns:
        The result of ``tf.io.parse_single_example`` for the keys
        'image_path', 'question' and 'answer'.
    """
    # All three features share the same spec: one scalar string each.
    scalar_string = tf.io.FixedLenFeature([], tf.string)
    feature_spec = {
        name: scalar_string
        for name in ('image_path', 'question', 'answer')
    }
    return tf.io.parse_single_example(example, feature_spec)

# Keep only examples whose answer is one of the top answers
def filter_fn(x):
    """Predicate for ``Dataset.filter``: is ``x['answer']`` in TOP_ANSWERS?

    Args:
        x: Parsed example; only its 'answer' entry is read.

    Returns:
        A scalar boolean tensor, True when the answer equals at least one
        entry of ``TOP_ANSWERS``.
    """
    # Broadcasting compares the answer against every entry of TOP_ANSWERS.
    matches = tf.equal(TOP_ANSWERS, x['answer'])
    # reduce_any states the membership test directly and avoids summing
    # matches in int8, which could overflow.
    return tf.reduce_any(matches)

# Load an image file and convert it to a fixed-size float tensor
def read_and_resize(path):
    """Read a JPEG from ``path``, pad-resize it and divide by 255.

    Args:
        path: Path of the image file to read.

    Returns:
        The decoded image with ``IMG_CHANNELS`` channels, resized with
        padding to ``IMG_HEIGHT`` x ``IMG_WIDTH`` and divided by 255.0.
    """
    encoded = tf.io.read_file(path)
    pixels = tf.image.decode_jpeg(encoded, channels=IMG_CHANNELS)
    padded = tf.image.resize_with_pad(pixels, IMG_HEIGHT, IMG_WIDTH)
    return padded / 255.0

# Arrange a parsed example as (inputs, target) for training
def structure_data(data):
    """Turn a parsed example into ``((image, question), answer)``.

    Args:
        data: Mapping with the entries 'image_path', 'question' and
            'answer'.

    Returns:
        A nested tuple: the image loaded via ``read_and_resize`` paired
        with the question, followed by the answer.
    """
    image = read_and_resize(data['image_path'])
    inputs = (image, data['question'])
    return (inputs, data['answer'])


### Build Pipeline

In [6]:
def _build_dataset(record_files, shuffle_buffer_size):
    """Build the parse -> filter -> decode -> cache -> shuffle -> batch pipeline.

    Args:
        record_files: List of TFRecord file paths to read.
        shuffle_buffer_size: Buffer size passed to ``Dataset.shuffle``.

    Returns:
        A prefetched dataset yielding ``((image, question), answer)``
        batches of ``batch_size`` examples.
    """
    dataset = tf.data.TFRecordDataset(
        record_files,
        # NOTE(review): buffer_size is the read buffer size in bytes, so 100
        # is tiny; consider a larger value or the default -- confirm intent.
        buffer_size=100,
        num_parallel_reads=AUTOTUNE
    )
    dataset = dataset.map(_parse_features_function, num_parallel_calls=AUTOTUNE)
    dataset = dataset.filter(filter_fn)
    dataset = dataset.map(structure_data, num_parallel_calls=AUTOTUNE)
    # Cache BEFORE shuffling/batching: a cache placed after shuffle replays
    # the first epoch's order (and batch composition) on every later epoch.
    dataset = dataset.cache()
    dataset = dataset.shuffle(buffer_size=shuffle_buffer_size).batch(batch_size)
    return dataset.prefetch(prefetch)


# ############## #
# # Train data # #
# ############## #
train_file = ['/content/data/vqa_raw_train2014.tfrecords']
train = _build_dataset(train_file, train_buffer_size)

# ################### #
# # Validation data # #
# ################### #
val_file = ['/content/data/vqa_raw_val2014.tfrecords']
valid = _build_dataset(val_file, val_buffer_size)

In [7]:
train

<PrefetchDataset shapes: (((None, 224, 224, 3), (None,)), (None,)), types: ((tf.float32, tf.string), tf.string)>

In [8]:
valid

<PrefetchDataset shapes: (((None, 224, 224, 3), (None,)), (None,)), types: ((tf.float32, tf.string), tf.string)>