<h1 style="padding-top: 25px;padding-bottom: 25px;text-align: left; padding-left: 10px; background-color: #DDDDDD; 
    color: black;"> <img style="float: left; padding-right: 10px; width: 45px" src="https://raw.githubusercontent.com/Harvard-IACS/2018-CS109A/master/content/styles/iacs.png"> AC295: Advanced Practical Data Science </h1>

## Practicum 2: Visual Question Answering

**Harvard University, Fall 2020**  
**Instructors**: Pavlos Protopapas  

### **Team: $\alpha\beta normal$ $Distri\beta ution$**
#### **Roht Beri, Eduardo Peynetti, Jessica Wijaya, Stuart Neilson**

## Pruned Model

### This notebook generates and tests a pruned version of the model

### Install Packages

In [36]:
!pip3 install transformers
!pip install -q tensorflow_model_optimization



### Imports

In [37]:
import os
import requests
import tempfile
import zipfile
import shutil
import json
import time
import sys
import cv2
import numpy as np
import pandas as pd
from collections import Counter
from glob import glob
from google.colab import drive
from tqdm.notebook import trange, tqdm
import subprocess
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_hub as hub
import tensorflow_model_optimization as tfmot

from tensorflow import keras
from tensorflow.python.keras import backend as K
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras import layers
from tensorflow.keras import activations
from tensorflow.keras import optimizers
from tensorflow.keras import losses
from tensorflow.keras import metrics
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras.utils import to_categorical
from keras.utils.layer_utils import count_params
from tensorflow_addons.metrics import F1Score
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_wrapper
from tensorflow_model_optimization.sparsity.keras import prune_low_magnitude

from transformers import BertTokenizer, TFBertModel, TFDistilBertModel

### Download Data

In [3]:
if not os.path.exists('/content/data'):
    os.mkdir('/content/data')

!gsutil cp -r gs://practicum2-abnormal-distribution/big2 /content/data

Copying gs://practicum2-abnormal-distribution/big2/answers.csv...
Copying gs://practicum2-abnormal-distribution/big2/functional_1.h5...
Copying gs://practicum2-abnormal-distribution/big2/train2014_tf/vaq_raw_train2014_00-of-10.records...
/ [3 files][  1.2 GiB/  1.2 GiB]  120.0 MiB/s                                   
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying gs://practicum2-abnormal-distribution/big2/train2014_tf/vaq_raw_train2014_01-of-10.records...
Copying gs://practicum2-abnormal-distribution/big2/train2014_tf/vaq_raw_train2014_02-of-10.records...
Copying gs://practicum2-abnormal-distribution/big2/train2014_tf/vaq_raw_train2014_03-of-10.records...
Copying gs://practicum2-abnormal-distribution/big2/train2014_tf/vaq_raw_train2014_04-of-10.records...
Copying gs://practicu

In [4]:
#drive.mount('/content/drive', force_remount=False)
# if not os.path.exists('/content/data'):
#     os.mkdir('/content/data')
#
#!cp -r '/content/drive/My Drive/Practicum2Data/big2' /content/data

### Utils

In [38]:
# we use the following to save the models
class JsonEncoder(json.JSONEncoder):
  """JSON encoder that also understands numpy scalars/arrays and Decimal.

  Used by save_model to serialise the training history and metrics, which
  may contain numpy values that the stock encoder rejects.
  """

  def default(self, obj):
    """Convert `obj` to a JSON-serialisable Python value.

    Returns an int for numpy integers, a float for numpy floats and Decimal,
    and a (nested) list for numpy arrays. Any other type is delegated to the
    base class, which raises TypeError.
    """
    # Imported here because the notebook's import cell does not import
    # `decimal`; without it the Decimal check raised NameError for every
    # object that was not a numpy integer/float (including ndarrays).
    import decimal

    if isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, decimal.Decimal):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    else:
        return super(JsonEncoder, self).default(obj)

# save_model saves everything: weights, structure, training history and metrics.
def save_model(model,training_results,execution_time, learning_rate, epochs, optimizer, evaluation_results,path="models"):
  """Persist a trained model and its bookkeeping files under `path`.

  Files written (all named after `model.name`):
    <name>.hdf5                full model (structure + weights)
    <name>.h5                  weights only
    <name>.json                model structure as JSON
    <name>_train_history.json  `training_results.history`
    <name>_metrics.json        summary metrics (see below)

  Args:
    model: Keras model to save.
    training_results: object with a `.history` dict (as returned by `fit`).
    execution_time: training time, stored as given.
    learning_rate: learning rate used, stored as given.
    epochs: number of epochs, stored as given.
    optimizer: optimizer instance; only its class name is stored.
    evaluation_results: sequence whose items 0 and 1 are stored as
      "loss" and "accuracy".
    path: output directory, created if missing.
  """
  model_name = model.name
  # Get the model train history
  model_train_history = training_results.history

  # Ensure path exists (also creates missing parents; no error if present)
  os.makedirs(path, exist_ok=True)

  # Save the entire model (structure + weights)
  model.save(os.path.join(path, model_name + ".hdf5"))

  # Save only the weights
  model.save_weights(os.path.join(path, model_name + ".h5"))

  # Save the structure only
  with open(os.path.join(path, model_name + ".json"), "w") as json_file:
    json_file.write(model.to_json())

  # Look the file up in the directory we just wrote to, not the default one.
  model_size = get_model_size(path=path, model_name=model_name)

  # Save model history (previously hard-coded to "models", ignoring `path`)
  with open(os.path.join(path, model_name + "_train_history.json"), "w") as json_file:
    json_file.write(json.dumps(model_train_history, cls=JsonEncoder))

  trainable_parameters = count_params(model.trainable_weights)
  non_trainable_parameters = count_params(model.non_trainable_weights)
  total_params = trainable_parameters + non_trainable_parameters

  # Save model metrics. Named `model_metrics` so the local does not shadow
  # the `metrics` module imported from tensorflow.keras.
  model_metrics = {
      "total_params": total_params,
      "execution_time": execution_time,
      "loss": evaluation_results[0],
      "accuracy": evaluation_results[1],
      "model_size": model_size,
      "learning_rate": learning_rate,
      "epochs": epochs,
      "optimizer": type(optimizer).__name__,
      "name": model_name,
      "id": int(time.time())
  }

  with open(os.path.join(path, model_name + "_metrics.json"), "w") as json_file:
    json_file.write(json.dumps(model_metrics, cls=JsonEncoder))

def get_model_size(path="models",model_name="model01"):
  """Return the size in bytes of the file `<path>/<model_name>.hdf5`."""
  hdf5_file = os.path.join(path, model_name + ".hdf5")
  file_info = os.stat(hdf5_file)
  return file_info.st_size

def evaluate_model(model,test_data, training_results,execution_time, learning_rate, epochs, 
                   optimizer,save=True, 
                   loss_metrics=["loss","val_loss"],
                   acc_metrics=["accuracy","val_accuracy"]):
  """Plot the training curves, evaluate `model` on `test_data`, optionally save.

  Draws two side-by-side panels ("Loss" and "Accuracy") from
  `training_results.history`, prints the dict returned by `model.evaluate`,
  and, when `save` is true, forwards everything to `save_model`.

  Returns:
    [loss, accuracy], taken from the evaluation dict under the first entry
    of `loss_metrics` and `acc_metrics` respectively.

  Note: a later cell in this notebook defines another function with the same
  name (taking a TFLite interpreter), which replaces this one once it runs.
  """
  history = training_results.history
  # One x position per recorded epoch of the first loss metric
  epoch_axis = np.arange(0, len(history[loss_metrics[0]]))

  fig = plt.figure(figsize=(15,5))
  panels = (('Loss', loss_metrics), ('Accuracy', acc_metrics))
  for position, (title, metric_names) in enumerate(panels, start=1):
      axs = fig.add_subplot(1,2,position)
      axs.set_title(title)
      for metric in metric_names:
          axs.plot(epoch_axis, history[metric], label=metric)
      axs.legend()

  plt.show()

  # Evaluate on test data
  results_by_name = model.evaluate(test_data, return_dict=True)
  print(results_by_name)

  evaluation_results = [results_by_name[loss_metrics[0]], results_by_name[acc_metrics[0]]]

  if save:
      save_model(model,training_results,execution_time, learning_rate, epochs, optimizer, evaluation_results)

  return evaluation_results

In [39]:
# Load the answers table and keep its first k rows
def get_top_K_answers(k):
    """Return the first `k` rows of answers.csv as a DataFrame.

    The CSV's first column is used as the index. Rows are taken in file
    order, so this is the "top k" only if the file is already sorted.
    """
    all_answers = pd.read_csv("/content/data/big2/answers.csv", index_col=0)
    return all_answers.iloc[:k]

In [40]:
# Parse one serialized TFRecord example into a dict of tensors
def _parse_features_function(example):
    """Decode a serialized tf.train.Example using the feature spec below.

    Every feature is a scalar: five are stored as byte strings and
    'answer' as an int64.
    """
    string_keys = ('image_raw', 'question', 'input_ids', 'token_type_ids', 'attention_mask')
    tf_records_features = {
        key: tf.io.FixedLenFeature([], tf.string) for key in string_keys
    }
    tf_records_features['answer'] = tf.io.FixedLenFeature([], tf.int64)
    return tf.io.parse_single_example(example, tf_records_features)


# Keep only examples whose answer index is below k
def filter_fn(x):
    """Dataset predicate: true when x['answer'] is less than the global `k`."""
    answer_index = x['answer']
    return answer_index < k


# Decode a JPEG and scale its pixels to [0, 1] (no resizing is done here)
def read_and_decode(img):
    """Decode JPEG bytes into a float32 image with IMG_CHANNELS channels.

    Pixel values are divided by 255.0; the spatial size is left as decoded.
    """
    decoded = tf.image.decode_jpeg(img, channels=IMG_CHANNELS)
    return tf.cast(decoded, tf.float32)/255.0


# Turn a parsed example into the (inputs, label) structure used for training
def structure_data(data):
    """Build ((image, (input_ids, token_type_ids, attention_mask)), answer).

    The image is decoded with read_and_decode; the three tokenizer fields
    are stored as raw bytes and are reinterpreted as int32 vectors.
    """
    def decode_int32(key):
        return tf.io.decode_raw(data[key], tf.int32)

    image = read_and_decode(data['image_raw'])
    text_inputs = (
        decode_int32('input_ids'),
        decode_int32('token_type_ids'),
        decode_int32('attention_mask'),
    )

    return ((image, text_inputs), data['answer'])

### Important Variables and Constants

In [41]:
# Constants
# Image dimensions passed to build_student_model; IMG_CHANNELS is also the
# channel count requested when decoding JPEGs in read_and_decode.
IMG_WIDTH = 224
IMG_HEIGHT = 224
IMG_CHANNELS = 3

# Lets tf.data choose the parallelism for the map() calls in the pipeline.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Pipeline variables
# k: number of answers kept; used by filter_fn (answer < k), by
# get_top_K_answers, and as the number of output classes of the student model.
k = 10
batch_size = 32
# NOTE(review): the two buffer sizes and `prefetch` are only referenced by
# commented-out shuffle/prefetch lines in the pipeline cell.
train_buffer_size = 32
val_buffer_size = 32
prefetch = AUTOTUNE

### Get Top Answers

In [42]:
# First k rows of answers.csv; also used later to derive class weights for fit().
top_answers = get_top_K_answers(k)
# Tensor copy of the table. NOTE(review): not referenced again in the visible cells.
TOP_ANSWERS = tf.constant(top_answers)

### Build Pipeline

In [43]:
# ############## #
# # Train data # #
# ############## #
# Shard files are listed, put in random order, and read as TFRecords; each
# record is parsed, filtered to the first k answers, structured and batched.
tfrecords_pattern_path = "/content/data/big2/train2014_tf/vaq_raw_train2014_*-of-*.records"
train_files = tf.random.shuffle(tf.io.matching_files(tfrecords_pattern_path))
train_shards = tf.data.Dataset.from_tensor_slices(train_files)

train = (
    train_shards
    .interleave(tf.data.TFRecordDataset)
    .map(_parse_features_function, num_parallel_calls=AUTOTUNE)
    .filter(filter_fn)
    .map(structure_data, num_parallel_calls=AUTOTUNE)
    # .shuffle(buffer_size=train_buffer_size)
    .batch(batch_size)
    # .cache().prefetch(prefetch)
)

# ################### #
# # Validation data # #
# ################### #
# Same stages as the training pipeline, over the val2014 shards.
tfrecords_pattern_path = "/content/data/big2/val2014_tf/vaq_raw_val2014_*-of-*.records"
val_files = tf.random.shuffle(tf.io.matching_files(tfrecords_pattern_path))
val_shards = tf.data.Dataset.from_tensor_slices(val_files)

valid = (
    val_shards
    .interleave(tf.data.TFRecordDataset)
    .map(_parse_features_function, num_parallel_calls=AUTOTUNE)
    .filter(filter_fn)
    .map(structure_data, num_parallel_calls=AUTOTUNE)
    # .shuffle(buffer_size=val_buffer_size)
    .batch(batch_size)
    # .cache().prefetch(prefetch)
)

### Build Distillation Model

In [9]:
def build_student_model(image_height, image_width, num_channels, num_classes, model_name='student'):
  """Build the small "student" VQA model (image CNN + frozen DistilBERT).

  Image branch: two stride-2 3x3 conv layers (32 filters, ReLU), each
  followed by 2x2 max pooling, then flatten and L2-normalise.
  Question branch: a frozen pretrained 'distilbert-base-uncased' applied to
  the token ids only; its first output is flattened.
  Head: concatenate both branches -> Dense(32, relu) -> Dense(num_classes),
  with no final activation (the output is logits).

  Args:
    image_height, image_width, num_channels: image input shape.
    num_classes: number of output units.
    model_name: prefix of the Keras model name; a Unix timestamp is appended.

  Returns:
    An uncompiled Keras Model taking [image, (input_ids,)].
  """
  # Model input
  input_shape = [image_height, image_width, num_channels]  # height, width, channels
  model_name =  model_name +"_"+ str(int(time.time()))

  image_input = layers.Input(shape=input_shape)
  distill_img = layers.Conv2D(filters=32, kernel_size=(3, 3), strides=(2, 2), padding="same", activation='relu')(image_input)
  distill_img = layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="same")(distill_img)
  distill_img = layers.Conv2D(filters=32, kernel_size=(3, 3), strides=(2, 2), padding="same", activation='relu')(distill_img)
  distill_img = layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="same")(distill_img)

  image_hidden_states = layers.Flatten()(distill_img)
  image_hs = layers.Lambda(lambda x: K.l2_normalize(x,axis=1))(image_hidden_states)

  # Questions are fed as 24 token ids; token_type_ids and attention_mask
  # are deliberately not used by the student.
  input_ids = layers.Input(shape=(24,), dtype=tf.int32)
  #token_type_ids = layers.Input(shape=(24,), dtype=tf.int32)
  #attention_mask = layers.Input(shape=(24,), dtype=tf.int32)

  distill_bert = TFDistilBertModel.from_pretrained('distilbert-base-uncased', return_dict=True)
  distill_bert.trainable = False
  question = distill_bert(
      input_ids, 
      # token_type_ids=token_type_ids, 
      #attention_mask=attention_mask
    )
  question_hs = layers.Flatten()(question[0])
  
  cross_hs = layers.concatenate([image_hs, question_hs])
  x = layers.Dense(32, activation='relu')(cross_hs)
  output = layers.Dense(units=num_classes)(x)

  # Pass the computed name through: it was previously built but never used,
  # so the `model_name` argument had no effect on the returned model.
  model = Model(inputs=[image_input, (input_ids, )], outputs=output, name=model_name)

  return model

### Load Trained Model

In [10]:
# Optimizer
learning_rate = 0.001 
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
optimizer = optimizers.Adam(learning_rate=learning_rate)

# Loss (the student model outputs logits, hence from_logits=True)
student_loss = losses.SparseCategoricalCrossentropy(from_logits=True)

# Free up memory
K.clear_session()

# Build Student model
student_model = build_student_model(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS, k, model_name='student_distill')

# Load Weights from the trained student checkpoint stored on Google Drive.
# `drive` is already imported in the Imports cell, so it is not re-imported here.
drive.mount('/content/drive', force_remount=False)

student_model.load_weights("/content/drive/My Drive/Practicum2Data/student_model.h5")

Some layers from the model checkpoint at distilbert-base-uncased were not used when initializing TFDistilBertModel: ['vocab_layer_norm', 'vocab_projector', 'vocab_transform', 'activation_13']
- This IS expected if you are initializing TFDistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFDistilBertModel were initialized from the model checkpoint at distilbert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).






### Set Parameters for Pruning

In [11]:
# Set Pruning Parameters
# Target sparsity follows a polynomial schedule from 50% at step 0 to 80%
# at end_step.
# NOTE(review): 7138*2 is presumably steps-per-epoch x the 2 epochs used in
# fit() below — confirm if the dataset or batch size changes.
pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                               final_sparsity=0.80,
                                                               begin_step=0,
                                                               end_step=7138*2)
}

def no_bert(layer):
    """Clone function: wrap every layer for pruning except DistilBERT.

    The layer named 'tf_distil_bert_model' is returned unchanged; any other
    layer is wrapped with prune_low_magnitude using the global
    `pruning_params`.
    """
    if layer.name != 'tf_distil_bert_model':
        return tfmot.sparsity.keras.prune_low_magnitude(layer, **pruning_params)
    return layer

# Rebuild the student model's graph, passing every layer through no_bert so
# that all layers except DistilBERT are wrapped for magnitude pruning.
prune_model = tf.keras.models.clone_model(
    student_model,
    clone_function=no_bert,
)

Instructions for updating:
Please use `layer.add_weight` method instead.


In [12]:
# Compile the pruning Model
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
prune_model.compile(optimizer=optimizers.Adam(learning_rate=0.001),
                      loss=losses.SparseCategoricalCrossentropy(from_logits=True),
                      metrics=['accuracy'])

# ModelCheckpoint keeps the weights of the epoch with the best validation
# accuracy; UpdatePruningStep advances the pruning schedule during fit().
callbacks = [keras.callbacks.ModelCheckpoint( 
        filepath='/content/data/prune_model.h5', 
        monitor='val_accuracy', 
        save_best_only=True, 
        save_weights_only=True
    ), tfmot.sparsity.keras.UpdatePruningStep()]

### Prune the Model

In [13]:
start_time = time.time()

# Per-class weight = (sum of all frequencies) / (that answer's frequency),
# keyed by the answer's row position in top_answers.
answer_class_weights = (top_answers.sum()/top_answers).reset_index().frequency.to_dict()

pruning_results = prune_model.fit(
    train,
    validation_data=valid,
    callbacks=callbacks,
    class_weight=answer_class_weights,
    epochs=2,
    verbose=1,
)

# Wall-clock training time, converted from seconds to minutes
elapsed_seconds = time.time() - start_time
execution_time = elapsed_seconds/60.0
print("Training execution time (mins)",execution_time)

Epoch 1/2
Epoch 2/2
Training execution time (mins) 11.536517075697581


In [14]:
# code from https://github.com/tensorflow/model-optimization/blob/master/tensorflow_model_optimization/python/core/sparsity/keras/prune.py
def _strip_pruning_wrapper(layer):
    """Clone function that removes PruneLowMagnitude wrappers.

    - The layer named 'tf_distil_bert_model' is returned untouched (it was
      never wrapped by no_bert).
    - A nested Keras model is cloned recursively with this same function.
    - A PruneLowMagnitude wrapper is replaced by the layer it wraps.
    - Anything else is returned as is.
    """
    if layer.name == 'tf_distil_bert_model':
        return layer

    if isinstance(layer, tf.keras.Model):
        # A keras model with prunable layers
        return keras.models.clone_model(
            layer, input_tensors=None, clone_function=_strip_pruning_wrapper)

    if not isinstance(layer, pruning_wrapper.PruneLowMagnitude):
        return layer

    inner_layer = layer.layer
    # The _batch_input_shape attribute in the first layer makes a Sequential
    # model to be built. Copying it onto the unwrapped layer keeps the
    # model's built state when the wrapper is removed from the first layer.
    if not hasattr(inner_layer, '_batch_input_shape') and hasattr(
            layer, '_batch_input_shape'):
        inner_layer._batch_input_shape = layer._batch_input_shape
    return inner_layer

# Rebuild the pruned model without the pruning wrappers so that only the
# underlying layers remain.
prune_extracted_model = tf.keras.models.clone_model(
    prune_model,
    clone_function=_strip_pruning_wrapper
)

# Compiled so the stripped model can be evaluated with loss and accuracy.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
prune_extracted_model.compile(optimizer=optimizers.Adam(learning_rate=0.001),
                      loss=losses.SparseCategoricalCrossentropy(from_logits=True),
                      metrics=['accuracy'])


In [15]:
# Persist the stripped model on Google Drive: first the weights only (.h5),
# then the full model under a directory path with no file extension.
prune_extracted_model.save_weights('/content/drive/My Drive/Practicum2Data/prune_extracted_model.h5')
prune_extracted_model.save('/content/drive/My Drive/Practicum2Data/pruned_extracted_model')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: /content/drive/My Drive/Practicum2Data/pruned_extracted_model/assets


### Quantization of the Model

In [16]:
# Convert the stripped Keras model to TFLite, storing weights as float16.
converter = tf.lite.TFLiteConverter.from_keras_model(prune_extracted_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
# Allow ops without a TFLite builtin to fall back to select TensorFlow ops.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]

tflite_quant_model = converter.convert()

# Write the serialized TFLite model to Google Drive
tflite_output_path = '/content/drive/My Drive/Practicum2Data/quantized_and_pruned.tflite'
with open(tflite_output_path, 'wb') as tflite_file:
  tflite_file.write(tflite_quant_model)

INFO:tensorflow:Assets written to: /tmp/tmp2vm8dxm1/assets


INFO:absl:Using experimental converter: If you encountered a problem please file a bug. You can opt-out by setting experimental_new_converter=False


### Check Model Sizes

In [17]:
def check_model_weights(model):
  """Print each weight tensor's name, element count and percentage of exact zeros."""
  for variable, values in zip(model.weights, model.get_weights()):
    zero_percent = round(np.sum(values == 0) / values.size * 100,2)
    print(variable.name,"Total:",values.size, "Zeros:", zero_percent,"%")

check_model_weights(prune_extracted_model)

conv2d/kernel:0 Total: 864 Zeros: 79.98 %
conv2d/bias:0 Total: 32 Zeros: 0.0 %
conv2d_1/kernel:0 Total: 9216 Zeros: 80.0 %
conv2d_1/bias:0 Total: 32 Zeros: 0.0 %
tf_distil_bert_model/distilbert/embeddings/word_embeddings/weight:0 Total: 23440896 Zeros: 0.0 %
tf_distil_bert_model/distilbert/embeddings/position_embeddings/embeddings:0 Total: 393216 Zeros: 0.0 %
tf_distil_bert_model/distilbert/embeddings/LayerNorm/gamma:0 Total: 768 Zeros: 0.0 %
tf_distil_bert_model/distilbert/embeddings/LayerNorm/beta:0 Total: 768 Zeros: 0.0 %
tf_distil_bert_model/distilbert/transformer/layer_._0/attention/q_lin/kernel:0 Total: 589824 Zeros: 0.0 %
tf_distil_bert_model/distilbert/transformer/layer_._0/attention/q_lin/bias:0 Total: 768 Zeros: 0.0 %
tf_distil_bert_model/distilbert/transformer/layer_._0/attention/k_lin/kernel:0 Total: 589824 Zeros: 0.0 %
tf_distil_bert_model/distilbert/transformer/layer_._0/attention/k_lin/bias:0 Total: 768 Zeros: 0.0 %
tf_distil_bert_model/distilbert/transformer/layer_._0/a

In [18]:
# Deflate-compress the original model file and report both sizes
# (bytes / 1000, printed with the label "Kb").
original_model_path = '/content/data/big2/vqa_model.h5'
original_zip_path = '/content/data/vqa_model.zip'

with zipfile.ZipFile(original_zip_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
    archive.write(original_model_path)

size_before = os.path.getsize(original_model_path) / float(1000)
size_after = os.path.getsize(original_zip_path) / float(1000)
print("Original model before zipping: %.2f Kb"% size_before)
print("Original model after zipping: %.2f Kb"% size_after)

Original model before zipping: 624304.14 Kb
Original model after zipping: 578842.27 Kb


In [19]:
# Deflate-compress the checkpoint written by ModelCheckpoint during pruning
# and report both sizes (bytes / 1000, printed with the label "Kb").
pruned_weights_path = '/content/data/prune_model.h5'
pruned_zip_path = '/content/data/prune_model.zip'

with zipfile.ZipFile(pruned_zip_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
    archive.write(pruned_weights_path)

size_before = os.path.getsize(pruned_weights_path) / float(1000)
size_after = os.path.getsize(pruned_zip_path) / float(1000)
print("Pruned model before zipping: %.2f Kb"% size_before)
print("Pruned model after zipping: %.2f Kb"% size_after)

Pruned model before zipping: 272016.46 Kb
Pruned model after zipping: 245880.97 Kb


In [20]:
# The stripped model's weights are saved on Google Drive by the save cell
# above, not under /content/data, so read them from the location where this
# notebook actually writes them. The zip itself still goes to /content/data.
extracted_weights_path = '/content/drive/My Drive/Practicum2Data/prune_extracted_model.h5'
extracted_zip_path = '/content/data/prune_extracted_model.zip'

with zipfile.ZipFile(extracted_zip_path, "w", compression=zipfile.ZIP_DEFLATED) as f:
    f.write(extracted_weights_path)
print("Extracted pruned model before zipping: %.2f Kb"% (os.path.getsize(extracted_weights_path) / float(1000)))
print("Extracted pruned model after zipping: %.2f Kb"% (os.path.getsize(extracted_zip_path) / float(1000)))

Extracted pruned model before zipping: 268793.38 Kb
Extracted pruned model after zipping: 245772.15 Kb


In [21]:
# The TFLite file is written to Google Drive by the quantization cell; a bare
# relative filename only resolved if a copy happened to sit in the working
# directory. Read it from where this notebook actually writes it.
quantized_tflite_path = '/content/drive/My Drive/Practicum2Data/quantized_and_pruned.tflite'
quantized_zip_path = '/content/data/quantized_and_pruned.zip'

with zipfile.ZipFile(quantized_zip_path, "w", compression=zipfile.ZIP_DEFLATED) as f:
    f.write(quantized_tflite_path)
print("Extracted quantized model before zipping: %.2f Kb"% (os.path.getsize(quantized_tflite_path) / float(1000)))
print("Extracted quantized model after zipping: %.2f Kb"% (os.path.getsize(quantized_zip_path) / float(1000)))

Extracted quantized model before zipping: 133764.06 Kb
Extracted quantized model after zipping: 121088.62 Kb


### Evaluate Models

In [22]:
# Loss and accuracy of the stripped (pruned) Keras model on the full
# validation pipeline, returned as a dict keyed by metric name.
prune_evaluation_results = prune_extracted_model.evaluate(valid, return_dict=True)
print(prune_evaluation_results)

{'loss': 1.0011489391326904, 'accuracy': 0.43779629468917847}


In [48]:
def evaluate_model(interpreter, dataset=None, max_batches=200):
  """Measure the accuracy of a TFLite interpreter, one sample at a time.

  NOTE: this redefines the Keras `evaluate_model` helper from the Utils
  section; after this cell runs, the name refers to this TFLite version.

  Args:
    interpreter: TFLite interpreter with tensors already allocated. Its
      first input is fed the image and its second input the question token
      ids (assumes that input order — TODO confirm against the converted
      model's input details).
    dataset: iterable of batches shaped ((images, (input_ids, ...)), labels).
      Defaults to the notebook-level `valid` pipeline.
    max_batches: number of batches to evaluate before stopping.

  Returns:
    Fraction of evaluated samples whose arg-max prediction equals the
    label, or 0.0 when no sample was evaluated.
  """
  if dataset is None:
      dataset = valid

  input_details = interpreter.get_input_details()
  input_index1 = input_details[0]["index"]
  input_index2 = input_details[1]["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  num_samples = 0
  num_correct = 0
  for i, batch in enumerate(dataset):
      if i == max_batches:
          break

      images = batch[0][0]
      question_ids = batch[0][1][0]
      labels = np.asarray(batch[1])

      # Use the real batch length: the last batch of a dataset can hold
      # fewer samples than the nominal batch size.
      for j in range(len(labels)):
        # Pre-processing: add batch dimension and convert to float32 to match with
        # the model's input data format.
        test_image = np.expand_dims(images[j], axis=0).astype(np.float32)
        interpreter.set_tensor(input_index1, test_image)

        test_ques = np.expand_dims(question_ids[j], axis=0)
        interpreter.set_tensor(input_index2, test_ques)

        # Run inference.
        interpreter.invoke()

        # Post-processing: remove batch dimension and take the class with the
        # highest score.
        output = interpreter.tensor(output_index)
        predicted = int(np.argmax(output()[0]))
        num_samples += 1
        num_correct += int(predicted == labels[j])

  print('\n')
  if num_samples == 0:
      return 0.0
  return num_correct / num_samples

In [49]:
# Mount Google Drive (no-op message if already mounted) to reach the TFLite file.
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

# Load the pruned + quantized TFLite model and allocate its tensors.
interpreter = tf.lite.Interpreter(model_path='/content/drive/My Drive/Practicum2Data/quantized_and_pruned.tflite')
interpreter.allocate_tensors()

start = time.time()

test_accuracy = evaluate_model(interpreter)

# NOTE: this reuses the name `execution_time`, which earlier held the training
# time in minutes; here it is the evaluation time in seconds.
execution_time = time.time() - start

print('Pruned and quantized TFLite test_accuracy:', test_accuracy)
# Divides by 32*200, i.e. assumes 200 batches of 32 samples were evaluated.
# The elapsed time covers the whole evaluate_model call (dataset iteration
# and pre-processing included), not only interpreter.invoke().
print('Average Inference time:{}'.format(execution_time/(32*200)))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Pruned and quantized TFLite test_accuracy: 0.38125
Average Inference time:0.08021565843373538
