In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from tqdm import tqdm
import numpy as np
import tensorflow as tf
from PIL import Image
import os
import pickle
import json
import cv2
import re
import gc

In [2]:
tf.test.is_gpu_available()

True

In [3]:
# make image dataloader using flow_from_dataframe
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# load data to extract labels
data_dir = '../facebook_challenge_data/'
model_dir = 'models/'

# load data and print sizes
def get_dict(path):
    jsonl_content = open(path, 'r').read()
    data = [json.loads(jline) for jline in jsonl_content.split('\n')]
    return {datum['id'] : datum for datum in data}


train_dict = get_dict(data_dir + 'train.jsonl')
val_dict = get_dict(data_dir + 'dev.jsonl')
test_dict = get_dict(data_dir + 'test.jsonl')

print(len(train_dict))
print(len(val_dict))
print(len(test_dict))

8500
500
1000


In [4]:
# load pretrained LSTM and CNN for text and images
from tensorflow.keras.models import load_model

lstm = load_model(model_dir + 'fb_lstm.h5')
cnn = load_model(model_dir + 'best_fb_inc_cnn.h5')

print(lstm.summary())
print(cnn.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 50, 100)           1264500   
_________________________________________________________________
lstm (LSTM)                  (None, 150)               150600    
_________________________________________________________________
dense (Dense)                (None, 1)                 151       
Total params: 1,415,251
Trainable params: 150,751
Non-trainable params: 1,264,500
_________________________________________________________________
None
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_v3 (Model)         (None, 8, 8, 2048)        21802784  
_________________________________________________________________
flatten (Flatten)            (None, 131072)            0         
_______________

In [5]:
# create new models that are all but the last layer
from tensorflow.keras.models import Sequential, Model

text_net = Sequential()
for layer in lstm.layers[:-1]: text_net.add(layer)
print(text_net.summary())

img_net = Sequential()
for layer in cnn.layers[:-1]: img_net.add(layer)
print(img_net.summary())

# free up memory from old models:
del lstm
del cnn
gc.collect()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 50, 100)           1264500   
_________________________________________________________________
lstm (LSTM)                  (None, 150)               150600    
Total params: 1,415,100
Trainable params: 150,600
Non-trainable params: 1,264,500
_________________________________________________________________
None
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_v3 (Model)         (None, 8, 8, 2048)        21802784  
_________________________________________________________________
flatten (Flatten)            (None, 131072)            0         
_________________________________________________________________
dense (Dense)                (None, 512)               67109376  
Total params:

280952

In [6]:
tokenizer = pickle.load(open(model_dir + 'fb_tokenizer.pkl', 'rb'))
pad_len = text_net.layers[0].input_shape[-1]

# build same embedding matrix as in the lstm file
from tensorflow.keras.layers import Dense, Input, Embedding, Conv1D

EMBEDDING_DIM = 100
word_index = word_index = tokenizer.word_index
MAX_SEQ_LEN = pad_len

# map word to embedding
embeddings_index = {}
for line in open(os.path.join('..', 'glove.twitter.27B.100d.txt')):
    values = line.split()
    word = values[0]
    embeddings_index[word] = np.asarray(values[1:], dtype='float32')

# create embedding matrix (words without embeddings get zero embeddings)
embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None: embedding_matrix[i] = embedding_vector

token_embedding = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQ_LEN,
                            trainable=False)

In [8]:

# build attention model with image vector as query
# https://www.tensorflow.org/api_docs/python/tf/keras/layers/Attention
from tensorflow.keras.layers import concatenate, Attention, Reshape, Dense, Input, Embedding, Conv1D, Add, Activation, Multiply
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import activations

attention_dim = 256 # 256 in fb paper

#inputs
text_input = Input((text_net.layers[0].input_shape[-1],)) # get rid of None's in front, context/value
img_input = Input((img_net.layers[0].input_shape[1:])) # get rid of None's in front, query

# FCM embeddings
text_embed = text_net(text_input)

# get [batch_size, Tq, dim] embeddings from text
txt_value_embeddings = token_embedding(text_input)
transformed_txt_value_embeddings = Dense(attention_dim)(txt_value_embeddings) # get more dimensions from text
cnn_layer = Conv1D(filters=attention_dim, kernel_size=4) # 1D conv for getting seq from txt
t_prime = cnn_layer(transformed_txt_value_embeddings)

# get [batch_size, Tv, dim] embeddings from image
img_embed = img_net(img_input) # this is the query
g_prime = Dense(attention_dim)(img_embed) # linear activation to get same dimension

print(g_prime.shape, t_prime.shape)
# do attention input to DNN as in link
a = tf.keras.layers.Attention()([g_prime, t_prime])
a = tf.keras.layers.GlobalAveragePooling1D()(a)

# symmetric-gated fusion
gate_dim = 256

beta_a = Add()([Dense(gate_dim, use_bias=False)(a), Dense(gate_dim)(g_prime)]) 
beta_a = Activation(activations.relu)(beta_a) # sigma(W_a * a + U_a * g' + b_a)

beta_g = Add()([Dense(gate_dim, use_bias=False)(a), Dense(gate_dim)(g_prime)]) 
beta_g = Activation(activations.relu)(beta_g) # sigma(W_g * a + U_g * g' + b_g)

m = Add()([Dense(gate_dim, use_bias=False)(a), Dense(gate_dim)(g_prime)]) # sigma(W_m * a + U_m * g' + b_m)
m = Activation(activations.tanh)(m)

f = Add()([Multiply()([beta_a, a]), Multiply()([beta_g, m])])

# concatenate it all together
input_layer = tf.keras.layers.Concatenate()([text_embed, img_embed, f])

x = Dense(2048, activation='relu')(input_layer)
x = Dense(1024, activation='relu')(x)
x = Dense(512, activation='relu')(x)
prediction = Dense(1, activation='sigmoid')(x)

model = Model(inputs=[text_input, img_input], outputs=prediction)
print(model.summary())

optimizer = Adam(lr = 1e-6)
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])

(None, 256) (None, 47, 256)
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 50)]         0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 299, 299, 3) 0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 50, 100)      1264500     input_3[0][0]                    
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 512)          88912160    input_4[0][0]                    
__________________________________________________________________

In [9]:
from random import randint # for random cropping
from tensorflow.keras.preprocessing.sequence import pad_sequences

class FBMMDataGenerator(tf.keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, data_dict, tokenizer, pad_len, batch_size=32, dim=(299, 299), n_channels=3, shuffle=True):
        'Initialization'
        self.dim = dim
        self.data_dict = data_dict
        self.batch_size = batch_size
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.pad_len = pad_len
        self.tokenizer = tokenizer
        
        # build labels list and id list
        self.id_list = list(self.data_dict.keys())
        self.labels = {ID: self.data_dict[ID]['label'] for ID in self.id_list}
        self.img_list = {ID: self.data_dict[ID]['img'] for ID in self.id_list}
            
        # get text dictionary
        self.text_dict = self.process_text(self.id_list)
        
        self.on_epoch_end()
        self.classes = [self.labels[self.id_list[i]] for i in self.indexes]

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.id_list) / self.batch_size)) + 1 # last batch is partial

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:index*self.batch_size + self.batch_size]
        
        
        # Find list of IDs
        id_list_temp = [self.id_list[k] for k in indexes]

        # Generate data
        X_txt, X_img, y = self.__data_generation(id_list_temp)
        
        return (X_txt, X_img), y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.id_list))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, id_list_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X_img = np.empty((len(id_list_temp), *self.dim, self.n_channels))
        X_txt = np.empty((len(id_list_temp), self.pad_len))
        y = np.empty(len(id_list_temp), dtype=int)

        # Generate data
        for i, ID in enumerate(id_list_temp):
            # Store sample
            X_img[i,] = self.process_img(data_dir + self.img_list[ID])
            X_txt[i,] = self.text_dict[ID]

            # Store class
            y[i] = self.labels[ID]

        return X_txt, X_img, y
    
    def process_img(self, path): # method for getting image
        img = Image.open(path)
        img.load()
        img = img.resize(self.dim, Image.ANTIALIAS)
        data = np.asarray(img, dtype='uint8')
        im = self.augment(data)
        
        
        if im.shape==(self.dim[0], self.dim[1]): im = np.stack((im,)*3, axis=-1) # handle grayscale
        if im.shape == (*self.dim, 4): im = im[:,:,:3] # handle weird case
        
        return im
    
    def augment(self, im): # random crop and random mirror
        
        # random crop
        x_max, y_max = im.shape[0], im.shape[1]
        x_start, y_start = randint(0, x_max - self.dim[0]), randint(0, y_max - self.dim[1])
        im = im[x_start:x_start + self.dim[0], y_start:y_start + self.dim[1]]
        
        # random mirror
        if randint(0,1): im = np.flip(im, axis=1)
        
        return im
    
    def process_text(self, id_list):
        
        # matrix for texts
        texts = [self.data_dict[ID]['text'] for ID in id_list]
        sequences = self.tokenizer.texts_to_sequences(texts)
        text_seqs = pad_sequences(sequences, maxlen=self.pad_len)
        
        id_to_seq = dict() # map id to text sequence compatible with embedding layer
        for ID, txt in zip(id_list, text_seqs):
            id_to_seq[ID] = txt
        
        return id_to_seq

In [10]:
# create data generators
tokenizer = pickle.load(open(model_dir + 'fb_tokenizer.pkl', 'rb'))
pad_len = text_net.layers[0].input_shape[-1]

# create data generators
train_gen = FBMMDataGenerator(data_dict=train_dict,
                          tokenizer=tokenizer,
                          pad_len=pad_len,
                          batch_size=32,
                          dim=(299, 299),
                          n_channels=3,
                          shuffle=True)

val_gen = FBMMDataGenerator(data_dict=val_dict,
                          tokenizer=tokenizer,
                          pad_len=pad_len,
                          batch_size=32,
                          dim=(299, 299),
                          n_channels=3,
                          shuffle=True)

In [11]:
# train model
from tensorflow.keras.callbacks import ModelCheckpoint

mcp_save = ModelCheckpoint(model_dir + 'best_sym_gated_fb_weights.h5', 
                           save_weights_only=True, 
                           save_best_only=True, 
                           monitor='val_loss', 
                           mode='min')

history = model.fit_generator(train_gen,
                    validation_data=val_gen,
                    shuffle=True,
                    epochs=10,
                    callbacks=[mcp_save])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
# get the weights from the best model
model.load_weights(model_dir + 'best_sym_gated_fb_weights.h5')

In [16]:
# test
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score, accuracy_score
import math

y_val = val_gen.classes

# get AUROC
preds = model.predict_generator(val_gen)
print('Val AUROC:', roc_auc_score(y_val, preds))

# get loss and acc
preds_bin = np.array(preds)
preds_bin[preds>0.5] = 1
preds_bin[preds<=0.5] = 0
print('Val Accuracy:', accuracy_score(y_val, preds_bin))

# get F1
# print('Val F1:', f1_score(y_val, preds_bin, zero_division=1))
# print('Val Precision:', precision_score(y_val, preds_bin, zero_division=1))
# print('Val Recall:', recall_score(y_val, preds_bin, zero_division=1))

Val AUROC: 0.436
Val Accuracy: 0.498


In [None]:
# test the data generator
imgs, texts, ys = train_gen.__getitem__(0)
ids = train_gen.id_list[:32]

for ID, text, img, y in list(zip(ids, texts, imgs, ys))[:3]:
    print(ID)
    print(text)
    img = Image.fromarray(np.uint8(img), 'RGB')
    display(img)
    print('label:', y, '\n\n\n')
