# Import required libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.cm as cm
from matplotlib import pyplot as plt 
from keras.models import Model
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.layers import Activation, Dropout, Flatten, Dense, Input
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# packages for learning from crowds
from crowd_layer.crowd_layers import CrowdsRegression, MaskedMultiMSE

# prevent tensorflow from allocating the entire GPU memory at once
# (allow_growth asks TensorFlow to enlarge its GPU allocation on demand)
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
# NOTE(review): `sess` is never referenced again in the visible cells; presumably
# creating this session first is enough for the option to apply to the session
# Keras uses later -- confirm, or register it with the Keras backend explicitly.
sess = tf.Session(config=config)

Using TensorFlow backend.


# Configuration parameters

In [2]:
# NOTE(review): NUM_RUNS is not referenced in the visible cells; presumably the
# number of repeated training runs -- confirm or remove.
NUM_RUNS = 30
# directory prefix for the texts, ratings and crowd answers files loaded below
DATA_PATH = "/home/fmpr/datasets/deep-crowds-datasets/MovieReviews/"
# batch size passed to every model.fit call
BATCH_SIZE = 128
# number of epochs passed to every model.fit call
N_EPOCHS = 100
# directory prefix for the glove.6B.300d.txt embeddings file
GLOVE_DIR = "/home/fmpr/datasets/glove.6B/"
# length every token sequence is padded to; also the input length of the network
MAX_SEQUENCE_LENGTH = 1000
# vocabulary cap given to the Tokenizer and used to bound the embedding matrix
MAX_NB_WORDS = 20000
# width of one embedding vector (the embeddings file loaded below is the 300d one)
EMBEDDING_DIM = 300

# Load data

In [3]:
def read_texts(filename):
    """Read a text file and return its lines as a list of stripped strings.

    Args:
        filename: path of the text file to read (one document per line).

    Returns:
        A list with one entry per line of the file, each with leading and
        trailing whitespace (including the newline) removed. Empty lines are
        kept as empty strings so that the list stays aligned with the file.
    """
    # the context manager closes the file even if reading raises midway
    with open(filename) as f:
        return [line.strip() for line in f]

In [4]:
# texts_all is the corpus the tokenizer vocabulary is fitted on further down
texts_all = read_texts(DATA_PATH+"texts_all.txt")
texts_train = read_texts(DATA_PATH+"texts_train.txt")
# targets_train is used below as the ground-truth training target and is also
# the source of the standardization statistics
targets_train = np.loadtxt(DATA_PATH+"ratings_train.txt")
# alternative training targets: the "_mean" file feeds the aggregated-answers
# baseline below; NOTE(review): "_DS" is not used in the visible cells
# (presumably a Dawid-Skene style aggregation -- confirm)
targets_train_mean = np.loadtxt(DATA_PATH+"ratings_train_mean.txt")
targets_train_ds = np.loadtxt(DATA_PATH+"ratings_train_DS.txt")
texts_test = read_texts(DATA_PATH+"texts_test.txt")
targets_test = np.loadtxt(DATA_PATH+"ratings_test.txt")

# quick sanity check on the split sizes
print("Num. train texts: %d" % len(texts_train))
print("Num. test texts:  %d" % len(texts_test))

Num. train texts: 1498
Num. test texts:  3508


# Load crowdsourced answers from Mechanical Turk

In [5]:
# Load the space-delimited answers file into a plain NumPy matrix with one row
# per training text and one column per annotator.
# `.values` replaces `DataFrame.as_matrix()`, which is deprecated and was
# removed in pandas 1.0; it returns the same ndarray on old and new versions.
answers = pd.read_csv(DATA_PATH+"answers.txt", header=None, delimiter=" ").values
# drop the last parsed column
# NOTE(review): presumably an empty column produced by a trailing delimiter on
# each line -- confirm against the file format.
answers = answers[:,:-1]
print("AMT answers matrix shape: %s" % str(answers.shape))
# number of annotators = number of remaining columns
N_ANNOT = answers.shape[1]
print("Num. annotators: %d" % N_ANNOT)

AMT answers matrix shape: (1498, 135)
Num. annotators: 135


# Standardize targets

In [6]:
# Standardization statistics come from the ground-truth training ratings and
# are reused for every other target so that all of them share one scale.
mean_target = np.mean(targets_train)
std_target = np.std(targets_train)
print("Mean target: %.3f" % mean_target)
print("Std. target: %.3f" % std_target)

# NOTE: this cell rebinds its inputs, so it must be run exactly once per kernel;
# running it again would standardize already-standardized values.
targets_train = (np.array(targets_train) - mean_target) / std_target
targets_train_mean = (np.array(targets_train_mean) - mean_target) / std_target
targets_train_ds = (np.array(targets_train_ds) - mean_target) / std_target
targets_test = (np.array(targets_test) - mean_target) / std_target

# Standardize the observed crowd answers in one vectorized step. An entry of -1
# means the annotator gave no answer for that text; those entries are replaced
# by the sentinel 999999999 (use this ugly trick to encode a missing answer).
# np.where builds a new float array, so the result does not depend on the dtype
# of the loaded matrix and no value is written back into it in place.
missing_answers = answers == -1
answers = np.where(missing_answers, 999999999, (answers - mean_target) / std_target)

Mean target: 0.574
Std. target: 0.183


# Build index mapping words in the embeddings set to their embedding vector

In [7]:
# Map every word of the GloVe file to its embedding vector.
# Each line of the file is "<word> <v1> <v2> ... <vN>" separated by whitespace.
embeddings_index = {}
# the context manager closes the file even if a malformed line raises
with open(GLOVE_DIR + 'glove.6B.300d.txt') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

# Vectorize the text samples into a 2D integer tensor and pad sequences

In [8]:
# fit the vocabulary on the full corpus (texts_all) so that train and test
# sequences share the same word -> integer mapping
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts_all)
sequences_train = tokenizer.texts_to_sequences(texts_train)
sequences_test = tokenizer.texts_to_sequences(texts_test)

# word_index is reused below to build the embedding matrix
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

# bring every sequence to MAX_SEQUENCE_LENGTH so the data forms a 2D tensor
data_train = pad_sequences(sequences_train, maxlen=MAX_SEQUENCE_LENGTH)
data_test = pad_sequences(sequences_test, maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of train tensor:', data_train.shape)
print('Shape of test tensor:', data_test.shape)

Found 46259 unique tokens.
('Shape of train tensor:', (1498, 1000))
('Shape of test tensor:', (3508, 1000))


# Prepare embedding matrix

In [9]:
print('Preparing embedding matrix.')
# The Tokenizer's word_index is 1-based (index 0 is reserved), so the matrix
# needs len(word_index) + 1 rows to hold the highest index when the vocabulary
# is smaller than MAX_NB_WORDS. Without the +1 that case raises an IndexError.
num_words = min(MAX_NB_WORDS, len(word_index) + 1)
embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
for word, i in word_index.items():
    # indices at or beyond the vocabulary cap have no row in the matrix
    if i >= MAX_NB_WORDS:
        continue
    embedding_vector = embeddings_index.get(word)
    # words not found in the embedding index keep their all-zeros row
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

Preparing embedding matrix.


# Define the base deep learning model

Here we use pre-trained GloVe word embeddings (kept fixed during training) as the input representation. Our base model is a 1-D convolutional network: two convolutional layers with 128 filters each, both followed by max-pooling, with 50% dropout after the first pooling step, then a densely-connected layer with 32 hidden units and a linear output layer with a single unit.

In [10]:
def build_base_model():
    """Build and compile the base convolutional regression network.

    The network reads integer token sequences of length MAX_SEQUENCE_LENGTH,
    looks them up in a frozen embedding layer initialised from
    `embedding_matrix`, and maps them to a single linear output.

    Returns:
        A tuple (base_model, sequence_input, preds): the compiled Keras model
        together with its input tensor and its output tensor, so that callers
        can stack further layers on top of `preds`.
    """
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')

    # pre-trained word embeddings; trainable=False keeps them fixed
    embedded_sequences = Embedding(num_words,
                                   EMBEDDING_DIM,
                                   weights=[embedding_matrix],
                                   input_length=MAX_SEQUENCE_LENGTH,
                                   trainable=False)(sequence_input)

    # first convolution stage, regularised with dropout
    hidden = Conv1D(128, 3, activation='relu')(embedded_sequences)
    hidden = MaxPooling1D(5)(hidden)
    hidden = Dropout(0.5)(hidden)

    # second convolution stage
    hidden = Conv1D(128, 5, activation='relu')(hidden)
    hidden = MaxPooling1D(5)(hidden)

    # regression head
    hidden = Flatten()(hidden)
    hidden = Dense(32, activation='relu')(hidden)
    preds = Dense(1, activation='linear')(hidden)

    base_model = Model(sequence_input, preds)
    base_model.compile(loss='mse', optimizer='adam')

    return base_model, sequence_input, preds

# Auxiliary function for evaluating the models

In [11]:
def compute_error(trues, predicted):
    """Compute regression quality metrics for a set of predictions.

    Args:
        trues: NumPy array with the reference values.
        predicted: NumPy array with the predicted values, same shape as `trues`.

    Returns:
        A tuple (corr, mae, mse, rmse, r2): the correlation coefficient between
        predictions and references, the mean absolute error, the mean squared
        error, its square root, and the coefficient of determination floored
        at 0.
    """
    corr = np.corrcoef(predicted, trues)[0,1]

    # the residuals are shared by all the error measures below
    residuals = predicted - trues
    squared_residuals = residuals ** 2

    mae = np.mean(np.abs(residuals))
    mse = np.mean(squared_residuals)
    rmse = np.sqrt(mse)

    # R^2 = 1 - SS_res / SS_tot, reported as 0 whenever it is not positive
    total_sum_of_squares = np.sum((trues - np.mean(trues)) ** 2)
    r2 = 1 - np.sum(squared_residuals) / total_sum_of_squares
    if not r2 > 0:
        r2 = 0

    return corr, mae, mse, rmse, r2

def eval_model(model, test_data, test_labels):
    """Evaluate a model on the test set in the original (unstandardized) scale.

    Args:
        model: object exposing `predict(test_data)` whose result is indexed
            with `[:,0]` to obtain one prediction per sample.
        test_data: inputs handed to `model.predict`.
        test_labels: standardized reference targets.

    Returns:
        A tuple (corr_test, mae_test, rmse_test, r2_test). The R2 value is
        also printed.
    """
    # undo the standardization on both predictions and references
    standardized_preds = model.predict(test_data)[:,0]
    predicted = standardized_preds * std_target + mean_target
    trues = test_labels * std_target + mean_target

    # the MSE is computed by compute_error but not reported here
    metrics = compute_error(trues, predicted)
    corr_test, mae_test, _, rmse_test, r2_test = metrics
    print("R2 Test:   %.3f" % r2_test)

    return corr_test, mae_test, rmse_test, r2_test

# Train the model on the true labels (ground truth) and evaluate on testset

In [12]:
# fresh base model; only the compiled model is needed here, so the input and
# output tensors returned alongside it are discarded
model, _, _ = build_base_model()
# fit directly on the standardized ground-truth ratings
model.fit(data_train, targets_train, batch_size=BATCH_SIZE, epochs=N_EPOCHS, shuffle=True, verbose=2)

Epoch 1/100
1s - loss: 0.9982
Epoch 2/100
0s - loss: 0.9682
Epoch 3/100
0s - loss: 0.9082
Epoch 4/100
0s - loss: 0.7853
Epoch 5/100
0s - loss: 0.6910
Epoch 6/100
0s - loss: 0.5775
Epoch 7/100
0s - loss: 0.4415
Epoch 8/100
0s - loss: 0.3370
Epoch 9/100
0s - loss: 0.2475
Epoch 10/100
0s - loss: 0.2256
Epoch 11/100
0s - loss: 0.1557
Epoch 12/100
0s - loss: 0.1254
Epoch 13/100
0s - loss: 0.1152
Epoch 14/100
0s - loss: 0.0894
Epoch 15/100
0s - loss: 0.0907
Epoch 16/100
0s - loss: 0.0866
Epoch 17/100
0s - loss: 0.0861
Epoch 18/100
0s - loss: 0.0726
Epoch 19/100
0s - loss: 0.0702
Epoch 20/100
0s - loss: 0.0594
Epoch 21/100
0s - loss: 0.0614
Epoch 22/100
0s - loss: 0.0534
Epoch 23/100
0s - loss: 0.0587
Epoch 24/100
0s - loss: 0.0748
Epoch 25/100
0s - loss: 0.0671
Epoch 26/100
0s - loss: 0.0666
Epoch 27/100
0s - loss: 0.0652
Epoch 28/100
0s - loss: 0.0512
Epoch 29/100
0s - loss: 0.0477
Epoch 30/100
0s - loss: 0.0432
Epoch 31/100
0s - loss: 0.0451
Epoch 32/100
0s - loss: 0.0391
Epoch 33/100
0s -

<keras.callbacks.History at 0x7f5c614896d0>

In [13]:
# test-set metrics of the model trained on the ground-truth ratings
corr_test, mae_test, rmse_test, r2_test = eval_model(model, data_test, targets_test)

R2 Test:   0.453


# Train the model on the mean of the crowd answers (the regression counterpart of majority voting) and evaluate on testset

In [14]:
# fresh base model so that nothing is carried over from the previous experiment
model, _, _ = build_base_model()
# fit on the standardized targets loaded from ratings_train_mean.txt
model.fit(data_train, targets_train_mean, batch_size=BATCH_SIZE, epochs=N_EPOCHS, shuffle=True, verbose=2)

Epoch 1/100
0s - loss: 0.7590
Epoch 2/100
0s - loss: 0.7391
Epoch 3/100
0s - loss: 0.6960
Epoch 4/100
0s - loss: 0.6638
Epoch 5/100
0s - loss: 0.5879
Epoch 6/100
0s - loss: 0.4720
Epoch 7/100
0s - loss: 0.3754
Epoch 8/100
0s - loss: 0.3676
Epoch 9/100
0s - loss: 0.2721
Epoch 10/100
0s - loss: 0.2234
Epoch 11/100
0s - loss: 0.1791
Epoch 12/100
0s - loss: 0.1554
Epoch 13/100
0s - loss: 0.1222
Epoch 14/100
0s - loss: 0.1105
Epoch 15/100
0s - loss: 0.0839
Epoch 16/100
0s - loss: 0.0756
Epoch 17/100
0s - loss: 0.0765
Epoch 18/100
0s - loss: 0.0619
Epoch 19/100
0s - loss: 0.0580
Epoch 20/100
0s - loss: 0.0510
Epoch 21/100
0s - loss: 0.0492
Epoch 22/100
0s - loss: 0.0433
Epoch 23/100
0s - loss: 0.0437
Epoch 24/100
0s - loss: 0.0442
Epoch 25/100
0s - loss: 0.0612
Epoch 26/100
0s - loss: 0.0487
Epoch 27/100
0s - loss: 0.0471
Epoch 28/100
0s - loss: 0.0429
Epoch 29/100
0s - loss: 0.0416
Epoch 30/100
0s - loss: 0.0376
Epoch 31/100
0s - loss: 0.0334
Epoch 32/100
0s - loss: 0.0340
Epoch 33/100
0s -

<keras.callbacks.History at 0x7f5c259d9ed0>

In [15]:
# test-set metrics of the model trained on the aggregated crowd answers
corr_test, mae_test, rmse_test, r2_test = eval_model(model, data_test, targets_test)

R2 Test:   0.301


# Train the model using proposed DL-B approach and evaluate on testset

We start by adding a new layer (CrowdsRegression) on top of our neural network. We then require a special loss (MaskedMultiMSE) to handle the missing labels from some of the annotators.

In [16]:
# keep the input and output tensors of the base network: the crowds layer is
# stacked on `preds`, and both tensors are reused after training to rebuild a
# model without the crowds layer
base_model, sequence_input, preds = build_base_model()

# add crowds layer on top of the base model (one output per annotator)
# NOTE(review): the meaning of conn_type="B" is defined by the crowd_layer
# package and is not visible here -- see its documentation
ma_preds = CrowdsRegression(N_ANNOT, conn_type="B")(preds)

# instantiate specialized masked loss to handle missing answers
# NOTE(review): presumably it ignores the entries encoded with the 999999999
# sentinel written into `answers` above -- confirm in crowd_layer
loss = MaskedMultiMSE().loss

# wrap input -> per-annotator outputs into a new model and compile it with the masked loss
model = Model(sequence_input, ma_preds)
model.compile(optimizer='adam', loss=loss)

# train model on the full matrix of (standardized) crowd answers
model.fit(data_train, answers, batch_size=BATCH_SIZE, epochs=N_EPOCHS, shuffle=True, verbose=2)

Epoch 1/100
0s - loss: 0.0492
Epoch 2/100
0s - loss: 0.0474
Epoch 3/100
0s - loss: 0.0465
Epoch 4/100
0s - loss: 0.0442
Epoch 5/100
0s - loss: 0.0416
Epoch 6/100
0s - loss: 0.0375
Epoch 7/100
0s - loss: 0.0361
Epoch 8/100
0s - loss: 0.0345
Epoch 9/100
0s - loss: 0.0312
Epoch 10/100
0s - loss: 0.0289
Epoch 11/100
0s - loss: 0.0266
Epoch 12/100
0s - loss: 0.0252
Epoch 13/100
0s - loss: 0.0241
Epoch 14/100
0s - loss: 0.0236
Epoch 15/100
0s - loss: 0.0223
Epoch 16/100
0s - loss: 0.0217
Epoch 17/100
0s - loss: 0.0214
Epoch 18/100
0s - loss: 0.0209
Epoch 19/100
0s - loss: 0.0208
Epoch 20/100
0s - loss: 0.0204
Epoch 21/100
0s - loss: 0.0203
Epoch 22/100
0s - loss: 0.0200
Epoch 23/100
0s - loss: 0.0198
Epoch 24/100
0s - loss: 0.0197
Epoch 25/100
0s - loss: 0.0195
Epoch 26/100
0s - loss: 0.0194
Epoch 27/100
0s - loss: 0.0195
Epoch 28/100
0s - loss: 0.0194
Epoch 29/100
0s - loss: 0.0196
Epoch 30/100
0s - loss: 0.0193
Epoch 31/100
0s - loss: 0.0189
Epoch 32/100
0s - loss: 0.0187
Epoch 33/100
0s -

<keras.callbacks.History at 0x7f5c255bfad0>

Before evaluating our model, we need to remove the crowds layer used during training in order to expose the aggregation (bottleneck) layer

In [17]:
# save weights from crowds layer for later
# The crowds layer was stacked on top of the base network, so it is the last
# layer of the training model. A hard-coded position (index 5) points at the
# second Conv1D layer of this architecture instead, so address it from the end.
weights = model.layers[-1].get_weights()

# skip CrowdsLayer for predictions: rebuild a model that stops at the output of
# the base network, which shares the layers (and trained weights) used above
model = Model(sequence_input, preds)
# compiling does not retrain anything here; the model is only used for prediction
model.compile(loss="mse", optimizer='adam')

# evaluate model
corr_test, mae_test, rmse_test, r2_test = eval_model(model, data_test, targets_test)

R2 Test:   0.400
