# Google Drive Setup

In [None]:
# check if we're running on GPU
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU\n')
else:
  print(gpu_info+'\n')

# RAM info
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

Sun Apr 10 05:35:29 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P0    33W / 250W |   2219MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Mount Google Drive into the Colab filesystem so project files can be read and written.
# The project folder is exposed through the "folder" variable for the rest of the notebook.
from google.colab import drive
from pathlib import Path
drive.mount('/content/drive')
folder = Path('/content/drive/My Drive/W266-NLP-Final-Project') # Path object, so files are addressed like this: folder/'dataset.csv'

Mounted at /content/drive


In [None]:
# Make the project folder on Drive importable so that our own modules
# (e.g. bertembeddings) can be imported by the cells below.
import sys

PROJECT_MODULE_DIR = '/content/drive/My Drive/W266-NLP-Final-Project'

# Only insert once: without this guard every re-run of the cell would
# prepend another duplicate entry to sys.path.
if PROJECT_MODULE_DIR not in sys.path:
  sys.path.insert(0, PROJECT_MODULE_DIR)

# Imports

In [None]:
#!pip install keras
#!pip install tensorflow
#!pip install torch==1.4.0
#!pip install sentencepiece
!pip install --upgrade transformers==3.0.2 # the authors probably used version 3.0.2
# !pip install contractions
# !pip install unidecode
# !pip install contractions
!pip install nltk

Collecting transformers==3.0.2
  Downloading transformers-3.0.2-py3-none-any.whl (769 kB)
[K     |████████████████████████████████| 769 kB 5.3 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 68.7 MB/s 
Collecting sentencepiece!=0.1.92
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 63.2 MB/s 
Collecting tokenizers==0.8.1.rc1
  Downloading tokenizers-0.8.1rc1-cp37-cp37m-manylinux1_x86_64.whl (3.0 MB)
[K     |████████████████████████████████| 3.0 MB 64.7 MB/s 
Installing collected packages: tokenizers, sentencepiece, sacremoses, transformers
Successfully installed sacremoses-0.0.49 sentencepiece-0.1.96 tokenizers-0.8.1rc1 transformers-3.0.2


In [None]:
# the basics
import pandas as pd
import numpy as np
# import io
# import os
# import logging
# import random

# data cleaning
# import re
# import contractions as ct
# import string
# import unidecode 

# math + machine learning
# from scipy.stats import spearmanr
# from math import floor, ceil
from tqdm import tqdm # for nice progress meters
import sklearn
# from sklearn.model_selection import GroupKFold
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import nltk 
import torch
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow import keras 
import bertembeddings # our personal tokenizing function

# import keras model and layers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, Dropout, Concatenate
# import tensorflow.keras.utils.Sequence
# from transformers import *
import transformers
from transformers import TFBertModel, BertTokenizer
from transformers import BertTokenizer, TFBertModel
# from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

# imports for model and tradeoff evaluation
from timeit import default_timer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# viz
# import seaborn as sns
# import matplotlib.pyplot as plt

np.set_printoptions(suppress=True)
print(tf.__version__)
print(transformers.__version__)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
2.8.0
3.0.2


# Save Model Results and Runtime

In [None]:
# to display all text in a row
pd.set_option('display.max_colwidth', 0)

# Raw texts and labels; only the two columns used in this notebook are read.
data = pd.read_csv(folder/'clean_dataset.csv', usecols=['text', 'humor'])

# Pre-tokenized model inputs stored as .npz archives.
# NOTE: allow_pickle=True can execute code embedded in the file, so only load archives we created ourselves.
inputs = np.load(folder/"inputs_full_10k.npz", allow_pickle=True)
val_inputs = np.load(folder/"val_full_3k.npz", allow_pickle=True)
test_inputs = np.load(folder/'test_inputs_full_1k.npz', allow_pickle=True)

# Features: every array of each archive, kept in archive order.
X_train, X_val, X_test = (
    list(archive.values()) for archive in (inputs, val_inputs, test_inputs)
)

# Labels: train = rows 0-9,999, validation = rows 10,000-12,999, test = rows 160,000-160,999.
humor = data['humor']
y_train = humor[:10000].values
y_val = humor[10000:13000].values
y_test = humor[160000:161000].values

# Text + label of the test rows, kept around to look at wrong classifications.
df = data[['text', 'humor']][160000:161000]

In [None]:
# function to calculate f1 and accuracy scores
def model_scores(test_data, test_labels, model, runtime, df, results_path='model_results.csv'):
  """
  Evaluate a binary classifier on a test set, print accuracy/F1, and log all metrics.

  inputs
  - test_data: tokenized test inputs, passed unchanged to model.predict
    (ie: the arrays loaded from folder/'test_inputs_full_1k.npz')
  - test_labels: np array of boolean labels for your test set
  - model: keras model being evaluated; model.name labels the results row
  - runtime: runtime of the model being evaluated
  - df: dataframe with all the evaluation metrics collected so far (may be empty)
  - results_path: CSV file the updated results table is written to

  returns
  - a new dataframe made of df plus one row for this model; the same table is
    also written to results_path

  notes
  - A prediction counts as positive when the model output is > 0.5.
  - Precision, recall and F1 are those of the positive class. They are reported
    as 0 (instead of raising) when they are undefined, e.g. when nothing is
    predicted positive.
  - 'Baseline Accuracy' is the share of the most frequent class in test_labels,
    i.e. the accuracy of always predicting that class.
  """
  y_pred = (model.predict(test_data) > 0.5).flatten()

  # average='binary' returns scalar scores for the positive class only, so no
  # per-class array has to be indexed (the support slot is None in this mode).
  precision, recall, f1, _ = precision_recall_fscore_support(
      test_labels, y_pred, average='binary', zero_division=0)

  # majority-class share of the true labels
  _, class_counts = np.unique(test_labels, return_counts=True)
  dummy_acc = round(class_counts.max() / class_counts.sum(), 3)

  acc = round(accuracy_score(test_labels, y_pred), 3)
  print("Accuracy = {}".format(acc))
  print("F1 score = {}".format(round(f1, 3)))

  cols = ['Model', 'Baseline Accuracy', 'Accuracy', 'Precision', 'Recall', 'F1', 'Runtime']
  model_df = pd.DataFrame([[model.name, dummy_acc, acc, precision, recall, f1, runtime]], columns=cols)
  # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent
  df = pd.concat([df, model_df], ignore_index=True)
  df.to_csv(results_path, index=False)
  return df

In [None]:
# dataframe with all evaluation metrics results
# Starts empty; each model_scores(...) call below returns it with one more row.
# NOTE: re-running this cell discards the rows collected so far in this session.
results_df = pd.DataFrame()

# Baseline Model
- 1-layer NN that predicts binary humor label using pre-trained BERT embeddings of 10K short jokes/non-jokes
- inputs: input IDs, attention masks, and token type IDs from pre-trained BERT tokenizer
- outputs: probability of a short text being humorous (p > 0.5 = humorous)

In [None]:
# Time the graph construction; the training cell later adds its own elapsed time to base_rt.
start = default_timer()

### basic model architecture ###

INPUT_LEN = 100 # same as MAX_LENGTH in bertembeddings.py

# Three integer inputs, in order: input IDs, attention masks, token type IDs.
x1, x2, x3 = [
    Input(shape=(INPUT_LEN,), dtype=tf.int32, name=layer_name)
    for layer_name in ('input_ids', 'attention_masks', 'token_type_ids')
]

# Pre-trained BERT encoder; the element at index 1 of its output is used as the pooled embedding.
bert_model = TFBertModel.from_pretrained('bert-base-uncased')
bert_embeddings = bert_model(x1, attention_mask=x2, token_type_ids=x3)
pooled_embeddings = bert_embeddings[1]

# Single sigmoid unit on top of the pooled embedding.
output_layer = Dense(1, activation='sigmoid', name='output')
yhat = output_layer(pooled_embeddings)

### end of model ###

stop = default_timer()
base_rt = stop - start

Downloading:   0%|          | 0.00/433 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/536M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Shared constants.
MAX_SENTENCES = 5 # presumably the number of sentence-level paths per text (the ColBERT cells build 5) - TODO confirm
MAX_SENTENCE_LENGTH = 20 # same value as SENT_INPUT_LEN used for the sentence-level inputs
MAX_LENGTH = 100 # same value as INPUT_LEN / DOC_INPUT_LEN used for the document-level inputs
BATCH_SIZE = 10 # NOTE(review): not referenced by the fit() calls visible in this notebook, which pass batch_size=6
EPOCHS = 5 # number of training epochs passed to fit()

## Training the model

In [None]:
start = default_timer()

### initialize and compile model ###
base_model = Model(inputs=[x1, x2, x3], outputs=[yhat], name='baseline')
base_model.compile(
    tf.keras.optimizers.Adam(0.00005),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### train model ###

# Location on Drive where the best model seen during training is stored.
checkpoint_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/baseline'

# Save the full model (not only the weights), and only when val_accuracy reaches a new maximum.
baseline_cp_options = dict(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(**baseline_cp_options)

# The baseline is fed only the last three arrays of each input archive
# (presumably the document-level IDs / masks / token types - verify against the archive key order).
train_features, val_features = X_train[-3:], X_val[-3:]
base_hist = base_model.fit(
    train_features,
    y_train,
    validation_data=(val_features, y_val),
    epochs=EPOCHS,
    batch_size=6,
    callbacks=[model_checkpoint_callback],
)

# Reload the checkpointed (best) weights so evaluation does not use the last epoch's weights.
base_model.load_weights(checkpoint_filepath)

stop = default_timer()
base_rt += (stop - start)

In [None]:
# in case you need to load it again
# NOTE: x1, x2, x3 and yhat must still be the baseline tensors here; the ColBERT
# cells further down rebind those names, so run this cell before them.
checkpoint_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/baseline'
base_model = Model([x1, x2, x3], [yhat], name='baseline')
base_model.load_weights(checkpoint_filepath)

# Predicted labels on the test set (positive when the sigmoid output is > 0.5).
y_pred = (base_model.predict(X_test[-3:]) > 0.5).flatten()

# Show the confusion matrix as the cell output: rows = true class, columns = predicted class.
confusion_matrix(y_test, y_pred)

array([[469,  15],
       [ 18, 498]])

In [None]:
# Score the baseline on the last three test arrays and add its row (labelled with the model name) to results_df.
results_df = model_scores(X_test[-3:], y_test, base_model, base_rt, results_df)

Accuracy = 0.966
F1 score = 0.965


# ColBERT with 10k data

In [None]:
SENT_INPUT_LEN = 20
DOC_INPUT_LEN = 100
DROPOUT_RATE = 0.5 # TODO: placeholder value, the dropout rate is still to be changed

# 18 inputs, 3 for each parallel path (5 sentence-level paths & 1 document-level path).
# Each row holds the layer names of one sentence path, in the order
# (input IDs, attention masks, token type IDs).
sentence_input_names = [
    ('input_ii_sent1', 'input_am_sent1', 'input_tti_sent1'),
    ('input_ii_sent2', 'input_am_sent2', 'input_tti_sent2'),
    ('input_ii_sent3', 'input_am_sent3', 'input_tti_sent3'),
    ('input_ii_sent4', 'input_am_sent4', 'input_tti_sent4'),
    ('input_ii_sent5', 'input_am_sent5', 'input_tti_sent5'),
]
sentence_inputs = [
    [Input(shape=(SENT_INPUT_LEN,), dtype=tf.int32, name=layer_name) for layer_name in path_names]
    for path_names in sentence_input_names
]
((input_sent1_1, input_sent1_2, input_sent1_3),
 (input_sent2_1, input_sent2_2, input_sent2_3),
 (input_sent3_1, input_sent3_2, input_sent3_3),
 (input_sent4_1, input_sent4_2, input_sent4_3),
 (input_sent5_1, input_sent5_2, input_sent5_3)) = sentence_inputs

doc_inputs = [
    Input(shape=(DOC_INPUT_LEN,), dtype=tf.int32, name=layer_name)
    for layer_name in ('input_ii_doc', 'input_am_doc', 'input_tti_doc')
]
input_doc_1, input_doc_2, input_doc_3 = doc_inputs

# embedding layer for sentences and documents: one BERT instance shared by all six paths
bert_model = TFBertModel.from_pretrained('bert-base-uncased')
bert_outputs = [
    bert_model(ids, attention_mask=mask, token_type_ids=type_ids)
    for ids, mask, type_ids in sentence_inputs + [doc_inputs]
]
(bert_embeddings1, bert_embeddings2, bert_embeddings3,
 bert_embeddings4, bert_embeddings5, bert_embeddings6) = bert_outputs

# get pooled vectors of BERT embeddings (index 1 of each BERT output);
# GlobalAveragePooling1D() would be an alternative
x1, x2, x3, x4, x5, x6 = [outputs[1] for outputs in bert_outputs]

# fully connected layer w/ dropout: 32 units per sentence path, 256 for the document path
hidden1_specs = [
    (32, "hidden1_sent1", x1),
    (32, "hidden1_sent2", x2),
    (32, "hidden1_sent3", x3),
    (32, "hidden1_sent4", x4),
    (32, "hidden1_sent5", x5),
    (256, "hidden1_doc", x6),
]
h1_1, h1_2, h1_3, h1_4, h1_5, h1_6 = [
    Dense(units, activation='relu', name=layer_name)(pooled)
    for units, layer_name, pooled in hidden1_specs
]

dropout1_specs = [
    ("h1_dropout_sent1", h1_1),
    ("h1_dropout_sent2", h1_2),
    ("h1_dropout_sent3", h1_3),
    ("h1_dropout_sent4", h1_4),
    ("h1_dropout_sent5", h1_5),
    ("h1_dropout_doc", h1_6),
]
h1_dropout1, h1_dropout2, h1_dropout3, h1_dropout4, h1_dropout5, h1_dropout6 = [
    Dropout(DROPOUT_RATE, name=layer_name)(hidden)
    for layer_name, hidden in dropout1_specs
]

# fully connected layer: 8 units per sentence path, 64 for the document path
hidden2_specs = [
    (8, "hidden2_sent1", h1_dropout1),
    (8, "hidden2_sent2", h1_dropout2),
    (8, "hidden2_sent3", h1_dropout3),
    (8, "hidden2_sent4", h1_dropout4),
    (8, "hidden2_sent5", h1_dropout5),
    (64, "hidden2_doc", h1_dropout6),
]
h2_1, h2_2, h2_3, h2_4, h2_5, h2_6 = [
    Dense(units, activation='relu', name=layer_name)(dropped)
    for units, layer_name, dropped in hidden2_specs
]

# concatenate outputs of all 6 parallel layers
xx = Concatenate()([h2_1, h2_2, h2_3, h2_4, h2_5, h2_6])

# fully connected layer w/ dropout for concatenated inputs
h3 = Dense(512, activation='relu', name="hidden3")(xx)
h3_dropout = Dropout(DROPOUT_RATE)(h3)

# fully connected layer
h4 = Dense(256, activation='relu', name="hidden4")(h3_dropout)

# final output layer: one sigmoid unit
yhat = Dense(1, activation='sigmoid', name="output")(h4)

In [None]:
# All 18 input tensors: the five sentence triplets first, then the document triplet.
model_inputs = [
    input_sent1_1, input_sent1_2, input_sent1_3,
    input_sent2_1, input_sent2_2, input_sent2_3,
    input_sent3_1, input_sent3_2, input_sent3_3,
    input_sent4_1, input_sent4_2, input_sent4_3,
    input_sent5_1, input_sent5_2, input_sent5_3,
    input_doc_1, input_doc_2, input_doc_3,
]

# Training arrays keyed by input-layer name. The archive keys '0'..'17' are paired with
# the layer names in the same order as model_inputs.
# we could just have the list of inputs, but this just helps us keep track
input_layer_names = [
    'input_ii_sent1', 'input_am_sent1', 'input_tti_sent1',
    'input_ii_sent2', 'input_am_sent2', 'input_tti_sent2',
    'input_ii_sent3', 'input_am_sent3', 'input_tti_sent3',
    'input_ii_sent4', 'input_am_sent4', 'input_tti_sent4',
    'input_ii_sent5', 'input_am_sent5', 'input_tti_sent5',
    'input_ii_doc', 'input_am_doc', 'input_tti_doc',
]
input_dict = {
    layer_name: inputs[str(position)]
    for position, layer_name in enumerate(input_layer_names)
}

model = Model(inputs=model_inputs, outputs=[yhat], name="colbert_full_model")

In [None]:
EPOCHS = 5
checkpoint_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_best'

# Validation inputs: every array stored in the archive, in archive order.
val_inputs = np.load('/content/drive/MyDrive/W266-NLP-Final-Project/val_full_3k.npz', allow_pickle=True)
x_val = list(val_inputs.values())

# Checkpoint the full model (save_weights_only=False), but only when val_accuracy
# reaches a new maximum (save_best_only=True).
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

# Use the name "colbert_full_model": model_scores labels the results row with model.name,
# and this is the name this model carries in the results table.
model = Model(inputs=model_inputs, outputs=[yhat], name="colbert_full_model")
model.compile(tf.keras.optimizers.Adam(0.00001),
              loss='binary_crossentropy',
              metrics = ['accuracy'])

# Train on the name-keyed inputs; the callback keeps only the best epoch on disk.
model.fit(input_dict, y_train, validation_data=(x_val, y_val), 
          epochs=EPOCHS, batch_size=6, callbacks=[model_checkpoint_callback])

# The model weights (that are considered the best) are loaded into the model.
model.load_weights(checkpoint_filepath)

# ColBERT with 5k data

In [None]:
# Time the graph construction; the training cell later adds its own elapsed time to colbert_5k_rt.
start = default_timer()

### model architecture ###

SENT_INPUT_LEN = 20
DOC_INPUT_LEN = 100
DROPOUT_RATE = 0.5

# 18 inputs, 3 for each parallel path (5 sentence-level paths & 1 document-level path).
# Each row holds the layer names of one sentence path, in the order
# (input IDs, attention masks, token type IDs).
sentence_input_names = [
    ('input_ii_sent1', 'input_am_sent1', 'input_tti_sent1'),
    ('input_ii_sent2', 'input_am_sent2', 'input_tti_sent2'),
    ('input_ii_sent3', 'input_am_sent3', 'input_tti_sent3'),
    ('input_ii_sent4', 'input_am_sent4', 'input_tti_sent4'),
    ('input_ii_sent5', 'input_am_sent5', 'input_tti_sent5'),
]
sentence_inputs = [
    [Input(shape=(SENT_INPUT_LEN,), dtype=tf.int32, name=layer_name) for layer_name in path_names]
    for path_names in sentence_input_names
]
((input_sent1_1, input_sent1_2, input_sent1_3),
 (input_sent2_1, input_sent2_2, input_sent2_3),
 (input_sent3_1, input_sent3_2, input_sent3_3),
 (input_sent4_1, input_sent4_2, input_sent4_3),
 (input_sent5_1, input_sent5_2, input_sent5_3)) = sentence_inputs

doc_inputs = [
    Input(shape=(DOC_INPUT_LEN,), dtype=tf.int32, name=layer_name)
    for layer_name in ('input_ii_doc', 'input_am_doc', 'input_tti_doc')
]
input_doc_1, input_doc_2, input_doc_3 = doc_inputs

# embedding layer for sentences and documents: one BERT instance shared by all six paths
bert_model = TFBertModel.from_pretrained('bert-base-uncased')
bert_outputs = [
    bert_model(ids, attention_mask=mask, token_type_ids=type_ids)
    for ids, mask, type_ids in sentence_inputs + [doc_inputs]
]
(bert_embeddings1, bert_embeddings2, bert_embeddings3,
 bert_embeddings4, bert_embeddings5, bert_embeddings6) = bert_outputs

# get pooled vectors of BERT embeddings (index 1 of each BERT output);
# GlobalAveragePooling1D() would be an alternative
x1, x2, x3, x4, x5, x6 = [outputs[1] for outputs in bert_outputs]

# fully connected layer w/ dropout: 32 units per sentence path, 256 for the document path
hidden1_specs = [
    (32, "hidden1_sent1", x1),
    (32, "hidden1_sent2", x2),
    (32, "hidden1_sent3", x3),
    (32, "hidden1_sent4", x4),
    (32, "hidden1_sent5", x5),
    (256, "hidden1_doc", x6),
]
h1_1, h1_2, h1_3, h1_4, h1_5, h1_6 = [
    Dense(units, activation='relu', name=layer_name)(pooled)
    for units, layer_name, pooled in hidden1_specs
]

dropout1_specs = [
    ("h1_dropout_sent1", h1_1),
    ("h1_dropout_sent2", h1_2),
    ("h1_dropout_sent3", h1_3),
    ("h1_dropout_sent4", h1_4),
    ("h1_dropout_sent5", h1_5),
    ("h1_dropout_doc", h1_6),
]
h1_dropout1, h1_dropout2, h1_dropout3, h1_dropout4, h1_dropout5, h1_dropout6 = [
    Dropout(DROPOUT_RATE, name=layer_name)(hidden)
    for layer_name, hidden in dropout1_specs
]

# fully connected layer: 8 units per sentence path, 64 for the document path
hidden2_specs = [
    (8, "hidden2_sent1", h1_dropout1),
    (8, "hidden2_sent2", h1_dropout2),
    (8, "hidden2_sent3", h1_dropout3),
    (8, "hidden2_sent4", h1_dropout4),
    (8, "hidden2_sent5", h1_dropout5),
    (64, "hidden2_doc", h1_dropout6),
]
h2_1, h2_2, h2_3, h2_4, h2_5, h2_6 = [
    Dense(units, activation='relu', name=layer_name)(dropped)
    for units, layer_name, dropped in hidden2_specs
]

# concatenate outputs of all 6 parallel layers
xx = Concatenate()([h2_1, h2_2, h2_3, h2_4, h2_5, h2_6])

# fully connected layer w/ dropout for concatenated inputs
h3 = Dense(512, activation='relu', name="hidden3")(xx)
h3_dropout = Dropout(DROPOUT_RATE)(h3)

# fully connected layer
h4 = Dense(256, activation='relu', name="hidden4")(h3_dropout)

# final output layer: one sigmoid unit
yhat = Dense(1, activation='sigmoid', name="output")(h4)

# All 18 input tensors, flattened: the five sentence triplets first, then the document triplet.
model_inputs = [tensor for path in sentence_inputs + [doc_inputs] for tensor in path]

### end of model architecture ###

stop = default_timer()
colbert_5k_rt = (stop - start)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
start = default_timer()

### initialize and compile model ###
colbert_5k_model = Model(inputs=model_inputs, outputs=[yhat], name='colbert_5k')
colbert_5k_model.compile(
    tf.keras.optimizers.Adam(0.00001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### train model ###
# Keep the first 5,000 training rows and the first 3,000 validation rows of every input array.
X_train_5k = [features[:5000,] for features in X_train]
X_val_3k = [features[:3000,] for features in X_val]

y_train_5k = y_train[:5000]
y_val_3k = y_val[:3000]

# Location on Drive where the best model seen during training is stored.
colbert_5k_cp_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_5k'

# Save the full model (not only the weights), and only when val_accuracy reaches a new maximum.
colbert_5k_cp_options = dict(
    filepath=colbert_5k_cp_filepath,
    save_weights_only=False,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)
colbert_5k_cp_callback = tf.keras.callbacks.ModelCheckpoint(**colbert_5k_cp_options)

# fit model
colbert_5k_hist = colbert_5k_model.fit(
    X_train_5k,
    y_train_5k,
    validation_data=(X_val_3k, y_val_3k),
    epochs=EPOCHS,
    batch_size=6,
    callbacks=[colbert_5k_cp_callback],
)

# Reload the checkpointed (best) weights so evaluation does not use the last epoch's weights.
colbert_5k_model.load_weights(colbert_5k_cp_filepath)

stop = default_timer()
colbert_5k_rt += (stop - start)

Epoch 1/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_5k/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_5k/assets


Epoch 2/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_5k/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_5k/assets


Epoch 3/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_5k/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_5k/assets


Epoch 4/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_5k/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_5k/assets


Epoch 5/5


In [None]:
# Continue from the results table stored in the project folder on Drive.
results_df = pd.read_csv(folder/'model_results.csv')

# Take at most the first 5,000 test rows of every input array and of the labels.
X_test_5k = [features[:5000,] for features in X_test]
y_test_5k = y_test[:5000]

# Score the 5k-trained ColBERT and add its row to the table.
results_df = model_scores(X_test_5k, y_test_5k, colbert_5k_model, colbert_5k_rt, results_df)

# NOTE(review): the table is read from the Drive folder but written to the current
# working directory, so the file on Drive is not updated by this cell - confirm intended.
results_df.to_csv('model_results.csv', index=False)

Accuracy = 0.964
F1 score = 0.963


In [None]:
# Display the accumulated results table (one row per evaluated model).
results_df

Unnamed: 0,Model,Baseline Accuracy,Accuracy,Precision,Recall,F1,Runtime
0,baseline,0.516,0.966,0.962963,0.966942,0.964948,1133.553155
1,colbert_doc,0.516,0.961,0.962578,0.956612,0.959585,1274.059635
2,baseline_sent,0.516,0.961,0.960663,0.958678,0.959669,1715.643404
3,colbert_sent,0.516,0.961,0.970402,0.948347,0.959248,1998.740468
4,colbert_2sent,0.516,0.958,0.947368,0.966942,0.957055,942.127935
5,colbert_simple,0.516,0.962,0.9869,0.933884,0.95966,1788.420719
6,colbert_full_model,0.516,0.97,0.969008,0.969008,0.969008,40.648261
7,colbert_5k,0.516,0.964,0.959016,0.966942,0.962963,1712.479673


# ColBERT with *only* documents as input

In [None]:
# Time the graph construction; the training cell later adds its own elapsed time to colbert_doc_rt.
start = default_timer()

### model architecture ###
DOC_INPUT_LEN = 100
DROPOUT_RATE = 0.5

# 3 inputs (1 document-level path), in order: input IDs, attention masks, token type IDs
input_doc_1, input_doc_2, input_doc_3 = [
    Input(shape=(DOC_INPUT_LEN,), dtype=tf.int32, name=layer_name)
    for layer_name in ('input_ii_doc', 'input_am_doc', 'input_tti_doc')
]

# embedding layer for the document
bert_model = TFBertModel.from_pretrained('bert-base-uncased')
bert_embeddings6 = bert_model(input_doc_1, attention_mask=input_doc_2, token_type_ids=input_doc_3)

# get pooled vector of the BERT embeddings (index 1 of the BERT output)
x6 = bert_embeddings6[1]

# fully connected layer w/ dropout
hidden1_doc_layer = Dense(256, activation='relu', name="hidden1_doc")
h1_6 = hidden1_doc_layer(x6)

h1_doc_dropout_layer = Dropout(DROPOUT_RATE, name="h1_dropout_doc")
h1_dropout6 = h1_doc_dropout_layer(h1_6)

# fully connected layer
hidden2_doc_layer = Dense(64, activation='relu', name="hidden2_doc")
h2_6 = hidden2_doc_layer(h1_dropout6)

# Single path only, so there is nothing to concatenate: hidden3 is fed by h2_6 directly.

# fully connected layer w/ dropout
h3 = Dense(512, activation='relu', name="hidden3")(h2_6)
h3_dropout = Dropout(DROPOUT_RATE)(h3)

# fully connected layer
h4 = Dense(256, activation='relu', name="hidden4")(h3_dropout)

# final output layer: one sigmoid unit
yhat = Dense(1, activation='sigmoid', name="output")(h4)

### end of model architecture ###

stop = default_timer()
colbert_doc_rt = (stop - start)

Downloading:   0%|          | 0.00/433 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/536M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
start = default_timer()

### initialize and compile model ###
EPOCHS = 5

colbert_doc_model = Model(
    inputs=[input_doc_1, input_doc_2, input_doc_3],
    outputs=[yhat],
    name='colbert_doc',
)
colbert_doc_model.compile(
    tf.keras.optimizers.Adam(0.00001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### train model ###

# Location on Drive where the best model seen during training is stored.
colbert_doc_cp_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_doc'

# Save the full model (not only the weights), and only when val_accuracy reaches a new maximum.
colbert_doc_cp_options = dict(
    filepath=colbert_doc_cp_filepath,
    save_weights_only=False,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)
colbert_doc_model_cp_callback = tf.keras.callbacks.ModelCheckpoint(**colbert_doc_cp_options)

# The document-only model is fed only the last three arrays of each input archive.
doc_train_features, doc_val_features = X_train[-3:], X_val[-3:]
colbert_doc_hist = colbert_doc_model.fit(
    doc_train_features,
    y_train,
    validation_data=(doc_val_features, y_val),
    epochs=EPOCHS,
    batch_size=6,
    callbacks=[colbert_doc_model_cp_callback],
)

# Reload the checkpointed (best) weights so evaluation does not use the last epoch's weights.
colbert_doc_model.load_weights(colbert_doc_cp_filepath)

stop = default_timer()
colbert_doc_rt += (stop - start)

Epoch 1/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_doc/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_doc/assets


Epoch 2/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_doc/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_doc/assets


Epoch 3/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_doc/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_doc/assets


Epoch 4/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_doc/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_doc/assets


Epoch 5/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_doc/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_doc/assets




In [None]:
# Score the document-only ColBERT on the last three test arrays and add its row to results_df.
results_df = model_scores(X_test[-3:], y_test, colbert_doc_model, colbert_doc_rt, results_df)

Accuracy = 0.961
F1 score = 0.96


# Baseline Model with Sentence Inputs

In [None]:
# Baseline with sentence inputs: a single shared BERT encoder is applied to each of the
# 5 sentences, the pooled outputs are concatenated and fed directly to one sigmoid unit.
# The time spent building the graph is stored in base_sent_rt.
start = default_timer()

### model architecture ###
SENT_INPUT_LEN = 20

# 15 inputs: (input IDs, attention mask, token type IDs) for each of the 5 sentence paths.
# Keras layer names follow 'input_<ii|am|tti>_sent<k>'.
((input_sent1_1, input_sent1_2, input_sent1_3),
 (input_sent2_1, input_sent2_2, input_sent2_3),
 (input_sent3_1, input_sent3_2, input_sent3_3),
 (input_sent4_1, input_sent4_2, input_sent4_3),
 (input_sent5_1, input_sent5_2, input_sent5_3)) = [
    [Input(shape=(SENT_INPUT_LEN,), dtype=tf.int32, name=f'input_{field}_sent{k}')
     for field in ('ii', 'am', 'tti')]
    for k in range(1, 6)
]

# one BERT encoder instance, reused for every sentence
bert_model = TFBertModel.from_pretrained('bert-base-uncased')
(bert_embeddings1, bert_embeddings2, bert_embeddings3,
 bert_embeddings4, bert_embeddings5) = [
    bert_model(ids, attention_mask=mask, token_type_ids=type_ids)
    for ids, mask, type_ids in (
        (input_sent1_1, input_sent1_2, input_sent1_3),
        (input_sent2_1, input_sent2_2, input_sent2_3),
        (input_sent3_1, input_sent3_2, input_sent3_3),
        (input_sent4_1, input_sent4_2, input_sent4_3),
        (input_sent5_1, input_sent5_2, input_sent5_3),
    )
]

# element [1] of each BERT output is used as the sentence vector
# (GlobalAveragePooling1D() over the token embeddings would be an alternative)
x1, x2, x3, x4, x5 = [
    outputs[1]
    for outputs in (bert_embeddings1, bert_embeddings2, bert_embeddings3,
                    bert_embeddings4, bert_embeddings5)
]

# merge the 5 sentence vectors
xx = Concatenate()([x1, x2, x3, x4, x5])

# single sigmoid unit on top of the concatenation
yhat = Dense(1, activation='sigmoid', name="output")(xx)

### end of model architecture ###

stop = default_timer()
base_sent_rt = stop - start

Some weights of the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Compile and fine-tune the sentence-input baseline, checkpointing whenever val_accuracy
# improves, then restore the checkpointed weights. Training time is added to base_sent_rt.
start = default_timer()

### model setup ###
EPOCHS = 5

# the 15 input tensors, grouped per sentence as (input IDs, attention mask, token type IDs)
sent_inputs = [
    input_sent1_1, input_sent1_2, input_sent1_3,
    input_sent2_1, input_sent2_2, input_sent2_3,
    input_sent3_1, input_sent3_2, input_sent3_3,
    input_sent4_1, input_sent4_2, input_sent4_3,
    input_sent5_1, input_sent5_2, input_sent5_3,
]

base_sent_model = Model(inputs=sent_inputs, outputs=[yhat], name='baseline_sent')
base_sent_model.compile(
    tf.keras.optimizers.Adam(5e-5),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### training ###

# checkpoint location on the mounted Drive
base_sent_cp_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/baseline_sent'

# the full model is saved (save_weights_only=False), but only on a new best val_accuracy
base_sent_model_cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=base_sent_cp_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

# X_*[:15] -> the first 15 input arrays, one per model input
base_sent_hist = base_sent_model.fit(
    x=X_train[:15],
    y=y_train,
    validation_data=(X_val[:15], y_val),
    epochs=EPOCHS,
    batch_size=6,
    callbacks=[base_sent_model_cp_callback],
)

# swap the final-epoch weights for the best checkpointed ones
base_sent_model.load_weights(base_sent_cp_filepath)

stop = default_timer()
# NOTE: accumulates onto the build time from the architecture cell; re-running this cell
# on its own adds the training time again.
base_sent_rt += stop - start

Epoch 1/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/baseline_sent/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/baseline_sent/assets


Epoch 2/5
Epoch 3/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/baseline_sent/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/baseline_sent/assets


Epoch 4/5
Epoch 5/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/baseline_sent/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/baseline_sent/assets




In [None]:
# save model eval results into dataframe
# X_test[:15] takes the first 15 input arrays, matching the 15 inputs of base_sent_model;
# model_scores is defined outside this section and its return value replaces results_df.
results_df = model_scores(X_test[:15], y_test, base_sent_model, base_sent_rt, results_df)





















Accuracy = 0.954
F1 score = 0.953


In [None]:
# display the results table accumulated so far (one row per model_scores call)
results_df

Unnamed: 0,Model,Baseline Accuracy,Accuracy,Precision,Recall,F1,Runtime
0,baseline,0.516,0.966,0.962963,0.966942,0.964948,1133.553155
1,colbert_doc,0.516,0.961,0.962578,0.956612,0.959585,1274.059635
2,baseline_sent,0.516,0.961,0.960663,0.958678,0.959669,1715.643404
3,baseline_sent,0.516,0.954,0.950617,0.954545,0.952577,1886.257185


# ColBERT with *only* sentences as input

In [None]:
# ColBERT, sentence paths only: a shared BERT encoder feeds 5 parallel
# Dense(32) -> Dropout -> Dense(8) stacks, whose outputs are concatenated and passed through
# Dense(512) -> Dropout -> Dense(256) -> sigmoid. Build time is stored in colbert_sent_rt.
start = default_timer()

### model architecture ###
SENT_INPUT_LEN = 20
DROPOUT_RATE = 0.5

# 15 inputs: (input IDs, attention mask, token type IDs) for each of the 5 sentence paths.
# Keras layer names follow 'input_<ii|am|tti>_sent<k>'.
((input_sent1_1, input_sent1_2, input_sent1_3),
 (input_sent2_1, input_sent2_2, input_sent2_3),
 (input_sent3_1, input_sent3_2, input_sent3_3),
 (input_sent4_1, input_sent4_2, input_sent4_3),
 (input_sent5_1, input_sent5_2, input_sent5_3)) = [
    [Input(shape=(SENT_INPUT_LEN,), dtype=tf.int32, name=f'input_{field}_sent{k}')
     for field in ('ii', 'am', 'tti')]
    for k in range(1, 6)
]

# one BERT encoder instance, reused for every sentence
bert_model = TFBertModel.from_pretrained('bert-base-uncased')
(bert_embeddings1, bert_embeddings2, bert_embeddings3,
 bert_embeddings4, bert_embeddings5) = [
    bert_model(ids, attention_mask=mask, token_type_ids=type_ids)
    for ids, mask, type_ids in (
        (input_sent1_1, input_sent1_2, input_sent1_3),
        (input_sent2_1, input_sent2_2, input_sent2_3),
        (input_sent3_1, input_sent3_2, input_sent3_3),
        (input_sent4_1, input_sent4_2, input_sent4_3),
        (input_sent5_1, input_sent5_2, input_sent5_3),
    )
]

# element [1] of each BERT output is used as the sentence vector
# (GlobalAveragePooling1D() over the token embeddings would be an alternative)
x1, x2, x3, x4, x5 = [
    outputs[1]
    for outputs in (bert_embeddings1, bert_embeddings2, bert_embeddings3,
                    bert_embeddings4, bert_embeddings5)
]

# per-sentence hidden layer 1 (32 units) ...
h1_1, h1_2, h1_3, h1_4, h1_5 = [
    Dense(32, activation='relu', name=f"hidden1_sent{k}")(pooled)
    for k, pooled in enumerate((x1, x2, x3, x4, x5), start=1)
]

# ... followed by dropout
h1_dropout1, h1_dropout2, h1_dropout3, h1_dropout4, h1_dropout5 = [
    Dropout(DROPOUT_RATE, name=f"h1_dropout_sent{k}")(hidden)
    for k, hidden in enumerate((h1_1, h1_2, h1_3, h1_4, h1_5), start=1)
]

# per-sentence hidden layer 2 (8 units)
h2_1, h2_2, h2_3, h2_4, h2_5 = [
    Dense(8, activation='relu', name=f"hidden2_sent{k}")(dropped)
    for k, dropped in enumerate(
        (h1_dropout1, h1_dropout2, h1_dropout3, h1_dropout4, h1_dropout5), start=1)
]

# merge the 5 sentence paths
xx = Concatenate()([h2_1, h2_2, h2_3, h2_4, h2_5])

# shared head: Dense(512) + dropout, then Dense(256)
h3 = Dense(512, activation='relu', name="hidden3")(xx)
h3_dropout = Dropout(DROPOUT_RATE)(h3)
h4 = Dense(256, activation='relu', name="hidden4")(h3_dropout)

# sigmoid output unit
yhat = Dense(1, activation='sigmoid', name="output")(h4)

### end of model architecture ###

stop = default_timer()
colbert_sent_rt = stop - start

Some weights of the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Compile and fine-tune the sentence-only ColBERT model, checkpointing whenever val_accuracy
# improves, then restore the checkpointed weights. Training time is added to colbert_sent_rt.
start = default_timer()

### model setup ###
EPOCHS = 5

# the 15 input tensors, grouped per sentence as (input IDs, attention mask, token type IDs)
sent_inputs = [
    input_sent1_1, input_sent1_2, input_sent1_3,
    input_sent2_1, input_sent2_2, input_sent2_3,
    input_sent3_1, input_sent3_2, input_sent3_3,
    input_sent4_1, input_sent4_2, input_sent4_3,
    input_sent5_1, input_sent5_2, input_sent5_3,
]

colbert_sent_model = Model(inputs=sent_inputs, outputs=[yhat], name='colbert_sent')
colbert_sent_model.compile(
    tf.keras.optimizers.Adam(1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### training ###

# checkpoint location on the mounted Drive
colbert_sent_cp_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_sent'

# the full model is saved (save_weights_only=False), but only on a new best val_accuracy
colbert_sent_cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=colbert_sent_cp_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

# X_*[:15] -> the first 15 input arrays, one per model input
colbert_sent_hist = colbert_sent_model.fit(
    x=X_train[:15],
    y=y_train,
    validation_data=(X_val[:15], y_val),
    epochs=EPOCHS,
    batch_size=6,
    callbacks=[colbert_sent_cp_callback],
)

# swap the final-epoch weights for the best checkpointed ones
colbert_sent_model.load_weights(colbert_sent_cp_filepath)

stop = default_timer()
# NOTE: accumulates onto the build time from the architecture cell; re-running this cell
# on its own adds the training time again.
colbert_sent_rt += stop - start

Epoch 1/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_sent/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_sent/assets


Epoch 2/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_sent/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_sent/assets


Epoch 3/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_sent/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_sent/assets


Epoch 4/5
Epoch 5/5



INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_sent/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_sent/assets




In [None]:
# Evaluate the sentence-only ColBERT model and rebind results_df to what model_scores returns.
# X_test[:15] takes the first 15 input arrays, matching the 15 inputs of colbert_sent_model;
# model_scores is defined outside this section.
results_df = model_scores(X_test[:15], y_test, colbert_sent_model, colbert_sent_rt, results_df)

Accuracy = 0.961
F1 score = 0.959


# ColBERT with *only* 1st and 2nd sentences as input

In [None]:
# ColBERT restricted to the 1st and 2nd sentences: a shared BERT encoder feeds 2 parallel
# Dense(32) -> Dropout -> Dense(8) stacks, whose outputs are concatenated and passed through
# Dense(512) -> Dropout -> Dense(256) -> sigmoid. Build time is stored in colbert_2sent_rt.
start = default_timer()

### model architecture ###
SENT_INPUT_LEN = 20
DROPOUT_RATE = 0.5

# 6 inputs: (input IDs, attention mask, token type IDs) for each of the 2 sentence paths.
# Keras layer names follow 'input_<ii|am|tti>_sent<k>'.
((input_sent1_1, input_sent1_2, input_sent1_3),
 (input_sent2_1, input_sent2_2, input_sent2_3)) = [
    [Input(shape=(SENT_INPUT_LEN,), dtype=tf.int32, name=f'input_{field}_sent{k}')
     for field in ('ii', 'am', 'tti')]
    for k in range(1, 3)
]

# one BERT encoder instance, reused for both sentences
bert_model = TFBertModel.from_pretrained('bert-base-uncased')
bert_embeddings1, bert_embeddings2 = [
    bert_model(ids, attention_mask=mask, token_type_ids=type_ids)
    for ids, mask, type_ids in (
        (input_sent1_1, input_sent1_2, input_sent1_3),
        (input_sent2_1, input_sent2_2, input_sent2_3),
    )
]

# element [1] of each BERT output is used as the sentence vector
# (GlobalAveragePooling1D() over the token embeddings would be an alternative)
x1, x2 = [outputs[1] for outputs in (bert_embeddings1, bert_embeddings2)]

# per-sentence hidden layer 1 (32 units) ...
h1_1, h1_2 = [
    Dense(32, activation='relu', name=f"hidden1_sent{k}")(pooled)
    for k, pooled in enumerate((x1, x2), start=1)
]

# ... followed by dropout
h1_dropout1, h1_dropout2 = [
    Dropout(DROPOUT_RATE, name=f"h1_dropout_sent{k}")(hidden)
    for k, hidden in enumerate((h1_1, h1_2), start=1)
]

# per-sentence hidden layer 2 (8 units)
h2_1, h2_2 = [
    Dense(8, activation='relu', name=f"hidden2_sent{k}")(dropped)
    for k, dropped in enumerate((h1_dropout1, h1_dropout2), start=1)
]

# merge the 2 sentence paths
xx = Concatenate()([h2_1, h2_2])

# shared head: Dense(512) + dropout, then Dense(256)
h3 = Dense(512, activation='relu', name="hidden3")(xx)
h3_dropout = Dropout(DROPOUT_RATE)(h3)
h4 = Dense(256, activation='relu', name="hidden4")(h3_dropout)

# sigmoid output unit
yhat = Dense(1, activation='sigmoid', name="output")(h4)

### end of model architecture ###

stop = default_timer()
colbert_2sent_rt = stop - start

Some weights of the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Compile and fine-tune the 2-sentence ColBERT model, checkpointing whenever val_accuracy
# improves, then restore the checkpointed weights. Training time is added to colbert_2sent_rt.
start = default_timer()

### model setup ###
EPOCHS = 5

# the 6 input tensors, grouped per sentence as (input IDs, attention mask, token type IDs)
sent2_inputs = [
    input_sent1_1, input_sent1_2, input_sent1_3,
    input_sent2_1, input_sent2_2, input_sent2_3,
]

colbert_2sent_model = Model(inputs=sent2_inputs, outputs=[yhat], name='colbert_2sent')
colbert_2sent_model.compile(
    tf.keras.optimizers.Adam(1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### training ###

# checkpoint location on the mounted Drive
colbert_2sent_cp_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_2sent'

# the full model is saved (save_weights_only=False), but only on a new best val_accuracy
colbert_2sent_cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=colbert_2sent_cp_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

# X_*[:6] -> the first 6 input arrays, one per model input
colbert_2sent_hist = colbert_2sent_model.fit(
    x=X_train[:6],
    y=y_train,
    validation_data=(X_val[:6], y_val),
    epochs=EPOCHS,
    batch_size=6,
    callbacks=[colbert_2sent_cp_callback],
)

# swap the final-epoch weights for the best checkpointed ones
colbert_2sent_model.load_weights(colbert_2sent_cp_filepath)

stop = default_timer()
# NOTE: accumulates onto the build time from the architecture cell; re-running this cell
# on its own adds the training time again.
colbert_2sent_rt += stop - start

In [None]:
# Evaluate the 2-sentence ColBERT model and rebind results_df to what model_scores returns.
# X_test[:6] takes the first 6 input arrays, matching the 6 inputs of colbert_2sent_model;
# model_scores is defined outside this section.
results_df = model_scores(X_test[:6], y_test, colbert_2sent_model, colbert_2sent_rt, results_df)

Accuracy = 0.958
F1 score = 0.957


In [None]:
# In case the 2-sentence ColBERT model needs to be restored without retraining:
# rebuild it from the tensors of the 2-sentence architecture cell, load the best
# checkpoint and show the test-set confusion matrix.
# NOTE: other architecture cells in this notebook rebind `yhat` and the `input_*` names,
# so the 2-sentence architecture cell must be the most recently executed one.
colbert_2sent_cp_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_2sent'
sent2_inputs = [input_sent1_1, input_sent1_2, input_sent1_3,
                input_sent2_1, input_sent2_2, input_sent2_3]

colbert_2sent_model = Model(sent2_inputs, [yhat], name='colbert_2sent')
colbert_2sent_model.load_weights(colbert_2sent_cp_filepath)

# threshold the sigmoid outputs at 0.5 to obtain boolean class predictions
y_pred_2sent = (colbert_2sent_model.predict(X_test[:6]) > 0.5).flatten()

# The saved cell output is a confusion matrix, but the expression producing it was missing;
# restored here as the cell's last expression so a re-run reproduces that output.
sklearn.metrics.confusion_matrix(y_test, y_pred_2sent)


array([[468,  16],
       [ 26, 490]])

# ColBERT simple: ColBERT with 1st and 2nd Sentences + Document

In [None]:
# ColBERT simple: sentences 1 and 2 plus the whole document. A shared BERT encoder feeds
# three parallel Dense -> Dropout -> Dense stacks (32/8 units per sentence, 256/64 for the
# document); their outputs are concatenated and passed through
# Dense(512) -> Dropout -> Dense(256) -> sigmoid. Build time is stored in colbert_simple_rt.
start = default_timer()

### model architecture ###
SENT_INPUT_LEN = 20
DOC_INPUT_LEN = 100
DROPOUT_RATE = 0.5

# 9 inputs: (input IDs, attention mask, token type IDs) for 2 sentence paths and 1 document path.
# Keras layer names follow 'input_<ii|am|tti>_<sent1|sent2|doc>'.
((input_sent1_1, input_sent1_2, input_sent1_3),
 (input_sent2_1, input_sent2_2, input_sent2_3),
 (input_doc_1, input_doc_2, input_doc_3)) = [
    [Input(shape=(length,), dtype=tf.int32, name=f'input_{field}_{path}')
     for field in ('ii', 'am', 'tti')]
    for path, length in (('sent1', SENT_INPUT_LEN),
                         ('sent2', SENT_INPUT_LEN),
                         ('doc', DOC_INPUT_LEN))
]

# one BERT encoder instance, reused for both sentences and the document
bert_model = TFBertModel.from_pretrained('bert-base-uncased')
bert_embeddings1, bert_embeddings2, bert_embeddings6 = [
    bert_model(ids, attention_mask=mask, token_type_ids=type_ids)
    for ids, mask, type_ids in (
        (input_sent1_1, input_sent1_2, input_sent1_3),
        (input_sent2_1, input_sent2_2, input_sent2_3),
        (input_doc_1, input_doc_2, input_doc_3),
    )
]

# element [1] of each BERT output is used as the path's vector
# (GlobalAveragePooling1D() over the token embeddings would be an alternative)
x1, x2, x6 = [
    outputs[1]
    for outputs in (bert_embeddings1, bert_embeddings2, bert_embeddings6)
]

# per-path hidden layer 1 (32 units per sentence, 256 for the document) ...
h1_1, h1_2, h1_6 = [
    Dense(units, activation='relu', name=f"hidden1_{path}")(pooled)
    for path, units, pooled in (('sent1', 32, x1), ('sent2', 32, x2), ('doc', 256, x6))
]

# ... followed by dropout
h1_dropout1, h1_dropout2, h1_dropout6 = [
    Dropout(DROPOUT_RATE, name=f"h1_dropout_{path}")(hidden)
    for path, hidden in (('sent1', h1_1), ('sent2', h1_2), ('doc', h1_6))
]

# per-path hidden layer 2 (8 units per sentence, 64 for the document)
h2_1, h2_2, h2_6 = [
    Dense(units, activation='relu', name=f"hidden2_{path}")(dropped)
    for path, units, dropped in (('sent1', 8, h1_dropout1),
                                 ('sent2', 8, h1_dropout2),
                                 ('doc', 64, h1_dropout6))
]

# merge the 3 parallel paths
xx = Concatenate()([h2_1, h2_2, h2_6])

# shared head: Dense(512) + dropout, then Dense(256)
h3 = Dense(512, activation='relu', name="hidden3")(xx)
h3_dropout = Dropout(DROPOUT_RATE)(h3)
h4 = Dense(256, activation='relu', name="hidden4")(h3_dropout)

# sigmoid output unit
yhat = Dense(1, activation='sigmoid', name="output")(h4)

### end of model architecture ###

stop = default_timer()
colbert_simple_rt = stop - start

Downloading:   0%|          | 0.00/433 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/536M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Rebuild ColBERT simple from the architecture cell above, restore a previously saved
# checkpoint and predict on the test set without retraining.
# Requires that a checkpoint already exists at the path below (written by the training cell).

# Defined here so this cell does not depend on the training cell having been run first.
colbert_simple_cp_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_simple'

simple_inputs = [input_sent1_1, input_sent1_2, input_sent1_3,
                input_sent2_1, input_sent2_2, input_sent2_3,
                input_doc_1, input_doc_2, input_doc_3]

colbert_simple_model = Model(simple_inputs, [yhat], name='colbert_simple')
colbert_simple_model.compile(tf.keras.optimizers.Adam(0.00001),
                              loss='binary_crossentropy',
                              metrics = ['accuracy'])
colbert_simple_model.load_weights(colbert_simple_cp_filepath)

# The original line called `model.predict(test_data)`, names not defined in this notebook
# section; use the model built above with the same test inputs as the other
# colbert_simple cells (first 6 arrays + last 3 arrays of X_test).
# Sigmoid outputs are thresholded at 0.5 to obtain boolean class predictions.
y_pred = (colbert_simple_model.predict(X_test[:6] + X_test[-3:]) > 0.5).flatten()

In [None]:
# Compile and fine-tune ColBERT simple, checkpointing whenever val_accuracy improves, then
# restore the checkpointed weights. Training time is added to colbert_simple_rt.
start = default_timer()

### model setup ###
EPOCHS = 5

# the 9 input tensors: (input IDs, attention mask, token type IDs) for sentence 1,
# sentence 2 and the document
simple_inputs = [
    input_sent1_1, input_sent1_2, input_sent1_3,
    input_sent2_1, input_sent2_2, input_sent2_3,
    input_doc_1, input_doc_2, input_doc_3,
]

colbert_simple_model = Model(inputs=simple_inputs, outputs=[yhat], name='colbert_simple')
colbert_simple_model.compile(
    tf.keras.optimizers.Adam(1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### training ###

# checkpoint location on the mounted Drive
colbert_simple_cp_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_simple'

# the full model is saved (save_weights_only=False), but only on a new best val_accuracy
colbert_simple_cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=colbert_simple_cp_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
)

# model inputs = first 6 arrays (sentences 1-2) followed by the last 3 arrays of each split
colbert_simple_hist = colbert_simple_model.fit(
    x=(X_train[:6] + X_train[-3:]),
    y=y_train,
    validation_data=((X_val[:6] + X_val[-3:]), y_val),
    epochs=EPOCHS,
    batch_size=6,
    callbacks=[colbert_simple_cp_callback],
)

# swap the final-epoch weights for the best checkpointed ones
colbert_simple_model.load_weights(colbert_simple_cp_filepath)

stop = default_timer()
# NOTE: accumulates onto the build time from the architecture cell; re-running this cell
# on its own adds the training time again.
colbert_simple_rt += stop - start

In [None]:
# In case ColBERT simple needs to be restored without retraining: rebuild it from the
# architecture cell's tensors, load the best checkpoint, export the misclassified test
# examples to CSV and show the test-set confusion matrix.
# NOTE: other architecture cells in this notebook rebind `yhat` and the `input_*` names,
# so the ColBERT-simple architecture cell must be the most recently executed one.
colbert_simple_cp_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_simple'
simple_inputs = [input_sent1_1, input_sent1_2, input_sent1_3,
                input_sent2_1, input_sent2_2, input_sent2_3,
                input_doc_1, input_doc_2, input_doc_3]

colbert_simple_model = Model(simple_inputs, [yhat], name='colbert_simple')
colbert_simple_model.load_weights(colbert_simple_cp_filepath)

# threshold the sigmoid outputs at 0.5 to obtain boolean class predictions
y_pred = (colbert_simple_model.predict(X_test[:6] + X_test[-3:]) > 0.5).flatten()

# make a df to compare true and predicted values
# pd.set_option('display.max_colwidth', 0) # to display all text in a row
# Rows 160000-160999 of `data` are presumably the test split -- TODO confirm against the
# train/val/test split cell. The explicit .copy() makes df an independent frame, so adding
# the y_pred column below does not assign into a slice of `data` (SettingWithCopyWarning).
df = data[['text', 'humor']][160000:161000].copy()
df['y_pred'] = y_pred

# keep only the rows where the prediction disagrees with the label
df = df[df['y_pred'] != df['humor']]
df.to_csv('/content/drive/My Drive/W266-NLP-Final-Project/colbert_simple_misclassed.csv', index=True)

# last expression of the cell -> rendered as the cell output
sklearn.metrics.confusion_matrix(y_test,y_pred)

array([[452,  32],
       [  6, 510]])

In [None]:
# Evaluate ColBERT simple and rebind results_df to what model_scores returns.
# Test inputs are the first 6 arrays of X_test followed by its last 3, matching the 9 inputs
# used for training; model_scores is defined outside this section.
results_df = model_scores((X_test[:6] + X_test[-3:]), y_test, colbert_simple_model, colbert_simple_rt, results_df)

Accuracy = 0.962
F1 score = 0.96


In [None]:
# Build the full six-path graph: five sentence-level paths plus one
# document-level path, all encoded by the same BERT model, then merged into a
# single sigmoid output. The elapsed build time is recorded in colbert_full_rt.
start = default_timer()

SENT_INPUT_LEN = 20
DOC_INPUT_LEN = 100
DROPOUT_RATE = 0.5  # TODO: rate still to be tuned; shared by every Dropout below

# Suffixes used in the layer names, in path order.
_PATH_TAGS = ('sent1', 'sent2', 'sent3', 'sent4', 'sent5', 'doc')


def _make_bert_inputs(tag, seq_len):
    """Return the (input IDs, attention mask, token type IDs) Input layers for one path."""
    return tuple(
        Input(shape=(seq_len,), dtype=tf.int32, name=f'input_{prefix}_{tag}')
        for prefix in ('ii', 'am', 'tti')
    )


# 18 inputs in total: 3 per parallel path (5 sentence paths, 1 document path).
input_sent1_1, input_sent1_2, input_sent1_3 = _make_bert_inputs('sent1', SENT_INPUT_LEN)
input_sent2_1, input_sent2_2, input_sent2_3 = _make_bert_inputs('sent2', SENT_INPUT_LEN)
input_sent3_1, input_sent3_2, input_sent3_3 = _make_bert_inputs('sent3', SENT_INPUT_LEN)
input_sent4_1, input_sent4_2, input_sent4_3 = _make_bert_inputs('sent4', SENT_INPUT_LEN)
input_sent5_1, input_sent5_2, input_sent5_3 = _make_bert_inputs('sent5', SENT_INPUT_LEN)
input_doc_1, input_doc_2, input_doc_3 = _make_bert_inputs('doc', DOC_INPUT_LEN)

# A single pretrained BERT instance is applied to every path.
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

(bert_embeddings1, bert_embeddings2, bert_embeddings3,
 bert_embeddings4, bert_embeddings5, bert_embeddings6) = [
    bert_model(ids, attention_mask=mask, token_type_ids=type_ids)
    for ids, mask, type_ids in (
        (input_sent1_1, input_sent1_2, input_sent1_3),
        (input_sent2_1, input_sent2_2, input_sent2_3),
        (input_sent3_1, input_sent3_2, input_sent3_3),
        (input_sent4_1, input_sent4_2, input_sent4_3),
        (input_sent5_1, input_sent5_2, input_sent5_3),
        (input_doc_1, input_doc_2, input_doc_3),
    )
]

# Element 1 of each BERT output is used as the pooled vector for that path
# (GlobalAveragePooling1D() over the token embeddings would be an alternative).
x1, x2, x3, x4, x5, x6 = [
    bert_out[1]
    for bert_out in (bert_embeddings1, bert_embeddings2, bert_embeddings3,
                     bert_embeddings4, bert_embeddings5, bert_embeddings6)
]

# First fully connected layer per path: 32 units for sentences, 256 for the document.
h1_1, h1_2, h1_3, h1_4, h1_5, h1_6 = [
    Dense(units, activation='relu', name=f"hidden1_{tag}")(pooled)
    for units, tag, pooled in zip((32, 32, 32, 32, 32, 256),
                                  _PATH_TAGS,
                                  (x1, x2, x3, x4, x5, x6))
]

# Dropout after the first hidden layer of every path (TODO: rate to be changed).
(h1_dropout1, h1_dropout2, h1_dropout3,
 h1_dropout4, h1_dropout5, h1_dropout6) = [
    Dropout(DROPOUT_RATE, name=f"h1_dropout_{tag}")(hidden)
    for tag, hidden in zip(_PATH_TAGS, (h1_1, h1_2, h1_3, h1_4, h1_5, h1_6))
]

# Second fully connected layer per path: 8 units for sentences, 64 for the document.
h2_1, h2_2, h2_3, h2_4, h2_5, h2_6 = [
    Dense(units, activation='relu', name=f"hidden2_{tag}")(dropped)
    for units, tag, dropped in zip((8, 8, 8, 8, 8, 64),
                                   _PATH_TAGS,
                                   (h1_dropout1, h1_dropout2, h1_dropout3,
                                    h1_dropout4, h1_dropout5, h1_dropout6))
]

# Merge the outputs of all six parallel paths.
xx = Concatenate()([h2_1, h2_2, h2_3, h2_4, h2_5, h2_6])

# Shared head: Dense -> Dropout -> Dense -> sigmoid output.
h3 = Dense(512, activation='relu', name="hidden3")(xx)
h3_dropout = Dropout(DROPOUT_RATE)(h3)  # TODO: rate to be changed
h4 = Dense(256, activation='relu', name="hidden4")(h3_dropout)

# Final output layer (TODO: dropout rate for the head is still undecided).
yhat = Dense(1, activation='sigmoid', name="output")(h4)

stop = default_timer()
colbert_full_rt = (stop - start)

- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Assemble the full model from the graph built in the previous cell, compile it,
# and restore the best saved weights. The time spent here is added to
# colbert_full_rt.

# Restart the clock for this cell. Without this, `start` still holds the
# timestamp taken at the top of the model-building cell, so that cell's build
# time (plus any idle time between the two cells) would be added to
# colbert_full_rt a second time.
start = default_timer()

EPOCHS = 5  # not referenced in this cell; kept because other cells may read it

# Order matters: (input IDs, attention mask, token type IDs) for each of the
# five sentence paths, followed by the document path.
full_inputs = [input_sent1_1, input_sent1_2, input_sent1_3,
                input_sent2_1, input_sent2_2, input_sent2_3,
                input_sent3_1, input_sent3_2, input_sent3_3,
                input_sent4_1, input_sent4_2, input_sent4_3,
                input_sent5_1, input_sent5_2, input_sent5_3,
                input_doc_1, input_doc_2, input_doc_3]

colbert_full_model = Model(inputs=full_inputs, outputs=[yhat], name="colbert_full_model")
colbert_full_model.compile(tf.keras.optimizers.Adam(0.00001),
              loss='binary_crossentropy',
              metrics = ['accuracy'])

# The model weights (that are considered the best) are loaded into the model.
colbert_full_filepath = '/content/drive/MyDrive/W266-NLP-Final-Project/checkpoint/colbert_best'

colbert_full_model.load_weights(colbert_full_filepath)

# Add only this cell's elapsed time to the build time recorded earlier.
# NOTE: re-running this cell on its own accumulates again; re-run the
# model-building cell first to reset colbert_full_rt.
stop = default_timer()
colbert_full_rt  += (stop - start)

In [None]:
# Score the reloaded full model on the test split and keep the returned table.
# NOTE(review): model_scores is defined elsewhere; it presumably appends one row
# to the results_df passed in, so re-running this cell would likely add a
# duplicate row — verify against its definition.
results_df = model_scores(
    X_test,
    y_test,
    colbert_full_model,
    colbert_full_rt,
    results_df,
)

Accuracy = 0.97
F1 score = 0.969


In [None]:
# Write the model comparison table to CSV without the DataFrame index.
# The relative file name is resolved against the kernel's working directory.
results_csv_name = 'model_results.csv'
results_df.to_csv(results_csv_name, index=False)

In [None]:
# Leave the comparison table as the cell's final expression so the notebook renders it.
results_df

Unnamed: 0,Model,Baseline Accuracy,Accuracy,Precision,Recall,F1,Runtime
0,baseline,0.516,0.966,0.962963,0.966942,0.964948,1133.553155
1,colbert_doc,0.516,0.961,0.962578,0.956612,0.959585,1274.059635
2,baseline_sent,0.516,0.961,0.960663,0.958678,0.959669,1715.643404
3,baseline_sent,0.516,0.954,0.950617,0.954545,0.952577,1886.257185
4,colbert_sent,0.516,0.961,0.970402,0.948347,0.959248,1998.740468
5,colbert_2sent,0.516,0.958,0.947368,0.966942,0.957055,942.127935
6,colbert_simple,0.516,0.962,0.9869,0.933884,0.95966,1788.420719
7,colbert_full_model,0.516,0.97,0.969008,0.969008,0.969008,40.648261
