In [0]:
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
!pip install keras-bert

Collecting keras-bert
  Downloading https://files.pythonhosted.org/packages/2c/0f/cdc886c1018943ea62d3209bc964413d5aa9d0eb7e493abd8545be679294/keras-bert-0.81.0.tar.gz
Collecting keras-transformer>=0.30.0
  Downloading https://files.pythonhosted.org/packages/22/b9/9040ec948ef895e71df6bee505a1f7e1c99ffedb409cb6eb329f04ece6e0/keras-transformer-0.33.0.tar.gz
Collecting keras-pos-embd>=0.10.0
  Downloading https://files.pythonhosted.org/packages/09/70/b63ed8fc660da2bb6ae29b9895401c628da5740c048c190b5d7107cadd02/keras-pos-embd-0.11.0.tar.gz
Collecting keras-multi-head>=0.22.0
  Downloading https://files.pythonhosted.org/packages/40/3e/d0a64bb2ac5217928effe4507c26bbd19b86145d16a1948bc2d4f4c6338a/keras-multi-head-0.22.0.tar.gz
Collecting keras-layer-normalization>=0.12.0
  Downloading https://files.pythonhosted.org/packages/a4/0e/d1078df0494bac9ce1a67954e5380b6e7569668f0f3b50a9531c62c1fc4a/keras-layer-normalization-0.14.0.tar.gz
Collecting keras-position-wise-feed-forward>=0.5.0
  Downloading

In [0]:
import tensorflow as tf
import sys
from collections import Counter
import numpy as np
import json
import os
from keras.preprocessing.text import Tokenizer, tokenizer_from_json
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from keras.models import Sequential, load_model, Input, Model
from keras.layers import Embedding, LSTM, Dense, Dropout, GRU, Bidirectional, Flatten, Reshape, Concatenate
from keras.callbacks import ModelCheckpoint
from keras.losses import SparseCategoricalCrossentropy, sparse_categorical_crossentropy, categorical_crossentropy, Loss, MSE
from keras.optimizers import Adam
from keras_bert import get_model, compile_model
from keras_bert.layers import Extract
from keras_bert import get_base_dict
from keras_bert import Tokenizer as bert_tokenizer
from collections import Counter
from tabulate import tabulate
from multiprocessing import Pool, Queue, Manager
from itertools import combinations

MY_DRIVE = "/content/gdrive/My Drive"
root_folder = os.path.join(MY_DRIVE, "YelpHelp") # change this depending on the machine (Colab vs IPython)
dataset_folder = os.path.join(root_folder, "datasets")
dataset_name = "yelp_review_training_dataset.jsonl"
models_dir = os.path.join(root_folder, "models")
checkpoint_dir = os.path.join(models_dir, "checkpoints")
tokenizers_dir = os.path.join(models_dir, "tokenizers")
test_set_dir = os.path.join(root_folder, "test-sets")
ensemble_dir = os.path.join(root_folder, "ensembles")

sys.path.append(os.path.join(root_folder, "source"))
from utils import *
from models import *

Using TensorFlow backend.


Check for GPU

In [0]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


Utility functions

In [0]:
def get_texts_and_labels(dataset):
    texts = [d["text"] for d in dataset]
    labels = [d["stars"] - 1 for d in dataset]
    return texts, labels

def load_tokenizer(name):
    file_path = os.path.join(tokenizers_dir, name)
    with open(file_path) as tkf:
        return tokenizer_from_json(tkf.read())

def train_model(model, train_seqs, train_labels, num_epochs, save_as, batch_size=64, validation_split=.2, save_weights=False):
    save_file = os.path.join(models_dir, save_as)
    checkpoint_file = os.path.join(checkpoint_dir, f"{save_as}.ckpt")
    cp_callback = ModelCheckpoint(filepath=checkpoint_file, verbose=1, save_weights_only=save_weights)
    training_result = model.fit(train_seqs, train_labels, epochs=num_epochs, batch_size=batch_size, validation_split=.2, callbacks=[cp_callback])
    model.save(save_file)

def predict_on_texts(model, texts, preprocessor, actual_stars=None):
    inputs = preprocessor.preprocess(texts)
    predictions = model.predict(inputs)
    for i, p in enumerate(predictions):
        print("---------------------")
        print("TEXT:\n{}\nPREDICTED STARS:{}".format(texts[i], np.argmax(p) + 1))
        if actual_stars:
            print("ACTUAL STARS: {}".format(actual_stars[i]))

def get_balanced_dataset(dataset, size=1000):
    class_counter = Counter()
    result = []
    ration = size // 5
    finished = set()
    for d in dataset:
        star = d["stars"]
        if star not in finished:
            class_counter[star] += 1
            result.append(d)
            if class_counter[star] >= ration:
                finished.add(star)
        if len(finished) == 5:
            return result
    return result
        

def predict_from_data(model, dataset, preprocessor):
    stars = None
    if "stars" in dataset[0]:
        stars = [d["stars"] for d in dataset]
    texts = [d["text"] for d in dataset]
    predict_on_texts(model, texts, preprocessor, actual_stars=stars)

def batch_predict(batch, model, preprocessor):
    texts = [b["text"] for b in batch]
    batch_input = preprocessor.preprocess(texts)
    predictions = model.predict_ratings(batch_input)
    assert len(batch) == len(predictions)
    for i, b in enumerate(batch):
        b["predicted_stars"] = predictions[i]
        
def predict_test_set(test_set, model, preprocessor, batch_size=64, show_accuracy=True, print_results=True):
    for i in range(0, len(test_set), batch_size):
        batch = test_set[i: i + batch_size]
        batch_predict(batch, model, preprocessor)
    accuracy, avg_star_error = None, None
    if show_accuracy:
        accuracy = (len([d for d in test_set if d["stars"] == d["predicted_stars"]]) / len(test_set)) * 100
        avg_star_error = sum([abs(d["predicted_stars"] - d["stars"]) for d in test_set]) / len(test_set)
        if print_results:
            print("Accuracy: {:.3f}".format(accuracy))
            print("Average Star Error: {:.5f}".format(avg_star_error))
    return accuracy, avg_star_error

def load_data_set(name, test_set=False):
    set_dir = test_set_dir if test_set else dataset_folder
    with open(os.path.join(set_dir, name)) as df:
        return [json.loads(line) for line in df]

def load_keras_model(name, custom_objects={}, compile=True):
    return load_model(os.path.join(models_dir, name), custom_objects=custom_objects, compile=compile)

def load_custom_model(name, loss_func, custom_objects={}, metrics=[]):
    model = load_keras_model(name, custom_objects=custom_objects, compile=False)
    model.compile(optimizer=Adam(), loss=loss_func, metrics=metrics)
    return model

def load_transformer(name):
    weights_file = os.path.join(models_dir, name)
    model = build_transformer_model()
    model.load_weights(weights_file)
    return model

def compare_class_accuracies(test_set, models_and_preprocs):
    results = {}
    def class_result(c):
        relevant = [d for d in test_set if int(d["stars"]) == c]
        acc = len([d for d in relevant if d["stars"] == d["predicted_stars"]]) / len(relevant)
        star_err = sum([abs(d["stars"] - d["predicted_stars"]) for d in relevant]) / len(relevant)
        return acc, star_err
    for name, model_and_preproc in models_and_preprocs.items():
        model, preprocessor = model_and_preproc
        avg_acc, avg_se = predict_test_set(test_set, model, preprocessor, print_results=False)
        result = results[name] = [(avg_acc, avg_se)]
        result += [class_result(c) for c in range(1, 6)]
    headers = ["Model", "OVERALL\naccuracy | star error"]
    headers += ["{}\naccuracy | star error".format(star) for star in range(1, 6)]
    def format_result(result):
        acc, star_error = result
        return "{:.3f}     {:.3f}".format(acc, star_error)
    table = [[name] + [format_result(r) for r in result] for name, result in results.items()]
    print(tabulate(table, headers, tablefmt='fancy_grid'))



def compare_on_test_sets(test_sets, models_and_preprocs, show_results=True):
    results = {name: {} for name in models_and_preprocs}
    for model_name, model_and_preproc in models_and_preprocs.items():
        result = results[model_name]
        overall = result['overall'] = [0, 0]
        for test_name, test_set in test_sets.items():
            model, preprocessor = model_and_preproc
            acc, star_error = predict_test_set(test_set, model, preprocessor, print_results=False)
            result[test_name] = [acc, star_error]
            overall[0] += acc
            overall[1] += star_error
        overall[0] /= len(test_sets)
        overall[1] /= len(test_sets)
    table = []
    test_names = list(test_sets.keys())
    headers = ["Model", "OVERALL\nstar error | accuracy"]
    headers += ["{}\nstar error | accuracy".format(name) for name in test_names]
    def format_result(result, col_name):
        acc, star_error = result
        is_best_acc = acc == max([r[col_name][0] for r in results.values()])
        is_best_star_error = star_error == min([r[col_name][1] for r in results.values()])
        return "{:.3f}{}     {:.3f}{}".format(star_error, " *" if is_best_acc else "  ", acc, " *" if is_best_star_error else "  ")
    for model_name, result in results.items():
        row = [model_name, format_result(result['overall'], 'overall')]
        row += [format_result(result[name], name) for name in test_names]
        table.append(row)
    if show_results:
        print(tabulate(table, headers, tablefmt='fancy_grid'))
    return results


In [0]:
dataset = load_data_set(dataset_name)
challenge_3 = load_data_set("yelp_challenge_3_with_answers.jsonl", test_set=True)
challenge_5 = load_data_set("yelp_challenge_5_with_answers.jsonl", test_set=True)
challenge_6 = load_data_set("yelp_challenge_6_with_answers.jsonl", test_set=True)
challenge_8 = load_data_set("yelp_challenge_8_with_answers.jsonl", test_set=True)
random_test_set = np.random.choice(dataset, 10000)

In [0]:
class_counter = Counter()
for d in dataset:
    class_counter[d["stars"]] += 1
weights = []
for star, count in sorted(class_counter.items(), key=lambda t: t[0]):
    fraction = count / len(dataset)
    weight = .1 * (1 / fraction)
    weights.append(weight)
    print("star:", star, "fraction:", fraction, "weight:", weight, "count: ", count)
loss_weights = np.array(weights, dtype="float32")
print(loss_weights)

star: 1.0 fraction: 0.2434082173090871 weight: 0.4108324735521028 count:  129878
star: 2.0 fraction: 0.06720254281917834 weight: 1.4880389313402869 count:  35858
star: 3.0 fraction: 0.0642133059460513 weight: 1.55730963429939 count:  34263
star: 4.0 fraction: 0.13572822120727687 weight: 0.7367664521830384 count:  72422
star: 5.0 fraction: 0.48944771271840637 weight: 0.204311916066779 count:  261160
[0.41083246 1.4880389  1.5573096  0.73676646 0.20431192]


Loss and Model functions

In [0]:
global_indices = tf.constant([0., 1., 2., 3., 4.])
def star_squared_error(y_true, y_pred):
    indices = tf.reshape(tf.tile(global_indices, [tf.shape(y_pred)[0]]), tf.shape(y_pred))
    true_indices = tf.squeeze(y_true, axis=1)
    weighted = y_pred * indices
    weighted_avgs = tf.reduce_sum(weighted, axis=1)
    return (weighted_avgs - true_indices) ** 2

def weighted_loss(loss_func, weights=[2., 5., 5., 3., 1.]):
    loss_weights = tf.constant(weights)
    def weighted_loss_func(y_true, y_pred):
        true_indices = tf.cast(tf.squeeze(y_true, axis=1), tf.int32)
        one_hots = tf.one_hot(true_indices, depth=5, dtype=tf.float32)
        weight_vec = tf.linalg.matvec(one_hots, loss_weights)
        return weight_vec * loss_func(y_true, y_pred)
    return weighted_loss_func


def hybrid_loss(weighting=[.5, .5]):
    entropy_weighting, error_weighting = weighting
    def loss_func(y_true, y_pred):
        entropy_loss = sparse_categorical_crossentropy(y_true, y_pred)
        star_loss = star_squared_error(y_true, y_pred)
        return entropy_weighting * entropy_loss + error_weighting * star_loss
    return loss_func



In [0]:
labels = tf.constant([[2.], [4.]], dtype=tf.float32)
preds = tf.constant([
                     [1., 0., 0., 0., 0.],
                     [0, 0, 1, 0, 0]
], dtype=tf.float32)
print(star_squared_error(labels, preds).numpy())
# print(weighted_star_loss(labels, preds).numpy())

In [0]:
def build_model(input_length=150, rnn_size=256, loss='scc', use_glove=False, vocab_size=50000, 
                learning_rate=1e-3, dropout_rate=.2, use_gru=True, use_bidirectional=True, 
                use_c2v=False, show_accuracy=True, hybrid_weighting=[.5, .5]):
    model = Sequential()
    if not use_c2v:
        if use_glove:
            embed = Embedding(vocab_size, embedding_dim, weights=[embedding_matrix], trainable=False)
        else:
            embed = Embedding(vocab_size, rnn_size, input_length=input_length)
        model.add(embed)
    if use_gru:
        rnn_cell = GRU(rnn_size, dropout=dropout_rate)
    else:
        rnn_cell = LSTM(rnn_size, dropout=dropout_rate)
    if use_bidirectional:
        model.add(Bidirectional(rnn_cell))
    else:
        model.add(rnn_cell)
    model.add(Dense(5, activation='softmax'))
    if loss == 'scc':
        loss_func = sparse_categorical_crossentropy
    elif loss == 'star':
        loss_func = star_squared_error
    elif loss == 'hybrid':
        loss_func = hybrid_loss(hybrid_weighting)
    elif loss == 'wsl':
        loss_func = weighted_star_loss
    optimizer = Adam(learning_rate=learning_rate)
    metrics = ['sparse_categorical_accuracy'] if show_accuracy else []
    model.compile(optimizer=optimizer, loss=loss_func, metrics=metrics)
    return model

def build_char_model(input_length=150, word_length=5, word_embedding_dim=100, 
                     char_embedding_dim=10, use_glove=False, vocab_size=50000, 
                     char_vocab_size=72, learning_rate=1e-3, dropout_rate=.2, 
                     use_gru=True, use_bidirectional=True, use_c2v=False, loss='scc',
                     show_accuracy=True, weight_loss=False, loss_weights=[2., 5., 5., 3., 1.]):
    word_inputs = Input(shape=(input_length,))
    char_inputs = Input(shape=(input_length, word_length))
    flattened_chars = Flatten()(char_inputs)
    if not use_c2v:
        if use_glove:
            embed = Embedding(vocab_size, word_embedding_dim, weights=[embedding_matrix], trainable=False)
        else:
            embed = Embedding(vocab_size, word_embedding_dim, input_length=input_length)
        word_embeddings = embed(word_inputs)
        flattened_character_embeddings = Embedding(char_vocab_size, char_embedding_dim, input_length=word_length * input_length)(flattened_chars)
        character_embeddings = Reshape((input_length, word_length * char_embedding_dim))(flattened_character_embeddings)
    embeddings = Concatenate()([word_embeddings, character_embeddings])
    rnn_size = word_embedding_dim + word_length * char_embedding_dim
    if use_gru:
        rnn_cell = GRU(rnn_size, dropout=dropout_rate)
    else:
        rnn_cell = LSTM(rnn_size, dropout=dropout_rate)
    if use_bidirectional:
        rnn_out = Bidirectional(rnn_cell)(embeddings)
    else:
        rnn_out = rnn_cell(embeddings)
    logits = Dense(5, activation='softmax')(rnn_out)
    model = Model([word_inputs, char_inputs], logits)
    if loss == 'scc':
        loss_func = sparse_categorical_crossentropy
    elif loss == 'star':
        loss_func = star_squared_error
    elif loss == 'hybrid':
        loss_func = hybrid_loss(hybrid_weighting)
    if weight_loss:
        loss_func = weighted_loss(loss_func, loss_weights)
    optimizer = Adam(learning_rate=learning_rate)
    metrics = ['sparse_categorical_accuracy'] if show_accuracy else []
    model.compile(optimizer=optimizer, loss=loss_func, metrics=metrics)
    return model

def build_transformer_model(num_transformers=6, learning_rate=1e-3):
    def weighted_loss(loss_func, weights=[2., 5., 5., 3., 1.]):
        loss_weights = tf.constant(weights)
        def weighted_loss_func(y_true, y_pred):
            weight_vec = tf.linalg.matvec(y_true, loss_weights)
            return weight_vec * loss_func(y_true, y_pred)
        return weighted_loss_func
    inputs, output_layer = get_model(
        token_num=50000,
        head_num=5,
        transformer_num=num_transformers,
        embed_dim=100,
        feed_forward_dim=100,
        seq_len=150,
        pos_num=150,
        dropout_rate=0.05,
        training=False,
        trainable=True,
        output_layer_num=1
    )

    extract_layer = Extract(index=0, name='Extract')(output_layer)
    feed_forward_1 = Dense(units=100, name="feed_forward_1")(extract_layer)
    output_logits = Dense(
        units=5,
        activation='softmax',
        name='NSP',
    )(feed_forward_1)

    model = Model(inputs, [output_logits])
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss=weighted_loss(categorical_crossentropy), metrics=['accuracy'])
    return model

In [0]:
class YelpModel:
    def __init__(self, keras_model):
        self.keras_model = keras_model

    def predict_ratings(self, preprocessed_inputs):
        return [np.argmax(p) + 1 for p in self.keras_model.predict(preprocessed_inputs)]

    def predict(self, preprocessed_inputs):
        return self.keras_model.predict(preprocessed_inputs)

class EnsembleModel(YelpModel):
    def __init__(self, config):
        self.num_models = len(config)
        self.models = [model for model, _ in config]
        self.weights = [weight for _, weight in config]
            
    # averages the softmax probabilites
    def predict_ratings(self, preprocessed_inputs):
        assert len(preprocessed_inputs) == self.num_models
        num_samples = len(preprocessed_inputs[0])
        if num_samples == 2:   # dumbass hard code to fix char inputs - np.ma.size(..., axis=-2) didn't work
            num_samples = len(preprocessed_inputs[0][0])
        predictions = np.zeros((num_samples, 5))
        for i, inputs in enumerate(preprocessed_inputs):
            predictions += self.weights[i] * self.models[i].predict(inputs)
        return [np.argmax(p) + 1 for p in predictions]

    def all_probs(self, preprocessed_inputs):
        return np.array([self.models[i].predict(pi) for i, pi in enumerate(preprocessed_inputs)])

    def copy(self):
        clone = EnsembleModel([])
        clone.models = self.models
        clone.weights = self.weights.copy()
        clone.num_models = self.num_models
        return clone
      


In [0]:
class YelpPreprocessor:
    def preprocess(self, texts):
        raise NotImplementedError # abstract class

class SimpleTokenizerPadder(YelpPreprocessor):
    def __init__(self, tokenizer, input_length=150):
        self.tokenizer = tokenizer
        self.input_length = input_length
    def preprocess(self, texts):
        return pad_sequences(self.tokenizer.texts_to_sequences(texts), maxlen=self.input_length)

class CharacterModelPreprocessor(YelpPreprocessor):
    def __init__(self, word_tokenizer, char_tokenizer, input_length=150, word_length=5):
        self.word_tokenizer = word_tokenizer
        self.input_length = input_length
        self.char_tokenizer = char_tokenizer
        self.word_length = word_length
    def character_preprocess(self, texts):
        char_sequences = self.char_tokenizer.texts_to_sequences(texts)
        out = np.zeros((len(char_sequences), self.input_length, self.word_length))
        space_character = self.char_tokenizer.word_index[' ']
        for i, seq in enumerate(char_sequences):
            word_index = 0
            char_index = 0
            for c in char_sequences[i]:
                if c == space_character:
                    if char_index != 0:
                        word_index += 1
                    char_index = 0
                else:
                    if char_index < self.word_length:
                        out[i, word_index, char_index] = c
                    char_index += 1
                if word_index >= self.input_length:
                    break
            if word_index < self.input_length:
                adj = 1 if char_index != 0 else 0 # if char_index is 0, we added one at the end, and word_index = num_words, else we are at example index 1 in a len 5, and we want to roll 3, not 4
                out[i] = np.roll(out[i], self.input_length - word_index - adj, axis=0)
        return out
    
    def word_preprocess(self, texts):
        return pad_sequences(self.word_tokenizer.texts_to_sequences(texts), maxlen=self.input_length)

    def preprocess(self, texts):
        return [self.word_preprocess(texts), self.character_preprocess(texts)]

class BertTokenizer(YelpPreprocessor):
    def __init__(self, tokenizer, input_length=150):
        self.tokenizer = tokenizer
        self.input_length = input_length
    def preprocess(self, texts):
        sequences = np.zeros((len(texts), self.input_length))
        segments = np.zeros((len(texts), self.input_length))
        for i, text in enumerate(texts):
            sequences[i], segments[i] = self.tokenizer.encode(text, max_len=self.input_length)
            sequences[sequences > 100000] = 1 
        return [sequences, segments]    

class EnsemblePreprocessor(YelpPreprocessor):
    def __init__(self, preprocessors):
        self.preprocessors = preprocessors

    def preprocess(self, texts):
        return [p.preprocess(texts) for p in self.preprocessors]

Load in all the tokenizers and models

In [0]:
texts, labels = get_texts_and_labels(dataset)
tokenizer_50000 = load_tokenizer("test_tokenizer_50000")
tokenizer_50000_with_unks = load_tokenizer("test_tokenizer_50000_with_unks")
tokenizer_100000 = load_tokenizer("test_tokenizer_100000")
tokenizer_100000_with_unks = load_tokenizer("test_tokenizer_100000_with_unks")
char_tk = load_tokenizer("test_char_tokenizer")
new_token_dict = get_base_dict()
for word, i in tokenizer_100000_with_unks.word_index.items():
    if word != 'UNK':
        if i + 3 < 50000:
            new_token_dict[word] = i + 3
transformer_tokenizer = bert_tokenizer(new_token_dict)


preprocessor = SimpleTokenizerPadder(tokenizer_50000)
glove_preprocessor = SimpleTokenizerPadder(tokenizer_100000, input_length=300)
glove_char_preprocessor = CharacterModelPreprocessor(tokenizer_100000_with_unks, char_tk, input_length=300)
char_preprocessor = CharacterModelPreprocessor(tokenizer_50000_with_unks, char_tk)
bert_preprocessor = BertTokenizer(transformer_tokenizer)

weighted_star_loss = weighted_loss(star_squared_error)

glove_gru_bi = YelpModel(load_keras_model("glove_gru_bi"))
glove_gru_bi_char = YelpModel(load_keras_model("glove_gru_bi_char"))
gru_bi_50000 = YelpModel(load_keras_model("gru_bi_50000"))
gru_bi_50000_star_loss = YelpModel(load_custom_model("gru_bi_50000_star_loss", star_squared_error, metrics=['sparse_categorical_accuracy']))
gru_bi_50000_wsl = YelpModel(load_custom_model("gru_bi_50000_wsl", weighted_star_loss, metrics=['sparse_categorical_accuracy']))
gru_bi_50000_hl = YelpModel(load_custom_model("gru_bi_50000_hybrid_loss", hybrid_loss()))
gru_bi_char = YelpModel(load_keras_model("gru_bi_char"))
gru_bi_char_wscc = YelpModel(load_custom_model("gru_bi_char_wscc", weighted_loss(sparse_categorical_crossentropy)))
bert_model = YelpModel(load_transformer("checkpoints/bert_model_6_wscc_epoch_11.h5"))

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Set up ensemble

In [0]:
def generate_weights(num_models, depth=.05):
    divisor = int(1 / depth)
    def generate_helper(current_weights, left):
        if sum(current_weights) > divisor:
            return
        elif left <= 1:
            for w in range(21):
                if w + sum(current_weights) == divisor:
                    yield [weight / divisor for weight in current_weights + [w]]
        else:
            for w in range(21):
                yield from generate_helper(current_weights + [w], left - 1)
    yield from generate_helper([], num_models)

def best_weights_given_probs(probs, labels):
    bests = {'acc': [0, []], 'err': [5, []], 'score': [-100, []]}
    num_samples = len(probs[0])
    for weights in generate_weights(len(probs)):
        average_probs = np.average(probs, axis=0, weights=weights)
        predictions = np.argmax(average_probs, axis=1)
        acc = np.sum(predictions == labels) / num_samples
        star_err = np.sum(np.abs(predictions - labels)) / num_samples
        score = acc - star_err
        if acc > bests['acc'][0]:
            bests['acc'] = [acc, weights]
        if star_err < bests['err'][0]:
            bests['err'] = [star_err, weights]
        if score > bests['score'][0]:
            bests['score'] = [score, weights]
    return bests

def best_weights(ensemble_model, ensemble_preproc, test_set):
    texts, labels = get_texts_and_labels(test_set)
    inputs = ensemble_preproc.preprocess(texts)
    probs = ensemble_model.all_probs(inputs)
    return best_weights_given_probs(probs, labels)

def save_bests(bests, save_name):
    with open(os.path.join(ensemble_dir, save_name), "w+") as snf:
        print(json.dumps(bests), file=snf)

def load_bests(save_name):
    with open(os.path.join(ensemble_dir, save_name)) as snf:
        return json.load(snf)

def get_mps_for_bests(ensemble, preproc, names):
    models_and_preprocs = {}
    bests = [load_bests(name) for name in names]
    for name, best in zip(names, bests):
        for met in ('ACC', 'ERR', 'SCORE'):
            cur_ens = ensemble.copy()
            cur_ens.weights = best[met.lower()][1]
            models_and_preprocs[f"{name}_{met}"] = (cur_ens, preproc)
    return models_and_preprocs


HERE IS THE BIG ENSEMBLE, UNWEIGHTED

In [0]:
all_models =          [
                          (glove_gru_bi, 0.), (glove_gru_bi_char, 0.),
                          (gru_bi_50000, 0.), (gru_bi_50000_star_loss, 0.), (gru_bi_50000_wsl, 0.),
                          (gru_bi_char, 0.), (gru_bi_char_wscc, 0.),
                          (bert_model, 0.)
                      ]
ensemble = EnsembleModel(all_models)
#bigger_ensemble = EnsembleModel(all_models)


ensemble_preproc = EnsemblePreprocessor([glove_preprocessor, glove_char_preprocessor,
                                         preprocessor, preprocessor, preprocessor,
                                         char_preprocessor, char_preprocessor,
                                         bert_preprocessor])


CHANGE THE TEST SETS AND RUN THIS BLOCK TO GET THE BEST WEIGHTS

In [0]:
if True:
    bests = best_weights(ensemble, ensemble_preproc, random_test_set)
    name = "big_ensemble_random_test_set_results"
    # save it 
    save_bests(bests, name)
    print(bests)

{'acc': [0.8982, [0.0, 0.0, 0.35, 0.1, 0.0, 0.05, 0.0, 0.5]], 'err': [0.1192, [0.0, 0.0, 0.4, 0.1, 0.0, 0.0, 0.0, 0.5]], 'score': [0.779, [0.0, 0.0, 0.4, 0.1, 0.0, 0.0, 0.0, 0.5]]}


In [0]:
names = ["big_ensemble_random_test_set_results"]
mps = get_mps_for_bests(ensemble, ensemble_preproc, names)
test_sets = {"CHALLENGE 3": challenge_3, "CHALLENGE 5": challenge_5, "CHALLENGE 6": challenge_6, "CHALLENGE 8": challenge_8}
results = compare_on_test_sets(test_sets, mps)

╒════════════════════════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╕
│ Model                                      │ OVERALL                 │ CHALLENGE 3             │ CHALLENGE 5             │ CHALLENGE 6             │ CHALLENGE 8             │
│                                            │ star error | accuracy   │ star error | accuracy   │ star error | accuracy   │ star error | accuracy   │ star error | accuracy   │
╞════════════════════════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╡
│ big_ensemble_random_test_set_results_ACC   │ 0.958       49.233      │ 0.551 *     53.933 *    │ 0.682 *     38.800 *    │ 2.114       40.000      │ 0.486       64.200 *    │
├────────────────────────────────────────────┼─────────────────────────┼─────────────────────────┼─────────────────

CHANGE THE NAMES AND RUN TO TEST IT

In [0]:
names = ["big_ensemble_results", "big_ensemble_no_challenge_5_results", "big_ensemble_challenges_3_8_results"]
mps = get_mps_for_bests(ensemble, ensemble_preproc, names)
test_sets = {"CHALLENGE 3": challenge_3, "CHALLENGE 5": challenge_5, "CHALLENGE 6": challenge_6, "CHALLENGE 8": challenge_8}
results = compare_on_test_sets(test_sets, mps)

{'acc': [0.5575221238938053, [0.05, 0.1, 0.05, 0.05, 0.4, 0.1, 0.25, 0.0]], 'err': [0.8254670599803343, [0.0, 0.05, 0.2, 0.15, 0.55, 0.0, 0.0, 0.05]], 'score': [-0.272369714847591, [0.0, 0.0, 0.0, 0.15, 0.15, 0.15, 0.4, 0.15]]}
{'acc': [0.5932203389830508, [0.05, 0.15, 0.2, 0.0, 0.25, 0.2, 0.15, 0.0]], 'err': [0.9015645371577575, [0.0, 0.0, 0.3, 0.15, 0.35, 0.2, 0.0, 0.0]], 'score': [-0.31551499348109513, [0.05, 0.1, 0.2, 0.05, 0.25, 0.2, 0.15, 0.0]]}
{'acc': [0.6595744680851063, [0.0, 0.45, 0.1, 0.1, 0.15, 0.0, 0.2, 0.0]], 'err': [0.3916827852998066, [0.2, 0.05, 0.0, 0.1, 0.05, 0.05, 0.5, 0.05]], 'score': [0.2659574468085106, [0.1, 0.3, 0.05, 0.05, 0.0, 0.15, 0.3, 0.05]]}
╒═══════════════════════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╕
│ Model                                     │ OVERALL                 │ CHALLENGE 3             │ CHALLENGE 5             │ CHALLENGE 6       

In [0]:
names = ["big_ensemble_results", "big_ensemble_no_challenge_5_results"]
mps = get_mps_for_bests(ensemble, ensemble_preproc, names)
test_sets = {"CHALLENGE 3": challenge_3, "CHALLENGE 5": challenge_5, "CHALLENGE 6": challenge_6, "CHALLENGE 8": challenge_8}
results = compare_class_accuracies(challenge_3 + challenge_5 + challenge_6 + challenge_8, mps)

╒═══════════════════════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╕
│ Model                                     │ OVERALL                 │ 1                       │ 2                       │ 3                       │ 4                       │ 5                       │
│                                           │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │
╞═══════════════════════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╡
│ big_ensemble_results_ACC                  │ 55.752     0.830        │ 0.624     1.003         │ 0.534     0.514         │ 0.545     0.500         │ 0.604     0.416         │ 0.522     1.301 

In [0]:
texts, labels = get_texts_and_labels(challenge_3 + challenge_5 + challenge_6 + challenge_8)
ensemble.weights = [0.05, 0.15, 0.2, 0.0, 0.25, 0.2, 0.15, 0.0]
inputs = ensemble_preproc.preprocess(texts)
predictions = np.asarray(ensemble.predict_ratings(inputs)) - 1


In [0]:
print(predictions)

[3 2 0 ... 4 4 4]


In [0]:
import sklearn
confusion_matrix = sklearn.metrics.confusion_matrix(labels, predictions)
print(confusion_matrix)

[[258  36  12  11  79]
 [224 274  82  16   9]
 [  7  18  98  47   8]
 [  1   2  24 123 100]
 [151   6  12  62 374]]


                                        0.395	       0.633	     0.540	      0.518	    1.994     	0.418	      0.458	       0.634		

OLD STUFF:






Store a mapping of names to ensembles for easy loading

In [0]:
mapping = {
    "GRU_BI": (gru_bi_50000, preprocessor),
    "GRU_BI_WSL": (gru_bi_50000_wsl, preprocessor),
    "GRU_BI_CHAR": (gru_bi_char, char_preprocessor),
    "GRU_BI_CHAR_WSCC": (gru_bi_char_wscc, char_preprocessor),
    "BERT_MODEL": (bert_model, bert_preprocessor)
}

In [0]:
models_and_preprocs = get_mps_for_all_bests(mapping)
test_sets = {"CHALLENGE 3": challenge_3, "CHALLENGE 5": challenge_5, "CHALLENGE 6": challenge_6, "CHALLENGE 8": challenge_8}
results = compare_on_test_sets(test_sets, models_and_preprocs)

Find the best ensemble over all challenge sets

In [0]:
with open(os.path.join(ensemble_dir, "all_results"), "w+") as ar:
    print(json.dumps(results), file=ar)

In [0]:
mpns = [
          (gru_bi_50000, preprocessor, "GRU_BI"),
          (gru_bi_50000_wsl, preprocessor, "GRU_BI_WSL"),
          (gru_bi_char, char_preprocessor, "GRU_BI_CHAR"),
          (gru_bi_char_wscc, char_preprocessor, "GRU_BI_CHAR_WSCC"),
          (bert_model, bert_preprocessor, "BERT_MODEL")
]
if True: # I already ran this, so I'll just load it from file
    bests = best_ensemble(mpns, challenge_3 + challenge_6 + challenge_8, avg_predictions=True)
    save_bests(bests, "challenge_3_6_8_avg_preds_emsemble_bests")
else:
    bests = load_bests("all_tests_ensemble_bests", ensemble_mapping)
print(bests)

Save the results to a json file, and test the ensembles (one for each metric - err is star error and score is accuracy / 100 - star error)

In [0]:
models_and_preprocs = models_and_preprocs_from_bests(bests, avg_predictions=True)
test_sets = {"CHALLENGE 3": challenge_3, "CHALLENGE 5": challenge_5, "CHALLENGE 6": challenge_6, "CHALLENGE 8": challenge_8}
results = compare_on_test_sets(test_sets, models_and_preprocs)

╒═════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╕
│ Model               │ OVERALL                 │ CHALLENGE 3             │ CHALLENGE 5             │ CHALLENGE 6             │ CHALLENGE 8             │
│                     │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │
╞═════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╡
│ BEST_ACC_ENSEMBLE   │ 48.919       0.950      │ 60.674       0.464      │ 27.000       0.812      │ 44.000       2.030      │ 64.000 *     0.494      │
├─────────────────────┼─────────────────────────┼─────────────────────────┼─────────────────────────┼─────────────────────────┼─────────────────────────┤
│ BEST_ERR_ENSEMBLE   │ 50.277       0.873 *    │ 63.109 *     0.404 *    │ 

In [0]:
models_and_preprocs = get_mps_for_all_bests(ensemble_mapping)
test_sets = {"CHALLENGE 3": challenge_3, "CHALLENGE 5": challenge_5, "CHALLENGE 6": challenge_6, "CHALLENGE 8": challenge_8}
results = compare_on_test_sets(test_sets, models_and_preprocs)

Find the best ensemble over every challenge except 5 (bc why not)

In [0]:
new_bests = load_bests("no_challenge_5_bests", ensemble_mapping)
print(new_bests)

{'acc': [0.5775749674054759, <__main__.EnsembleModel object at 0x7f1734b8c0b8>, <__main__.EnsemblePreprocessor object at 0x7f1734b8ccc0>, [0.2, 0.1, 0.5, 0.2], 'GRU_BI-GRU_BI_WSL-GRU_BI_CHAR-GRU_BI_CHAR_WSCC'], 'err': [0.9250325945241199, <__main__.EnsembleModel object at 0x7f1734b8cd68>, <__main__.EnsemblePreprocessor object at 0x7f1734b8cc88>, [0.25, 0.4, 0.35], 'GRU_BI-GRU_BI_WSL-GRU_BI_CHAR'], 'score': [-0.3487614080834419, <__main__.EnsembleModel object at 0x7f1734b8cd68>, <__main__.EnsemblePreprocessor object at 0x7f1734b8cc88>, [0.25, 0.4, 0.35], 'GRU_BI-GRU_BI_WSL-GRU_BI_CHAR']}


In [0]:
models_and_preprocs = models_and_preprocs_from_bests(new_bests)
test_sets = {"CHALLENGE 3": challenge_3, "CHALLENGE 5": challenge_5, "CHALLENGE 6": challenge_6, "CHALLENGE 8": challenge_8}
results = compare_on_test_sets(test_sets, models_and_preprocs)

╒═════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╕
│ Model               │ OVERALL                 │ CHALLENGE 3             │ CHALLENGE 5             │ CHALLENGE 6             │ CHALLENGE 8             │
│                     │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │
╞═════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╡
│ BEST_ACC_ENSEMBLE   │ 54.121     0.871        │ 63.483     0.404        │ 43.600     0.640        │ 43.800     2.020        │ 65.600     0.420        │
├─────────────────────┼─────────────────────────┼─────────────────────────┼─────────────────────────┼─────────────────────────┼─────────────────────────┤
│ BEST_ERR_ENSEMBLE   │ 54.287     0.856        │ 62.547     0.410        │ 

In [0]:
print(load_bests("all_tests_ensemble_bests", ensemble_mapping))

{'acc': [0.5521140609636185, <__main__.EnsembleModel object at 0x7fdde7f30160>, <__main__.EnsemblePreprocessor object at 0x7fdde7f30358>, [0.1, 0.4, 0.3, 0.2], 'GRU_BI-GRU_BI_WSL-GRU_BI_CHAR-GRU_BI_CHAR_WSCC'], 'err': [0.8352999016715831, <__main__.EnsembleModel object at 0x7fdde7f30160>, <__main__.EnsemblePreprocessor object at 0x7fdde7f30358>, [0.25, 0.55, 0.15, 0.05], 'GRU_BI-GRU_BI_WSL-GRU_BI_CHAR-GRU_BI_CHAR_WSCC'], 'score': [-0.2846607669616519, <__main__.EnsembleModel object at 0x7fdde7f30160>, <__main__.EnsemblePreprocessor object at 0x7fdde7f30358>, [0.05, 0.4, 0.4, 0.15], 'GRU_BI-GRU_BI_WSL-GRU_BI_CHAR-GRU_BI_CHAR_WSCC']}


Here are all of the best ensembles hardcoded, for posterity

In [0]:
best_acc_ensemble_config = [
    (gru_bi_50000, .1),
    (gru_bi_50000_wsl, .4),
    (gru_bi_char, .3),
    (gru_bi_char_wscc, .2)
]

best_err_ensemble_config = [
    (gru_bi_50000, .25),
    (gru_bi_50000_wsl, .55),
    (gru_bi_char, .15),
    (gru_bi_char_wscc, .05)
]

best_score_ensemble_config = [
    (gru_bi_50000, .05),
    (gru_bi_50000_wsl, .4),
    (gru_bi_char, .4),
    (gru_bi_char_wscc, .15)
]

best_acc_ensemble = EnsembleModel(best_acc_ensemble_config)
best_err_ensemble = EnsembleModel(best_err_ensemble_config)
best_score_ensemble = EnsembleModel(best_score_ensemble_config)
best_ensemble_preprocessor = EnsemblePreprocessor([preprocessor, preprocessor, char_preprocessor, char_preprocessor])

In [0]:
models_and_preprocs = {
                        "BEST_ACC_ENSEMBLE": (best_acc_ensemble, best_ensemble_preprocessor),
                        "BEST_ERR_ENSEMBLE": (best_err_ensemble, best_ensemble_preprocessor),
                        "BEST_SCORE_ENSEMBLE": (best_score_ensemble, best_ensemble_preprocessor)
                      }

In [0]:
test_sets = {"CHALLENGE 3": challenge_3, "CHALLENGE 5": challenge_5, "CHALLENGE 6": challenge_6, "CHALLENGE 8": challenge_8}
results = compare_on_test_sets(test_sets, models_and_preprocs)

╒═════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╤═════════════════════════╕
│ Model               │ OVERALL                 │ CHALLENGE 3             │ CHALLENGE 5             │ CHALLENGE 6             │ CHALLENGE 8             │
│                     │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │ accuracy | star error   │
╞═════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╪═════════════════════════╡
│ BEST_ACC_ENSEMBLE   │ 55.074     0.847        │ 63.296     0.395        │ 51.800     0.540        │ 41.800     1.994        │ 63.400     0.458        │
├─────────────────────┼─────────────────────────┼─────────────────────────┼─────────────────────────┼─────────────────────────┼─────────────────────────┤
│ BEST_ERR_ENSEMBLE   │ 54.724     0.843        │ 63.296     0.395        │ 

In [0]:
train_data, testing_data = train_test_split(dataset, train_size=.8)
balanced_test_set = get_balanced_dataset(testing_data)

In [0]:
compare_class_accuracies(balanced_test_set, models_and_preprocs)

In [0]:
from keras.utils import to_categorical

In [0]:
train_seqs, test_seqs, train_labels, test_labels = train_test_split(preprocessor.preprocess(texts), labels, train_size=.8)

In [0]:
processed_words, processed_chars = char_preprocessor.preprocess(texts)
train_seq_words, test_seq_words, train_seq_chars, test_seq_chars, train_labels, test_labels = train_test_split(processed_words, 
                                                                                                               processed_chars, labels, train_size=.8)

In [0]:
start = np.random.randint(0, 400000)
fake_test_set = [{"text": text, "stars": stars + 1} for text, stars in zip(texts[start:start + 10000], labels[start:start + 10000])]

In [0]:
train_model(gru_bi_char_wscc, [train_seq_words, train_seq_chars], train_labels, 2, "gru_bi_char_wscc")

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 341491 samples, validate on 85373 samples
Epoch 1/2

Epoch 00001: saving model to /content/gdrive/My Drive/YelpHelp/models/checkpoints/gru_bi_50000_wscc.ckpt
Epoch 2/2

Epoch 00002: saving model to /content/gdrive/My Drive/YelpHelp/models/checkpoints/gru_bi_50000_wscc.ckpt


In [0]:
train_model(gru_bi_50000_wsl, train_seqs, train_labels, 2, "gru_bi_wsl")

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 341491 samples, validate on 85373 samples
Epoch 1/2

In [0]:
loss, accuracy = bert_model.evaluate(bert_preprocessor.preprocess(fake_test_set), to_categorical(np.asarray(fake_test_set_labels)), batch_size=64)
print(f"Loss: {loss}\tAccuracy: {accuracy}")

Loss: 0.4607791851043701	Accuracy: 0.8310999870300293


In [0]:
gru_bi_char = load_keras_model("gru_bi_char")

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [0]:
test_sets = {"CHALLENGE 5": challenge_5, "CHALLENGE 6": challenge_6}
models_and_preprocs = {"GRU_BI": (gru_bi_50000_wsl, preprocessor), "GRU_BI_CHAR": (gru_bi_char, char_preprocessor)}
compare_on_test_sets(test_sets, models_and_preprocs)

CHALLENGE 5:
-------------------
GRU_BI:
Accuracy: 62.200
Average Star Error: 0.42200

GRU_BI_CHAR:
Accuracy: 42.400
Average Star Error: 0.66000

CHALLENGE 6:
-------------------
GRU_BI:
Accuracy: 30.400
Average Star Error: 2.03800

GRU_BI_CHAR:
Accuracy: 44.200
Average Star Error: 1.99400



Try some models

Some examples

In [0]:
ex1 = "The place was pretty decent. We got seated quickly and the service was pretty good. It was a bit too dark and loud for me, but that's just my preference. I would definitely come back here again"
ex2 = "Chuck-e-cheeze makes its competitors look like trash. Dave and busters: shit. Nobody is as good as chuck-e-cheeze. I will not be going anywhere else for a good time."
ex3 = "I lobe this plase. Evertime I com here its greate. Will for sure come agan@"
text_examples = [ex1, ex2, ex3]

# random examples
random_samples = np.random.choice(dataset, 5)


In [0]:
mini_test_set = dataset[:10000]
predict_test_set(mini_test_set, gru_bi_50000_wsl, preprocessor)

Accuracy: 70.120
Average Star Error: 0.33660


In [0]:
predict_from_data(gru_bi_50000_hybrid_loss, mini_test_set, tokenizer_50000)

In [0]:
predict_on_texts(gru_bi_50000_wsl, text_examples, preprocessor)

---------------------
TEXT:
The place was pretty decent. We got seated quickly and the service was pretty good. It was a bit too dark and loud for me, but that's just my preference. I would definitely come back here again
PREDICTED STARS:4
---------------------
TEXT:
Chuck-e-cheeze makes its competitors look like trash. Dave and busters: shit. Nobody is as good as chuck-e-cheeze. I will not be going anywhere else for a good time.
PREDICTED STARS:3
---------------------
TEXT:
I lobe this plase. Evertime I com here its greate. Will for sure come agan@
PREDICTED STARS:4


Play around with chars2vec

In [0]:
!pip install chars2vec

Collecting chars2vec
[?25l  Downloading https://files.pythonhosted.org/packages/04/0a/8c327aae23e0532d239ec7b30446aca765eb5d9547b4c4b09cdd82e49797/chars2vec-0.1.7.tar.gz (8.1MB)
[K     |████████████████████████████████| 8.1MB 4.0MB/s 
[?25hBuilding wheels for collected packages: chars2vec
  Building wheel for chars2vec (setup.py) ... [?25l[?25hdone
  Created wheel for chars2vec: filename=chars2vec-0.1.7-cp36-none-any.whl size=8111096 sha256=5eb8672ddc46237932fced4d74bfbd9db1027a6401fc078fa917babcc4c8e73a
  Stored in directory: /root/.cache/pip/wheels/97/b6/65/d7e778ef1213ec77d315aea0f536068b96e36cc94c02abbfde
Successfully built chars2vec
Installing collected packages: chars2vec
Successfully installed chars2vec-0.1.7


In [0]:
import chars2vec as c2v
c2v_100 = c2v.load_model('eng_100')
input_length = 300

In [0]:
sample = dataset[0]["text"].split()
vectors = c2v_100.vectorize_words(sample, maxlen_padseq=input_length)
vectors.shape

(39, 100)

In [0]:
glove_dir = os.path.join(root_folder, "source", "glove")
glove_file, embedding_dim = os.path.join(glove_dir, "glove.6B.100d.txt"), 100 # embedding dim should match file name
glove_mappings = {}
with open(glove_file) as gf:
    for line in gf:
        parts = line.split()
        word = parts[0]
        vec = np.asarray(parts[1:], dtype='float32')
        glove_mappings[word] = vec
embedding_matrix = np.zeros((max_vocab_len, embedding_dim))
found, not_found = 0, 0
for word, i in tokenizer.word_index.items():
    if i >= max_vocab_len:
        continue
    vec = glove_mappings.get(word)
    if vec is not None:
        embedding_matrix[i] = vec
        found += 1
    else:
        not_found += 1
print(f"Found {found} words")
print(f"Couldn't find {not_found} words")
print(len(glove_mappings))

Found 67209 words
Couldn't find 32790 words
400000


In [0]:
# old dumb stuff
def test_weight(mps, weights_chunk, q, test_sets):
    print("helloo")
    for ws in weights_chunk:
        ensemble = EnsembleModel([(mps[i][0], ws[i]) for i in range(len(mps))])
        temp = {"temp": (ensemble, preproc)}
        results = compare_on_test_sets(test_sets, temp, show_results=False)
        results['weights'] = ws
        q.put(results)
        acc, star_err = results["temp"]["overall"]
        print("weights:", ws, "accuracy:", acc, "star-error:", star_err)

def find_best_weighting_synch(mps, test_sets):
  # WIP
    bests = {'acc': [0, []], 'err': [5, []], 'score': [-1000, []]}
    m = Manager()
    q = m.Queue()
    preproc = EnsemblePreprocessor([mp[1] for mp in mps])
    weights = list(generate_weights(len(mps)))
    chunk_size = 20
    weight_chunks = [weights[i: i + chunk_size] for i in range(0, len(weights), chunk_size)]
    chunks = [(mps, wc, q, test_sets) for wc in weight_chunks]
    with Pool(processes=os.cpu_count()) as pool:
        pool.starmap(test_weight, chunks)
    while not q.empty():
        results = q.get()
        acc, star_err = results["temp"]["overall"]
        weights = results["weights"]
        score = acc / 100 - star_err
        if acc > bests['acc'][0]:
            bests['acc'] = [acc, weights]
        if star_err < bests['err'][0]:
            bests['err'] = [star_err, weights]
        if score > bests['score'][0]:
            bests['score'] = [score, weights]
    return bests

def find_best_weighting(models_and_preprocs, test_sets, save_as, start_from_file=False):
    save_file = os.path.join(test_set_dir, save_as)
    bests = {'acc': [0, []], 'err': [5, []], 'score': [-1000, []], 'cur': 0}
    preproc = EnsemblePreprocessor([mp[1] for mp in models_and_preprocs])
    all_weights = list(generate_weights(num_models=len(models_and_preprocs)))
    if start_from_file:
        with open(save_file) as sf:
            bests = json.load(sf)
    weights = all_weights[bests['cur']:]
    for ws in weights:
        ensemble = EnsembleModel([(models_and_preprocs[i][0], ws[i]) for i in range(len(ws))])
        temp = {"temp": (ensemble, preproc)}
        results = compare_on_test_sets(test_sets, temp, show_results=False)
        acc, star_err = results["temp"]["overall"]
        score = acc / 100 - star_err
        if acc > bests['acc'][0]:
            bests['acc'] = [acc, ws]
        if star_err < bests['err'][0]:
            bests['err'] = [star_err, ws]
        if score > bests['score'][0]:
            bests['score'] = [score, ws]
        bests['cur'] += 1
        with open(save_file, 'w+') as sf:
            print(json.dumps(bests), file=sf)
    return bests

In [0]:
bert_ensemble_config = [ # best weights
    (gru_bi_50000_wsl, .4),
    (gru_bi_char_wscc, .45),
    (bert_model, .15)
]

char_ensemble_config = [  
    (gru_bi_50000_wsl, .45),
    (gru_bi_char_wscc, .3),
    (gru_bi_char, .25)
]

cec2 = [  # best weights
    (gru_bi_50000_wsl, .3),
    (gru_bi_char_wscc, .4),
    (gru_bi_char, .3)
]

full_ensemble_config = [
    (gru_bi_50000_wsl, .25),
    (gru_bi_char_wscc, .25),
    (gru_bi_char, .25),
    (bert_model, .25)
]
fec2 = [
    (gru_bi_50000_wsl, .2),
    (gru_bi_char_wscc, .2),
    (gru_bi_char, .4),
    (bert_model, .2)
]

bert_ensemble = EnsembleModel(bert_ensemble_config)
char_ensemble = EnsembleModel(char_ensemble_config)
char_ensemble2 = EnsembleModel(cec2)
full_ensemble = EnsembleModel(full_ensemble_config)
full_ensemble2 = EnsembleModel(fec2)
bert_ensemble_preprocessor = EnsemblePreprocessor([preprocessor, char_preprocessor, bert_preprocessor])
char_ensemble_preprocessor = EnsemblePreprocessor([preprocessor, char_preprocessor, char_preprocessor])
full_ensemble_preprocessor = EnsemblePreprocessor([preprocessor, char_preprocessor, char_preprocessor, bert_preprocessor])







def get_best_weights(self, preprocessed_inputs, labels, weights_generator):
        if self.avg_predictions:
            return self.get_best_weights1(preprocessed_inputs, labels, weights_generator)
        else:
            return self.get_best_weights2(preprocessed_inputs, labels, weights_generator)

    def get_best_weights1(self, preprocessed_inputs, labels, weights_generator):
        bests = {'acc': [0, []], 'err': [5, []], 'score': [-1000, []]}
        num_samples = len(preprocessed_inputs[0])
        if num_samples == 2:   # dumbass hard code to fix char inputs - np.ma.size(..., axis=-2) didn't work
            num_samples = len(preprocessed_inputs[0][0])
        probs = np.zeros((self.num_models, num_samples, 5))
        for i, inputs in enumerate(preprocessed_inputs):
            probs[i] = self.models[i].predict(inputs)
        all_predictions = np.argmax(probs, axis=2)
        for weights in weights_generator:
            average_preds = np.average(all_predictions, axis=0, weights=weights)
            predictions = np.around(average_preds)
            acc = np.sum(predictions == labels) / num_samples
            star_err = np.sum(np.abs(predictions - labels)) / num_samples
            score = acc - star_err
            if acc > bests['acc'][0]:
                bests['acc'] = [acc, weights]
            if star_err < bests['err'][0]:
                bests['err'] = [star_err, weights]
            if score > bests['score'][0]:
                bests['score'] = [score, weights]
        return bests

    def get_best_weights2(self, preprocessed_inputs, labels, weights_generator):
        assert len(preprocessed_inputs) == self.num_models
        bests = {'acc': [0, []], 'err': [5, []], 'score': [-1000, []]}
        num_samples = len(preprocessed_inputs[0])
        if num_samples == 2:   # dumbass hard code to fix char inputs - np.ma.size(..., axis=-2) didn't work
            num_samples = len(preprocessed_inputs[0][0])
        probs = np.zeros((self.num_models, num_samples, 5))
        for i, inputs in enumerate(preprocessed_inputs):
            probs[i] = self.models[i].predict(inputs)
        for weights in weights_generator:
            average_probs = np.average(probs, axis=0, weights=weights)
            predictions = np.argmax(average_probs, axis=1)
            acc = np.sum(predictions == labels) / num_samples
            star_err = np.sum(np.abs(predictions - labels)) / num_samples
            score = acc - star_err
            if acc > bests['acc'][0]:
                bests['acc'] = [acc, weights]
            if star_err < bests['err'][0]:
                bests['err'] = [star_err, weights]
            if score > bests['score'][0]:
                bests['score'] = [score, weights]
        return bests


        # averages the predictions
    def predict_ratings1(self, preprocessed_inputs):
        assert len(preprocessed_inputs) == self.num_models
        num_samples = len(preprocessed_inputs[0])
        if num_samples == 2:   # dumbass hard code to fix char inputs - np.ma.size(..., axis=-2) didn't work
            num_samples = len(preprocessed_inputs[0][0])
        predictions = np.zeros((self.num_models, num_samples, 5))
        for i, inputs in enumerate(preprocessed_inputs):
            predictions[i] = self.models[i].predict(inputs)
        stars = np.argmax(predictions, axis=2) + 1
        assert stars.shape == (self.num_models, num_samples)
        average_per_model = np.average(stars, axis=0, weights=self.weights)
        assert average_per_model.shape == (num_samples,)
        return np.around(average_per_model).astype(int)



name = "big_ensemble_no_challenge_5_results"
with open(os.path.join(ensemble_dir, name)) as ber:
    bests = json.load(ber)
ensemble.weights = bests['acc'][1]
err_ensemble = ensemble.copy()
err_ensemble.weights = bests['err'][1]
score_ensemble = ensemble.copy()
score_ensemble.weights = bests['score'][1]
mps1 = {"BEST_ACC_ENSEMBLE": (ensemble, ensemble_preproc), "BEST_ERR_ENSEMBLE": (err_ensemble, ensemble_preproc), "BEST_SCORE_ENSEMBLE": (score_ensemble, ensemble_preproc)}




def best_ensemble(mpns, test_set, show_results=True, avg_predictions=False):
    bests = {'acc': [0, None, None, [], ""], 'err': [5, None, None, [], ""], 'score': [-100, None, None, [], ""]}
    mets = ('acc', 'err', 'score')
    for n in range(1, len(mpns) + 1):
        for group in combinations(mpns, n):
            ensemble = EnsembleModel([(mpn[0], 0.) for mpn in group], avg_predictions=avg_predictions)
            preproc = EnsemblePreprocessor([mpn[1] for mpn in group])
            name = "-".join([mpn[2] for mpn in group])
            result = best_weights(ensemble, preproc, test_set) 
            acc, err, score = [result[met][0] for met in mets]
            if show_results:
                print("==============\n{}:\n".format(name))
                print("accuracy: {:.3f}, star-error: {:.3f}, score: {:.3f}\n".format(acc, err, score))
            acc_weights, err_weights, score_weights = [result[met][1] for met in mets]
            if acc > bests['acc'][0]:
                bests['acc'] = [acc, ensemble, preproc, acc_weights, name]
            if err < bests['err'][0]:
                bests['err'] = [err, ensemble, preproc, err_weights, name]
            if score > bests['score'][0]:
                bests['score'] = [score, ensemble, preproc, score_weights, name]
    return bests

def models_and_preprocs_from_bests(bests, avg_predictions=False):
    models_and_preprocs = {}
    for met in ('ACC', 'ERR', 'SCORE'):
        _, model, preproc, weights, name = bests[met.lower()]
        model_clone = model.copy()
        model_clone.weights = weights.copy()
        model_clone.num_models = model.num_models
        if avg_predictions:
            model_clone.predict_ratings = model_clone.predict_ratings1
        models_and_preprocs[f"BEST_{met}_ENSEMBLE"] = (model_clone, preproc)
    return models_and_preprocs


ensemble_preproc = EnsemblePreprocessor([glove_preprocessor, glove_char_preprocessor,
                                         preprocessor, preprocessor, preprocessor,
                                         char_preprocessor, char_preprocessor,
                                         bert_preprocessor])

