In [1]:
import importlib
import classification_training_utils
importlib.reload(classification_training_utils)
import utils
importlib.reload(utils)

import sys
sys.path.append('../')

import pandas as pd
from sentence_transformers import SentenceTransformer, LoggingHandler, losses, util
from sentence_transformers.datasets import SentenceLabelDataset
from sentence_transformers.readers import InputExample
from sentence_transformers.evaluation import TripletEvaluator
from sklearn.metrics.pairwise import cosine_similarity
from utils import load_model, replace_nan_with, load_big_consulting_export, callback
import numpy as np
import random
from torch.utils.data import DataLoader
from datetime import datetime
import os
from collections import defaultdict
import model_freeze as freeze
import matplotlib.pyplot as plt
plt.style.use('ggplot')


from classification_training_utils import get_big_consulting_df, collect_classification_labels, get_relevant_classifications, train, filter_relevant_classifications, get_top_values, get_news_df
from utils import create_replace_no_tags_embeddings
from transformers import EarlyStoppingCallback
import math
import torch
from tqdm import tqdm
tqdm.pandas()
import pickle

In [2]:
# def encode_w_sbert(snippet):
#     return model.encode(snippet)

# def save_batch(col_name):
#     train_temp_df = pd.read_csv(f'{dataset_dir}train.tsv', sep='\t')
#     train_temp_df['embedding'] = train_temp_df.progress_apply(
#                     lambda row: encode_w_sbert(row[col_name]), axis=1)
#     train_temp_df.to_csv(f'../classification-training-data/{col_name}/train.tsv', sep='\t')

#     test_temp_df = pd.read_csv(f'{dataset_dir}test.tsv', sep='\t')
#     test_temp_df['embedding'] = test_temp_df.progress_apply(
#                     lambda row: encode_w_sbert(row[col_name]), axis=1)
#     test_temp_df.to_csv(f'../classification-training-data/{col_name}/test.tsv', sep='\t')

#     dev_temp_df = pd.read_csv(f'{dataset_dir}dev.tsv', sep='\t')
#     dev_temp_df['embedding'] = dev_temp_df.progress_apply(
#                     lambda row: encode_w_sbert(row[col_name]), axis=1)
#     dev_temp_df.to_csv(f'../classification-training-data/{col_name}/dev.tsv', sep='\t')

# save_batch('snippet')
# save_batch('replace')

In [3]:
# Default experiment configuration. The experiment cells further down
# overwrite individual keys (and `dataset_dir`) before calling run_pipeline().
params = {
    "EPOCHS": 1,
    "UNFREEZE_LAYERS": 2,
    "EXCLUDE_ENTITY_OTHER": True,
    # Other starting checkpoints that were tried:
    # "brjezierski/S3BERT", "intfloat/e5-small-v2"
    "INITIALIZED_MODEL": "all-MiniLM-L12-v2",
    "OCCURENCE_CUTOFF": 2,
    "CREATE_NEW_SPLIT": False,
    "BATCH_SIZE": 32,
    "VAL_DEV_SIZE": 100,
    # One of: "replace_no_tags", "replace", 'snippet'
    "SNIPPET_COLUMN_NAME": 'replace',
    # Any of: "ai", "car", "consulting", "consulting2"
    "DATASETS": ["ai", "car"],
}

# Directory handed to train() by run_pipeline(); read there as a module-level name.
dataset_dir = "../classification-training-data/replace/"


In [4]:
def _load_initial_model():
    """Load a fresh copy of the starting model.

    Uses ``params["INITIALIZED_MODEL"]`` when it is truthy, otherwise falls
    back to whatever ``load_model()`` returns without arguments.
    """
    if params["INITIALIZED_MODEL"]:
        return load_model(model=params["INITIALIZED_MODEL"])
    return load_model()


def _label_and_filter(df, dataset_key=None):
    """Run the shared labelling/filtering steps on one dataset frame.

    Args:
        df: frame returned by ``get_big_consulting_df`` / ``get_news_df``.
        dataset_key: key looked up in ``params["NEW_CLASSIFICATIONS"]``
            ("ai" or "car"). When that mapping has an entry for the key, the
            pickled mapping it points to replaces ``get_top_values`` and is
            applied to the ``snippet`` column. ``None`` disables the override.

    Returns:
        ``(filtered_df, classifications)`` as produced by
        ``filter_relevant_classifications``.

    Side effects:
        Overwrites ``params["WARMUP_STEPS"]`` with 10% of
        ``len(filtered_df) * params["EPOCHS"]``; when several datasets are
        processed the last one wins.
    """
    df = collect_classification_labels(df)

    has_override = (
        dataset_key is not None
        and "NEW_CLASSIFICATIONS" in params
        and dataset_key in params["NEW_CLASSIFICATIONS"]
    )
    if has_override:
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # NEW_CLASSIFICATIONS at files you trust.
        with open(params["NEW_CLASSIFICATIONS"][dataset_key], 'rb') as file:
            new_classifications = pickle.load(file)
        df['top_classification'] = df['snippet'].map(new_classifications)
    else:
        df = get_top_values(df, params)

    all_classifications, relevant_classifications = get_relevant_classifications(df, params)
    df, kept_classifications = filter_relevant_classifications(
        df, all_classifications, relevant_classifications)

    params["WARMUP_STEPS"] = int(len(df) * params["EPOCHS"] * 0.1)
    return df, kept_classifications


def _attach_no_tags_embeddings(df, pickle_stem):
    """Attach pre-computed ``replace_no_tags`` embeddings to ``df``.

    Loads ``../glanos-data/embeddings/<pickle_stem>_replace_no_tags.pickle``,
    passes it through ``create_replace_no_tags_embeddings`` and maps it onto
    the ``replace_no_tags`` column to fill the ``embedding`` column.
    """
    embeddings_prefix = '../glanos-data/embeddings/'
    # NOTE(review): pickle.load can execute arbitrary code; trusted files only.
    with open(f'{embeddings_prefix}{pickle_stem}_replace_no_tags.pickle', 'rb') as f:
        no_tags_embeddings = pickle.load(f)
    df = create_replace_no_tags_embeddings(df, no_tags_embeddings)
    df['embedding'] = df['replace_no_tags'].map(no_tags_embeddings)
    return df


def run_pipeline(save_model=False):
    """Fine-tune the starting model on the selected datasets and print scores.

    Reads the notebook-level ``params`` dict and ``dataset_dir`` string.
    Datasets are processed in the fixed order consulting, ai, car, each only
    if its key is present in ``params["DATASETS"]``. A news dataset ("ai" or
    "car") whose key is listed exactly twice is concatenated with itself
    before training.

    After ``train(...)`` returns, prints ``Score`` for the trained model and
    ``Baseline`` for a freshly loaded copy of the starting model, both
    evaluated with the test evaluator returned by ``train``.

    Args:
        save_model: forwarded unchanged to ``train``.

    Returns:
        None.
    """
    selected = params["DATASETS"]
    use_no_tags = params['SNIPPET_COLUMN_NAME'] == "replace_no_tags"

    model = _load_initial_model()

    training_datasets = []
    classification_lists = []

    if "consulting" in selected:
        consulting_df, consulting_classifications = _label_and_filter(
            get_big_consulting_df(params))
        if use_no_tags:
            consulting_df = _attach_no_tags_embeddings(consulting_df, 'big_consulting_2')
        training_datasets.append(consulting_df)
        classification_lists.append(consulting_classifications)

    for key, table in (("ai", 'ai_news'), ("car", 'car_news')):
        if key not in selected:
            continue
        news_df, news_classifications = _label_and_filter(
            get_news_df(params, table), dataset_key=key)
        if use_no_tags:
            news_df = _attach_no_tags_embeddings(news_df, table)
        # The frame is passed on with all of its columns. The previous
        # `news_df.drop(columns=['embedding'])` call discarded its result, so
        # it never removed anything and only raised KeyError when the column
        # was missing.
        if selected.count(key) == 2:
            news_df = pd.concat([news_df, news_df], axis=0).reset_index(drop=True)
        training_datasets.append(news_df)
        classification_lists.append(news_classifications)

    print("Using datasets:", selected)

    _, test_evaluator, model = train(
        training_datasets, classification_lists, params, dataset_dir, model,
        save_model=save_model)

    print('Score', model.evaluate(test_evaluator))

    # Reload the untouched starting model to get a baseline on the same evaluator.
    print('Baseline', _load_initial_model().evaluate(test_evaluator))


In [5]:
# params["USE_REPLACE_DATA"] = True
# big_consulting_replace_df = get_big_consulting_df(params)
# params["USE_REPLACE_DATA"] = False
# big_consulting_no_replace_df = get_big_consulting_df(params)
# df = pd.concat([big_consulting_replace_df, big_consulting_no_replace_df, ai_news_df, car_news_df], axis=0).reset_index(drop=True) #big_consulting_df, ai_news_df, car_news_df
# df = collect_classification_labels(df, verbose=False)
# df = get_top_values(df, params)
# all_classifications, relevant_classifications = get_relevant_classifications(df, params)
# df, classifications = filter_relevant_classifications(df, all_classifications, relevant_classifications)
# params["WARMUP_STEPS"] = int(len(df) * params["EPOCHS"] * 0.1)  # 10% of train data

# Path of the pickled classifications object read below.
classifications_path =  "../classification-training-data/classifications.pkl"
# with open("../classification-training-data/classifications.pkl", 'wb') as file:
#     pickle.dump(classifications, file)
    
# Load the pickle into the notebook-level name `classifications`.
# NOTE(review): pickle.load can execute arbitrary code from the file --
# only use it on files you trust.
with open(classifications_path, 'rb') as file:
    classifications = pickle.load(file)


In [6]:

# Accuracy Cosine Distance
# baseline - 0.77
# e=2 Using original data - 0.83
# BEST e=2 Using original data, unfreezing 2 last layers, only including words that occur at least 2 times - 0.93
# e=2 Using original data, unfreezing 2 last layers, only including words that occur at least 3 times - 0.93
# e=2 Using original data, unfreezing 2 last layers, only including words that occur at least 4 times - 0.92
# e=1 Using original data, unfreezing 2 last layers, starting with brjezierski/S3BERT, only including words that occur at least 2 times - 0.89
# e=2 Using original data, unfreezing 2 last layers, starting with brjezierski/S3BERT, only including words that occur at least 2 times - 0.92

# e=2 Using original and replacement data, unfreezing 2 last layers, excluding Entity and Other labels, starting with brjezierski/S3BERT, only including words that occur at least 2 times - 0.91
# e=2 Using original and replacement data, unfreezing 2 last layers, starting with brjezierski/S3BERT, only including words that occur at least 2 times - 0.88
# e=1 Using original and replacement data, unfreezing 2 last layers, excluding Entity and Other labels, starting with brjezierski/S3BERT, only including words that occur at least 2 times - 0.92

# e=2 Using original data, unfreezing 2 last layers, starting with intfloat/e5-small-v2, only including words that occur at least 2 times - 0.91
# e=2 Using original and replacement data, unfreezing 2 last layers, excluding Entity and Other labels, starting with intfloat/e5-small-v2, only including words that occur at least 2 times - 0.89

# e=2 Using replacement data, creating a new train-dev-test split, starting with brjezierski/S3BERT, only including words that occur at least 2 times - 0.96, 0.88 (baseline)
# e=1 Using replacement data, creating a new train-dev-test split, starting with brjezierski/S3BERT, only including words that occur at least 2 times - 0.94
# e=2 Using replacement data, creating a new train-dev-test split, excluding Entity and Other labels, starting with brjezierski/S3BERT, only including words that occur at least 2 times - 0.94
# e=1 Using replacement data, creating a new train-dev-test split, excluding Entity and Other labels, starting with brjezierski/S3BERT, only including words that occur at least 2 times - 0.96

# e=1 Using replacement data, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size 67325+38639


# Thesis testing
# From normal embeddings on combined ai+car w/ new test set: 78 vs. 80


# Experiment 1

In [7]:
# params["CREATE_NEW_SPLIT"] = False
# print('Ex2')
# params['SNIPPET_COLUMN_NAME'] = 'replace'
# dataset_dir = "../classification-training-data/replace/"
# model1 = run_pipeline()

# print('Ex3')
# params['SNIPPET_COLUMN_NAME'] = 'replace_no_tags'
# dataset_dir = "../classification-training-data/replace_no_tags/"
# model2 = run_pipeline()

# Experiment 1: run the pipeline once per text column, first while creating a
# new dev/test split (Ex4-Ex6), then again with EPOCHS = 2 while re-using the
# existing split in the same directories (Ex7-Ex9).
_EX1_COLUMNS = ['snippet', 'replace', 'replace_no_tags']

params.update({"VAL_DEV_SIZE": 400, "CREATE_NEW_SPLIT": True})

# Recorded results:
#   Ex4 (snippet)          Score 0.7604166666666666   Baseline 0.7569444444444444
#   Ex5 (replace)          Score 0.7927631578947368   Baseline 0.7269736842105263
#   Ex6 (replace_no_tags)  Score 0.8125               Baseline 0.7927631578947368
for _label, _column in zip(['Ex4', 'Ex5', 'Ex6'], _EX1_COLUMNS):
    print(_label)
    params['SNIPPET_COLUMN_NAME'] = _column
    dataset_dir = f"../classification-training-data/{_column}_new/"
    run_pipeline()

params.update({"EPOCHS": 2, "CREATE_NEW_SPLIT": False})

# Recorded results:
#   Ex7 (snippet)          Score 0.7708333333333334   Baseline 0.7569444444444444
#   Ex8 (replace)          Score 0.7894736842105263   Baseline 0.7269736842105263
#   Ex9 (replace_no_tags)  Score 0.7730263157894737   Baseline 0.7927631578947368
for _label, _column in zip(['Ex7', 'Ex8', 'Ex9'], _EX1_COLUMNS):
    print(_label)
    params['SNIPPET_COLUMN_NAME'] = _column
    dataset_dir = f"../classification-training-data/{_column}_new/"
    run_pipeline()

Ex4
<class 'dict'>
Using datasets: ['ai', 'car']
Training with multiple objectives


KeyboardInterrupt: 

# Experiment 2

In [14]:
# Experiment 2, first half: one epoch per text column, creating a new split.
# Only the first run asks train() to save the model.
# NOTE(review): params["DATASETS"] is not set in this cell; the saved output
# below shows ['ai', 'car', 'car'], i.e. a value left behind by another cell.
for _label, _column, _save in [
    ("Ex2.1", 'snippet', True),           # Best. Score 0.87, Baseline 0.79
    ("Ex2.2", 'replace', False),          # Score 0.79, Baseline 0.82
    ("Ex2.3", 'replace_no_tags', False),  # Score 0.8, Baseline 0.77
]:
    print(_label)
    params["CREATE_NEW_SPLIT"] = True
    params["SNIPPET_COLUMN_NAME"] = _column
    params["EPOCHS"] = 1
    dataset_dir = f"../classification-training-data/{_column}_seperate_classifications/"
    run_pipeline(save_model=_save)

Ex2.1
car_news_df 38834
car_news_df 77668
Using datasets: ['ai', 'car', 'car']
Training with multiple objectives
Creating new dev and test triplets
e=1, embeddings from snippet, creating a new train-dev-test split, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size67312

Training data size 67312
Validation data size 100
Test data size 100
Creating new dev and test triplets
e=1, embeddings from snippet, creating a new train-dev-test split, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size77468

Training data size 77468
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/2103 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.79
Score 0.87
Baseline 0.79


In [6]:
# Experiment 2, second half: two epochs per text column on the existing split.
params.update({"CREATE_NEW_SPLIT": False, "EPOCHS": 2})

for _label, _column in [
    ("Ex2.4", 'snippet'),          # Score 0.81, Baseline 0.72
    ("Ex2.5", 'replace'),          # Score 0.8, Baseline 0.82
    ("Ex2.6", 'replace_no_tags'),  # Score 0.81, Baseline 0.77
]:
    print(_label)
    params["SNIPPET_COLUMN_NAME"] = _column
    dataset_dir = f"../classification-training-data/{_column}_seperate_classifications/"
    run_pipeline()


Ex2.4
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=2, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size67312

Training data size 67312
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=2, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size38634

Training data size 38634
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.83


Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 1, step -1: 0.76
Score 0.81
Baseline 0.72
Ex2.5
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=2, embeddings from replace, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size67314

Training data size 67314
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=2, embeddings from replace, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size38634

Training data size 38634
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.86


Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 1, step -1: 0.82
Score 0.8
Baseline 0.82
Ex2.6
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=2, embeddings from replace_no_tags, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size67314

Training data size 67314
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=2, embeddings from replace_no_tags, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size38634

Training data size 38634
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.8


Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 1, step -1: 0.78
Score 0.81
Baseline 0.77


# Experiment 3

In [11]:
# Experiment 3: list "car" twice so run_pipeline() concatenates the car
# dataset with itself before training ("two times the dataset").
params["CREATE_NEW_SPLIT"] = False
params["EPOCHS"] = 1
params["DATASETS"] = ["ai", "car", "car"]
params["SNIPPET_COLUMN_NAME"] = 'snippet'
params["UNFREEZE_LAYERS"] = 2
params["EXCLUDE_ENTITY_OTHER"] = True
params["OCCURENCE_CUTOFF"] = 2
params["INITIALIZED_MODEL"] = "all-MiniLM-L12-v2"

# Label fixed: this run used to print "Ex2.6", which is already the label of
# the last Experiment 2 run and made the two logs indistinguishable.
print("Ex3.1")
dataset_dir = "../classification-training-data/snippet_seperate_classifications/"
run_pipeline()
# Score 0.86
# Baseline 0.79
# Why?

Ex2.6
car_news_df 38834
car_news_df 77668
Using datasets: ['ai', 'car', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size67329

Training data size 67329
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size77302

Training data size 77302
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/2104 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.8
Score 0.86
Baseline 0.79


# Experiment 4

In [7]:
# Experiment 4: sweep params["UNFREEZE_LAYERS"] with everything else fixed.
params.update({
    "CREATE_NEW_SPLIT": False,
    "EPOCHS": 1,
    "DATASETS": ["ai", "car"],
    "SNIPPET_COLUMN_NAME": 'snippet',
    "EXCLUDE_ENTITY_OTHER": True,
    "OCCURENCE_CUTOFF": 2,
    "INITIALIZED_MODEL": "all-MiniLM-L12-v2",
})

# (label, UNFREEZE_LAYERS) -- commented entries were run earlier; the recorded
# Score / Baseline of every run is kept next to it.
for _label, _unfreeze_layers in [
    # ("Ex4.2", 1),   # Score 0.81, Baseline 0.79
    # ("Ex4.3", 2),   # Score 0.85, Baseline 0.79
    # ("Ex4.4", 3),   # Score 0.86, Baseline 0.79
    # ("Ex4.5", 4),   # Score 0.87, Baseline 0.79
    # ("Ex4.6", 5),   # Score 0.92, Baseline 0.79
    # ("Ex4.7", 6),   # Score 0.92, Baseline 0.79
    ("Ex4.8", 7),     # Score 0.87, Baseline 0.79
    ("Ex4.9", 8),     # Score 0.9, Baseline 0.79
    ("Ex4.10", 9),    # Score 0.89, Baseline 0.79
    ("Ex4.11", 10),   # Score 0.87, Baseline 0.79
]:
    print(_label)
    params["UNFREEZE_LAYERS"] = _unfreeze_layers
    dataset_dir = "../classification-training-data/snippet_seperate_classifications/"
    run_pipeline()

Ex4.8
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 7 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 2 times, training data size 67329

Training data size 67329
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 7 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 2 times, training data size 38651

Training data size 38651
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.84
Score 0.87
Baseline 0.79
Ex4.9
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 8 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 2 times, training data size 67329

Training data size 67329
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 8 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 2 times, training data size 38651

Training data size 38651
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.87
Score 0.9
Baseline 0.79
Ex4.10
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 9 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 2 times, training data size 67329

Training data size 67329
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 9 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 2 times, training data size 38651

Training data size 38651
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.89
Score 0.89
Baseline 0.79
Ex4.11
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 10 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 2 times, training data size 67329

Training data size 67329
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 10 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 2 times, training data size 38651

Training data size 38651
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.9
Score 0.87
Baseline 0.79


# Experiment 5

In [7]:
# Experiment 5: toggle params["EXCLUDE_ENTITY_OTHER"] with everything else fixed.
params.update({
    "CREATE_NEW_SPLIT": False,
    "EPOCHS": 1,
    "DATASETS": ["ai", "car"],
    "SNIPPET_COLUMN_NAME": 'snippet',
    "UNFREEZE_LAYERS": 2,
    "OCCURENCE_CUTOFF": 2,
    "INITIALIZED_MODEL": "all-MiniLM-L12-v2",
})

for _label, _exclude_entity_other in [
    ("Ex5.1", True),   # Score 0.85, Baseline 0.79
    ("Ex5.2", False),  # Score 0.8, Baseline 0.79
]:
    print(_label)
    params["EXCLUDE_ENTITY_OTHER"] = _exclude_entity_other
    dataset_dir = "../classification-training-data/snippet_seperate_classifications/"
    run_pipeline()

Ex5.1
car_news_df 38834
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size67329

Training data size 67329
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, only including words that occur at least 2 times, training data size38651

Training data size 38651
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.81
Score 0.85
Baseline 0.79
Ex5.2
car_news_df 177800
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, only including words that occur at least 2 times, training data size262127

Training data size 262127
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, only including words that occur at least 2 times, training data size177617

Training data size 177617
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/5550 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.84
Score 0.8
Baseline 0.79


# Experiment 6

In [7]:
# Experiment 6: sweep params["OCCURENCE_CUTOFF"] with everything else fixed.
params.update({
    "CREATE_NEW_SPLIT": False,
    "EPOCHS": 1,
    "DATASETS": ["ai", "car"],
    "SNIPPET_COLUMN_NAME": 'snippet',
    "UNFREEZE_LAYERS": 2,
    "EXCLUDE_ENTITY_OTHER": True,
    "INITIALIZED_MODEL": "all-MiniLM-L12-v2",
})

# (label, OCCURENCE_CUTOFF) -- commented entries were run earlier; the recorded
# Score / Baseline of every run is kept next to it.
for _label, _occurrence_cutoff in [
    # ("Ex6.1", 0),   # Score 0.86, Baseline 0.79
    # ("Ex6.2", 1),   # Score 0.85, Baseline 0.79
    # ("Ex6.3", 2),   # Score 0.83, Baseline 0.79
    # ("Ex6.4", 3),   # Score 0.87, Baseline 0.79
    # ("Ex6.5", 4),   # Score 0.86, Baseline 0.79
    # ("Ex6.6", 5),   # Score 0.9, Baseline 0.79
    # ("Ex6.7", 6),   # Score 0.89, Baseline 0.79
    # ("Ex6.8", 7),   # Score 0.88, Baseline 0.79
    ("Ex6.9", 8),     # Score 0.89, Baseline 0.79
    ("Ex6.10", 9),    # Score 0.89, Baseline 0.79
    ("Ex6.11", 10),   # Score 0.86, Baseline 0.79
]:
    print(_label)
    params["OCCURENCE_CUTOFF"] = _occurrence_cutoff
    dataset_dir = "../classification-training-data/snippet_seperate_classifications/"
    run_pipeline()


Ex6.9
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 8 times, training data size 67227

Training data size 67227
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 8 times, training data size 38565

Training data size 38565
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1205 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.81
Score 0.89
Baseline 0.79
Ex6.10
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 9 times, training data size 67219

Training data size 67219
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 9 times, training data size 38549

Training data size 38549
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1204 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.84
Score 0.89
Baseline 0.79
Ex6.11
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 10 times, training data size 67210

Training data size 67210
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 10 times, training data size 38531

Training data size 38531
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1204 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.87
Score 0.86
Baseline 0.79


# Experiment 7

In [16]:
# Experiment 7: swap the starting checkpoint (params["INITIALIZED_MODEL"])
# with everything else fixed.
params.update({
    "CREATE_NEW_SPLIT": False,
    "EPOCHS": 1,
    "DATASETS": ["ai", "car"],
    "SNIPPET_COLUMN_NAME": 'snippet',
    "UNFREEZE_LAYERS": 2,
    "EXCLUDE_ENTITY_OTHER": True,
    "OCCURENCE_CUTOFF": 2,
})
dataset_dir = "../classification-training-data/snippet_seperate_classifications/"

# (label, checkpoint) -- commented entries were run earlier; the recorded
# Score / Baseline of every run is kept next to it.
for _label, _model_name in [
    # ("Ex7.1", "all-MiniLM-L12-v2"),        # Score 0.87, Baseline 0.79
    # ("Ex7.2", "brjezierski/S3BERT"),       # Score 0.86, Baseline 0.79
    # ("Ex7.3", "intfloat/e5-small-v2"),     # Score 0.8, Baseline 0.76
    # ("Ex7.4", "thenlper/gte-base"),        # Score 0.9, Baseline 0.8
    # ("Ex7.5", "BAAI/bge-base-en-v1.5"),    # Score 0.9, Baseline 0.78
    # ("Ex7.6", "intfloat/e5-base-v2"),      # Score 0.89, Baseline 0.81
    # ("Ex7.7", "BAAI/bge-small-en-v1.5"),   # Score 0.84, Baseline 0.77
    # ("Ex7.8", "intfloat/e5-small-v2"),     # Score 0.82, Baseline 0.76
    # ("Ex7.9", "thenlper/gte-small"),       # Score 0.81, Baseline 0.78
    ("Ex7.10", "all-mpnet-base-v2"),         # Score 0.91, Baseline 0.79
]:
    print(_label)
    params["INITIALIZED_MODEL"] = _model_name
    run_pipeline()


Ex7.10
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, starting with all-mpnet-base-v2, only including words that occur at least 2 times, training data size 67329

Training data size 67329
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 2 last layers, excluding Entity and Other labels, starting with all-mpnet-base-v2, only including words that occur at least 2 times, training data size 38651

Training data size 38651
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.91
Score 0.91
Baseline 0.79


# Experiment 8

In [15]:
# Experiment 8: single run with the configuration considered best so far
# (open question from the notes: this checkpoint or S3BERT?). The model is
# saved via save_model=True.

print("Ex8.1")
params.update({
    "CREATE_NEW_SPLIT": False,
    "EPOCHS": 1,
    "DATASETS": ["ai", "car"],
    "SNIPPET_COLUMN_NAME": 'snippet',
    "UNFREEZE_LAYERS": 5,
    "EXCLUDE_ENTITY_OTHER": True,
    "OCCURENCE_CUTOFF": 3,
    "INITIALIZED_MODEL": "all-MiniLM-L12-v2",
})
dataset_dir = "../classification-training-data/snippet_seperate_classifications/"
run_pipeline(save_model=True)

# Recorded scores:
#S3BERT 0.89
#SBERT 0.9

Ex8.1
car_news_df 38818
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 5 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 3 times, training data size67319

Training data size 67319
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 5 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 3 times, training data size38635

Training data size 38635
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.8
Score 0.9
Baseline 0.79


# Experiment 9

In [6]:
# Experiment 9.1 -- combined consulting sim -> ai+car class.
# Starts from the brjezierski/sentence-embeddings-similarity checkpoint and runs
# the pipeline for one epoch on the ai + car datasets with the existing
# dev/test triplets.

print("Ex9.1")
params["CREATE_NEW_SPLIT"] = False
params["EPOCHS"] = 1
params["DATASETS"] = ["ai", "car"]
params["SNIPPET_COLUMN_NAME"] = 'snippet'
params["UNFREEZE_LAYERS"] = 5
# Was `TrueBAAI/bge-small-en-v1.5` (a model name pasted onto the literal), which
# is not valid Python; the recorded output ("excluding Entity and Other labels")
# shows the intended value is True.
params["EXCLUDE_ENTITY_OTHER"] = True
params["OCCURENCE_CUTOFF"] = 3
params["INITIALIZED_MODEL"] = "brjezierski/sentence-embeddings-similarity"
# Module-level global, set here before the run_pipeline call.
dataset_dir = "../classification-training-data/snippet_seperate_classifications/"
run_pipeline(save_model=True)

# Recorded results:
# Score 0.89
# Baseline 0.79


Ex9.1
car_news_df 38818
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 5 last layers, excluding Entity and Other labels, starting with brjezierski/sentence-embeddings-similarity, only including words that occur at least 3 times, training data size67319

Training data size 67319
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 5 last layers, excluding Entity and Other labels, starting with brjezierski/sentence-embeddings-similarity, only including words that occur at least 3 times, training data size38635

Training data size 38635
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.83
Score 0.89
Baseline 0.79


# Experiment 10

In [8]:
# Experiment 10.1 -- combined ai+car sim -> ai+car class.
# Same settings as the other one-epoch ai + car runs, but starting from the
# brjezierski/sentence-embeddings-similarity-ai-car checkpoint.
# NOTE(review): the output stored with this cell is only a KeyboardInterrupt, so
# the scores noted at the bottom presumably come from an earlier run -- confirm.

print("Ex10.1")
params.update({
    "CREATE_NEW_SPLIT": False,
    "EPOCHS": 1,
    "DATASETS": ["ai", "car"],
    "SNIPPET_COLUMN_NAME": 'snippet',
    "UNFREEZE_LAYERS": 5,
    "EXCLUDE_ENTITY_OTHER": True,
    "OCCURENCE_CUTOFF": 3,
    "INITIALIZED_MODEL": "brjezierski/sentence-embeddings-similarity-ai-car",
})
# Module-level global, set here before the run_pipeline call.
dataset_dir = "../classification-training-data/snippet_seperate_classifications/"
run_pipeline(save_model=True)

# Noted results:
# Score 0.93
# Baseline 0.8


Ex10.1


KeyboardInterrupt: 

# Experiment 11

In [8]:
# Experiment 11.1 -- add labeled data, from best ai+car.
# Adds a NEW_CLASSIFICATIONS entry with pickle paths for "ai", "car" and "joint"
# (presumably BERTopic-derived labels, going by the directory name -- confirm)
# and uses a separate dataset directory for this variant.
# NOTE(review): the output stored with this cell reports the starting model as
# brjezierski/sentence-embeddings-similarity-ai-car, not the all-MiniLM-L12-v2
# set here, and stops before any score is printed; confirm which run the scores
# noted at the bottom belong to.

print("Ex11.1")
params.update({
    "CREATE_NEW_SPLIT": False,
    "EPOCHS": 1,
    "DATASETS": ["ai", "car"],
    "SNIPPET_COLUMN_NAME": 'snippet',
    "UNFREEZE_LAYERS": 5,
    "EXCLUDE_ENTITY_OTHER": True,
    "OCCURENCE_CUTOFF": 3,
    "INITIALIZED_MODEL": "all-MiniLM-L12-v2",
    "NEW_CLASSIFICATIONS": {
        "ai": "../classification-training-data/BERTopic_new_classifications/ai.pkl",
        "car": "../classification-training-data/BERTopic_new_classifications/car.pkl",
        "joint": "../classification-training-data/BERTopic_new_classifications/ai_car.pkl",
    },
})
# Module-level global, set here before the run_pipeline call.
dataset_dir = "../classification-training-data/snippet_seperate_classifications_new_classifications/"
run_pipeline(save_model=True)

# Noted results:
# Score 0.9111111111111111
# Baseline 0.8333333333333334

Ex11.1
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 5 last layers, excluding Entity and Other labels, starting with brjezierski/sentence-embeddings-similarity-ai-car, only including words that occur at least 3 times, training data size 262090

Training data size 262090
Validation data size 94
Test data size 90
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 5 last layers, excluding Entity and Other labels, starting with brjezierski/sentence-embeddings-similarity-ai-car, only including words that occur at least 3 times, training data size 177369

Training data size 177369
Validation data size 94
Test data size 90
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/5542 [00:00<?, ?it/s]

# Experiment 12

In [7]:
# Experiment 12.1 -- add labeled data, from best ai+car, with a new split.
# Differs from the Experiment 11 cell in two places: CREATE_NEW_SPLIT is True and
# the dataset directory is the "..._create_new_split" one.

print("Ex12.1")
params.update({
    "CREATE_NEW_SPLIT": True,
    "EPOCHS": 1,
    "DATASETS": ["ai", "car"],
    "SNIPPET_COLUMN_NAME": 'snippet',
    "UNFREEZE_LAYERS": 5,
    "EXCLUDE_ENTITY_OTHER": True,
    "OCCURENCE_CUTOFF": 3,
    "INITIALIZED_MODEL": "all-MiniLM-L12-v2",
    "NEW_CLASSIFICATIONS": {
        "ai": "../classification-training-data/BERTopic_new_classifications/ai.pkl",
        "car": "../classification-training-data/BERTopic_new_classifications/car.pkl",
        "joint": "../classification-training-data/BERTopic_new_classifications/ai_car.pkl",
    },
})
# Module-level global, set here before the run_pipeline call.
dataset_dir = "../classification-training-data/snippet_seperate_classifications_new_classifications_create_new_split/"
run_pipeline(save_model=True)

# Recorded results:
# Score 0.8
# Baseline 0.74

Ex12.1
Using datasets: ['ai', 'car']
Training with multiple objectives
Creating new dev and test triplets
e=1, embeddings from snippet, creating a new train-dev-test split, unfreezing 5 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 3 times, training data size 262073

Training data size 262073
Validation data size 100
Test data size 100
Creating new dev and test triplets
e=1, embeddings from snippet, creating a new train-dev-test split, unfreezing 5 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 3 times, training data size 177352

Training data size 177352
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/5542 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.84
Score 0.8
Baseline 0.74


# Experiment 13

In [6]:
# Experiment 13.1 -- best classification on ai+car (data augmentation) combined
# with the ai+car sim -> class checkpoint.
# Same NEW_CLASSIFICATIONS pickles and dataset directory as the Experiment 11
# cell, but starting from brjezierski/sentence-embeddings-similarity-ai-car.

# The label previously read "Ex11.1" (copied from Experiment 11), which made this
# run's log indistinguishable from Experiment 11's.
print("Ex13.1")
params["CREATE_NEW_SPLIT"] = False
params["EPOCHS"] = 1
params["DATASETS"] = ["ai", "car"]
params["SNIPPET_COLUMN_NAME"] = 'snippet'
params["UNFREEZE_LAYERS"] = 5
params["EXCLUDE_ENTITY_OTHER"] = True
params["OCCURENCE_CUTOFF"] = 3
params["INITIALIZED_MODEL"] = "brjezierski/sentence-embeddings-similarity-ai-car"
params["NEW_CLASSIFICATIONS"] = {
    "ai": "../classification-training-data/BERTopic_new_classifications/ai.pkl",
    "car": "../classification-training-data/BERTopic_new_classifications/car.pkl",
    "joint": "../classification-training-data/BERTopic_new_classifications/ai_car.pkl",
}
# Module-level global, set here before the run_pipeline call.
dataset_dir = "../classification-training-data/snippet_seperate_classifications_new_classifications/"
run_pipeline(save_model=True)

# Results from the output stored with this cell:
# Score 0.8777777777777778
# Baseline 0.8444444444444444

Ex11.1
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 5 last layers, excluding Entity and Other labels, starting with brjezierski/sentence-embeddings-similarity-ai-car, only including words that occur at least 3 times, training data size 262090

Training data size 262090
Validation data size 94
Test data size 90
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 5 last layers, excluding Entity and Other labels, starting with brjezierski/sentence-embeddings-similarity-ai-car, only including words that occur at least 3 times, training data size 177369

Training data size 177369
Validation data size 94
Test data size 90
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/5542 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.8297872340425532
Score 0.8777777777777778
Baseline 0.8444444444444444


# Experiment 14

In [8]:
# Experiment 14.1 -- same base settings as Experiment 8, plus
# params["LOSS"] = "BatchHardTripletLoss".
# NOTE(review): `params` persists across cells and this cell does not touch
# NEW_CLASSIFICATIONS, which earlier experiment cells set. The stored output
# (training data size 67319) matches the runs without that key, so a
# top-to-bottom run may not reproduce it -- confirm how run_pipeline treats the key.
print("Ex14.1")
params.update({
    "CREATE_NEW_SPLIT": False,
    "EPOCHS": 1,
    "DATASETS": ["ai", "car"],
    "SNIPPET_COLUMN_NAME": 'snippet',
    "UNFREEZE_LAYERS": 5,
    "EXCLUDE_ENTITY_OTHER": True,
    "OCCURENCE_CUTOFF": 3,
    "INITIALIZED_MODEL": "all-MiniLM-L12-v2",
    "LOSS": "BatchHardTripletLoss",
})
# Module-level global, set here before the run_pipeline call.
dataset_dir = "../classification-training-data/snippet_seperate_classifications/"

# No save_model argument here, unlike the Experiment 8-13 cells.
run_pipeline()

Ex14.1
Using datasets: ['ai', 'car']
Training with multiple objectives
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 5 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 3 times, training data size 67319

Training data size 67319
Validation data size 100
Test data size 100
Reading existing dev and test triplets
e=1, embeddings from snippet, unfreezing 5 last layers, excluding Entity and Other labels, starting with all-MiniLM-L12-v2, only including words that occur at least 3 times, training data size 38635

Training data size 38635
Validation data size 100
Test data size 100
Performance before fine-tuning:


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1207 [00:00<?, ?it/s]

Score at epoch 0, step -1: 0.83
Score 0.89
Baseline 0.79
