# Comparisons File

For our two real datasets, let's go through the different combinations of pipeline and compare the results.

## Setup

Import the library components, construct the datasets, and get the lists of methods.

In [1]:
# imports
from nlp_pipelines.pipeline import Pipeline
from nlp_pipelines.evaluate import evaluate
from nlp_pipelines.dataset import Dataset

In [2]:
# datasets

newsgroups = Dataset.from_parquet("./demo_data/sample_5_newsgroup_text.parquet", text_field="text", truth_field="label_text")

abstracts = Dataset.from_json("./demo_data/springer-127-parsed.json", text_field="abstract", truth_field="keywords")

In [3]:
# get the lists of methods

# vectorizers
from nlp_pipelines.vectorizer import __all__ as vectorizers

# classifiers
from nlp_pipelines.classifier import __all__ as classifiers

# clusterers
from nlp_pipelines.clusterer import __all__ as clusterers

# labelers
from nlp_pipelines.labeler import __all__ as labelers
from nlp_pipelines.labeler.predictive import __all__ as predictive_labelers

# preprocesors
from nlp_pipelines.preprocess import __all__ as preprocessors
preprocessors.append(None)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# method to write the result dict out
import csv

def dict_to_csv(data, output_file):
    all_metrics = set()
    for metrics in data.values():
        all_metrics.update(metrics.keys())
    all_metrics = sorted(all_metrics)

    # Write to CSV
    with open(output_file, mode='w', newline='') as f:
        writer = csv.writer(f)
        # Header
        writer.writerow(["method_chain"] + all_metrics)
        # Rows
        for method_chain, metrics in data.items():
            row = [method_chain] + [metrics.get(metric, "") for metric in all_metrics]
            writer.writerow(row)


## Clustering

Clustering is assigning each datapoint to a cluster, where the cluster meaning is not predefined.

In [5]:
cluster_test_results = {}

# combinations of preprocessor or none, vectorizer, clusterer
import itertools
import copy

combinations = itertools.product(preprocessors, vectorizers, clusterers) # pick one from each list (including None for preprocess)

# same test, train, val split
cluster_test, cluster_train = newsgroups.split(ratio=0.8, labeled=True, splitLabeled=True, seed=101)
for x in combinations:
    preproc, vec, method = x
    key = f"{str(preproc)}.{vec}.{method}"
    try:
        m = {"name": "cluster", "method": f"clusterer.{method}"}
        if method in ['Kmeans, GraphAffinity']:
            m['params'] = {'num_clusters': 5}
        pipeline_list = [
            {"name": "vectorize", "method": f"vectorizer.{vec}"},
            m
        ]
        if preproc is not None:
            pipeline_list = [{"name": "preprocess", "method": f"preprocess.{preproc}"}] + pipeline_list
        # construct pipeline
        pipeline = Pipeline(pipeline_list)
        # copy data
        train = copy.deepcopy(cluster_train)
        test = copy.deepcopy(cluster_test)
        # set data
        pipeline.set_data(train_data=train, run_data=test)
        # run
        pipeline.run()
        # evaluate
        cluster_test_results[key] = evaluate(pipeline.run_data, cluster_mode=True)
    except BaseException as e:
        print(f"ERROR in {key}", e)



ERROR in Lemmatize.Tfidf.UmapHdbscan Mix of label input types (string and number)


Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  121337 lr:  0.000000 avg.loss:  2.707368 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  121709 lr:  0.000000 avg.loss:  2.685067 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  123595 lr:  0.000000 avg.loss:  2.705142 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  123038 lr:  0.000000 avg.loss:  2.688063 ETA:   0h 0m 0s
  w = np.where(isolated_node_mask, 1, np.sqrt(w))


ERROR in Lemmatize.SentenceEmbedding.GraphAffinity Input contains NaN.
ERROR in PosRemoval.Tfidf.UmapHdbscan Mix of label input types (string and number)


Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  120801 lr:  0.000000 avg.loss:  2.703814 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  122191 lr:  0.000000 avg.loss:  2.700440 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  122445 lr:  0.000000 avg.loss:  2.719952 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  120497 lr:  0.000000 avg.loss:  2.710541 ETA:   0h 0m 0s
  w = np.where(isolated_node_mask, 1, np.sqrt(w))


ERROR in PosRemoval.SentenceEmbedding.GraphAffinity Input contains NaN.


Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  122274 lr:  0.000000 avg.loss:  2.703819 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  122458 lr:  0.000000 avg.loss:  2.685664 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  122347 lr:  0.000000 avg.loss:  2.739989 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  123371 lr:  0.000000 avg.loss:  2.697831 ETA:   0h 0m 0s
  w = np.where(isolated_node_mask, 1, np.sqrt(w))


ERROR in Stem.SentenceEmbedding.GraphAffinity Input contains NaN.


Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  122228 lr:  0.000000 avg.loss:  2.700052 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  124065 lr:  0.000000 avg.loss:  2.707220 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  121521 lr:  0.000000 avg.loss:  2.689601 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  122614 lr:  0.000000 avg.loss:  2.715562 ETA:   0h 0m 0s
  w = np.where(isolated_node_mask, 1, np.sqrt(w))


ERROR in StopwordRemove.SentenceEmbedding.GraphAffinity Input contains NaN.


Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  115465 lr:  0.000000 avg.loss:  2.691577 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  118062 lr:  0.000000 avg.loss:  2.716027 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  115534 lr:  0.000000 avg.loss:  2.687652 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  116619 lr:  0.000000 avg.loss:  2.701928 ETA:   0h 0m 0s
  w = np.where(isolated_node_mask, 1, np.sqrt(w))


ERROR in TokenFilter.SentenceEmbedding.GraphAffinity Input contains NaN.
ERROR in None.Tfidf.UmapHdbscan Mix of label input types (string and number)


Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  119173 lr:  0.000000 avg.loss:  2.712927 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  116205 lr:  0.000000 avg.loss:  2.672312 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:   42734 lr:  0.000000 avg.loss:  2.701051 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  119143 lr:  0.000000 avg.loss:  2.717062 ETA:   0h 0m 0s
  w = np.where(isolated_node_mask, 1, np.sqrt(w))


ERROR in None.SentenceEmbedding.GraphAffinity Input contains NaN.


In [6]:
sorted_results = sorted(cluster_test_results.items(), key=lambda x: x[1]['f1_macro'], reverse=True)
for rank, (config, score) in enumerate(sorted_results, start=1):
    print(f"{rank:2}. {config:<40} | F1 Macro: {score['f1_macro']:.4f} | Accuracy: {score['accuracy']:.4f}")

dict_to_csv(cluster_test_results, "comparisions_clusters.csv")


 1. None.SentenceEmbedding.Kmeans            | F1 Macro: 0.4295 | Accuracy: 0.6100
 2. Stem.SentenceEmbedding.Kmeans            | F1 Macro: 0.4162 | Accuracy: 0.5900
 3. StopwordRemove.SentenceEmbedding.Kmeans  | F1 Macro: 0.4145 | Accuracy: 0.5800
 4. PosRemoval.SentenceEmbedding.Kmeans      | F1 Macro: 0.3942 | Accuracy: 0.5400
 5. Lemmatize.SentenceEmbedding.Kmeans       | F1 Macro: 0.3892 | Accuracy: 0.5300
 6. Lemmatize.SentenceEmbedding.UmapHdbscan  | F1 Macro: 0.3278 | Accuracy: 0.4500
 7. StopwordRemove.BagOfWords.UmapHdbscan    | F1 Macro: 0.3003 | Accuracy: 0.3100
 8. TokenFilter.SentenceEmbedding.Kmeans     | F1 Macro: 0.2927 | Accuracy: 0.4200
 9. TokenFilter.Tfidf.Kmeans                 | F1 Macro: 0.2621 | Accuracy: 0.3800
10. StopwordRemove.Tfidf.Kmeans              | F1 Macro: 0.2600 | Accuracy: 0.3800
11. Lemmatize.FastText.Kmeans                | F1 Macro: 0.2474 | Accuracy: 0.3800
12. TokenFilter.Tfidf.UmapHdbscan            | F1 Macro: 0.2461 | Accuracy: 0.2800
13. 

# Classification

Like clusters, but with predefined classes.

In [7]:
class_test_results = {}

newsgroup_labels = list(set(newsgroups.truths))

combinations = itertools.product(preprocessors, vectorizers, classifiers) # pick one from each list (including None for preprocess)

# same test train split within loop
class_test, class_train = newsgroups.split(ratio=0.8, labeled=True, splitLabeled=True, seed=101)
for x in combinations:
    preproc, vec, method = x
    key = f"{str(preproc)}.{vec}.{method}"
    try:
        pipeline_list = [
            {"name": "vectorize", "method": f"vectorizer.{vec}"},
            {"name": "classifier", "method": f"classifier.{method}"}
        ]
        if preproc is not None:
            pipeline_list = [{"name": "preprocess", "method": f"preprocess.{preproc}"}] + pipeline_list
        # construct pipeline
        pipeline = Pipeline(pipeline_list)
        # copy data
        train = copy.deepcopy(class_train)
        test = copy.deepcopy(class_test)
        # set data
        pipeline.set_data(train_data=train, run_data=test, possible_labels=newsgroup_labels)
        # run
        pipeline.run()
        # evaluate
        class_test_results[key] = evaluate(pipeline.run_data)
    except BaseException as e:
        print(f"ERROR in {key}", e)



Device set to use mps:0
Device set to use mps:0
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  117535 lr:  0.000000 avg.loss:  2.711748 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  113780 lr:  0.000000 avg.loss:  2.690920 ETA:   0h 0m 0s
Device set to use mps:0
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  117155 lr:  0.000000 avg.loss:  2.715235 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  116897 lr:  0.000000 avg.loss:  2.698574 ETA:   0h 0m 0s
Device set to use mps:0
Device set to use mps:0
Device set to use mps:0
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:  118651 lr:  0.000000 avg.loss:  2.698903 ETA:   0h 0m 0s
Read 0M words
Number of words:  1976
Number of labels: 0
Progress: 100.0% words/sec/thread:   65552 lr:  0

In [8]:
sorted_results = sorted(class_test_results.items(), key=lambda x: x[1]['f1_macro'], reverse=True)
for rank, (config, score) in enumerate(sorted_results, start=1):
    print(f"{rank:2}. {config:<40} | F1 Macro: {score['f1_macro']:.4f} | Accuracy: {score['accuracy']:.4f}")

dict_to_csv(class_test_results, "comparisions_classes.csv")


 1. PosRemoval.Tfidf.SimpleNNClassifier      | F1 Macro: 0.8795 | Accuracy: 0.8900
 2. TokenFilter.Tfidf.SimpleNNClassifier     | F1 Macro: 0.8724 | Accuracy: 0.8800
 3. StopwordRemove.Tfidf.SimpleNNClassifier  | F1 Macro: 0.8514 | Accuracy: 0.8600
 4. Lemmatize.Tfidf.SimpleNNClassifier       | F1 Macro: 0.8485 | Accuracy: 0.8600
 5. None.Tfidf.SimpleNNClassifier            | F1 Macro: 0.8441 | Accuracy: 0.8500
 6. TokenFilter.BagOfWords.SimpleNNClassifier | F1 Macro: 0.8345 | Accuracy: 0.8400
 7. TokenFilter.SentenceEmbedding.LabelProp  | F1 Macro: 0.8307 | Accuracy: 0.8400
 8. Lemmatize.SentenceEmbedding.LabelProp    | F1 Macro: 0.8264 | Accuracy: 0.8400
 9. None.BagOfWords.SimpleNNClassifier       | F1 Macro: 0.8261 | Accuracy: 0.8300
10. PosRemoval.BagOfWords.SimpleNNClassifier | F1 Macro: 0.8244 | Accuracy: 0.8200
11. Stem.Tfidf.SimpleNNClassifier            | F1 Macro: 0.8241 | Accuracy: 0.8300
12. StopwordRemove.SentenceEmbedding.LabelProp | F1 Macro: 0.8214 | Accuracy: 0.8300
1

# Labeling
Since there aren't very good ways to programtically evaluate the extractive labeling (at least that I've implemented), let's only look at the predictive ones.

Like the demo, let's make a subset of abstracts where the top 10 are the only ones, and compare only the predictive labelers

In [9]:
# top 10 true keywords
possible_labels = ["mental health", "depression", "schizophrenia",
 "covid-19", "suicide", "anxiety", "loneliness",
 "psychosis", "epidemiology", "mental disorders"]

# keep only keywords in this list, in a derived dataset from abstracts

abstracts_labels = copy.deepcopy(abstracts)

abstracts_labels.truths = [[item.lower() for item in sublist if item.lower() in possible_labels] for sublist in abstracts_labels.truths]

# test and train split, remove ones with now empty keyword lists too!
abstract_test, abstract_train = abstracts_labels.split(ratio=0.8, seed=101, labeled=True, splitLabeled=True)

In [12]:
label_test_results = {}
combinations = itertools.product(preprocessors, vectorizers, predictive_labelers) # pick one from each list (including None for preprocess)

# same test train split within loop
for x in combinations:
    preproc, vec, method = x
    key = f"{str(preproc)}.{vec}.{method}"
    try:
        pipeline_list = [
            {"name": "vectorize", "method": f"vectorizer.{vec}"},
            {"name": "classifier", "method": f"labeler.{method}"}
        ]
        if preproc is not None:
            pipeline_list = [{"name": "preprocess", "method": f"preprocess.{preproc}"}] + pipeline_list
        # construct pipeline
        pipeline = Pipeline(pipeline_list)
        # copy data
        train = copy.deepcopy(abstract_train)
        test = copy.deepcopy(abstract_test)
        # set data
        pipeline.set_data(train_data=train, run_data=test, possible_labels=possible_labels)
        # run
        pipeline.run()
        # evaluate
        label_test_results[key] = evaluate(pipeline.run_data)
    except BaseException as e:
        print(f"ERROR in {key}", e)

Device set to use mps:0


ERROR in Lemmatize.BagOfWords.ThresholdSim expected common dtype to be floating point, yet common dtype is Long


Device set to use mps:0
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   91486 lr:  0.000000 avg.loss:  2.759261 ETA:   0h 0m 0s
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   92152 lr:  0.000000 avg.loss:  2.711927 ETA:   0h 0m 0s
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   92306 lr:  0.000000 avg.loss:  2.693648 ETA:   0h 0m 0s
Device set to use mps:0
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   91494 lr:  0.000000 avg.loss:  2.705535 ETA:   0h 0m 0s
Device set to use mps:0
Device set to use mps:0


ERROR in PosRemoval.BagOfWords.ThresholdSim expected common dtype to be floating point, yet common dtype is Long


Device set to use mps:0
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   91424 lr:  0.000000 avg.loss:  2.736913 ETA:   0h 0m 0s
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   91435 lr:  0.000000 avg.loss:  2.760747 ETA:   0h 0m 0s
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   91586 lr:  0.000000 avg.loss:  2.765684 ETA:   0h 0m 0s
Device set to use mps:0
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   93265 lr:  0.000000 avg.loss:  2.751650 ETA:   0h 0m 0s
Device set to use mps:0
Device set to use mps:0


ERROR in Stem.BagOfWords.ThresholdSim expected common dtype to be floating point, yet common dtype is Long


Device set to use mps:0
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   91357 lr:  0.000000 avg.loss:  2.763424 ETA:   0h 0m 0s
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   87418 lr:  0.000000 avg.loss:  2.779717 ETA:   0h 0m 0s
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   91540 lr:  0.000000 avg.loss:  2.764707 ETA:   0h 0m 0s
Device set to use mps:0
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   91387 lr:  0.000000 avg.loss:  2.748153 ETA:   0h 0m 0s
Device set to use mps:0
Device set to use mps:0


ERROR in StopwordRemove.BagOfWords.ThresholdSim expected common dtype to be floating point, yet common dtype is Long


Device set to use mps:0
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   91548 lr:  0.000000 avg.loss:  2.744962 ETA:   0h 0m 0s
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   92466 lr:  0.000000 avg.loss:  2.759615 ETA:   0h 0m 0s
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   91537 lr:  0.000000 avg.loss:  2.705917 ETA:   0h 0m 0s
Device set to use mps:0
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   95472 lr:  0.000000 avg.loss:  2.715255 ETA:   0h 0m 0s
Device set to use mps:0
Device set to use mps:0


ERROR in TokenFilter.BagOfWords.ThresholdSim expected common dtype to be floating point, yet common dtype is Long


Device set to use mps:0
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   97581 lr:  0.000000 avg.loss:  2.708606 ETA:   0h 0m 0s
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   96148 lr:  0.000000 avg.loss:  2.731645 ETA:   0h 0m 0s
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   95544 lr:  0.000000 avg.loss:  2.733090 ETA:   0h 0m 0s
Device set to use mps:0
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   96151 lr:  0.000000 avg.loss:  2.789399 ETA:   0h 0m 0s
Device set to use mps:0
Device set to use mps:0


ERROR in None.BagOfWords.ThresholdSim expected common dtype to be floating point, yet common dtype is Long


Device set to use mps:0
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   89485 lr:  0.000000 avg.loss:  2.791651 ETA:   0h 0m 0s
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   91013 lr:  0.000000 avg.loss:  2.765023 ETA:   0h 0m 0s
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   92529 lr:  0.000000 avg.loss:  2.730972 ETA:   0h 0m 0s
Device set to use mps:0
Read 0M words
Number of words:  1157
Number of labels: 0
Progress: 100.0% words/sec/thread:   91534 lr:  0.000000 avg.loss:  2.779202 ETA:   0h 0m 0s
Device set to use mps:0


In [15]:
sorted_results = sorted(label_test_results.items(), key=lambda x: x[1]['f1_macro'], reverse=True)
for rank, (config, score) in enumerate(sorted_results, start=1):
    print(f"{rank:2}. {config:<40} | F1 Macro: {score['f1_macro']:.4f} | Jaccard: {score['jaccard']:.4f}")

dict_to_csv(label_test_results, "comparisions_labels.csv")

 1. None.SentenceEmbedding.ThresholdSim      | F1 Macro: 0.4593 | Jaccard: 0.4042
 2. Lemmatize.SentenceEmbedding.ThresholdSim | F1 Macro: 0.4453 | Jaccard: 0.3917
 3. TokenFilter.SentenceEmbedding.ThresholdSim | F1 Macro: 0.4366 | Jaccard: 0.3820
 4. TokenFilter.Tfidf.ThresholdSim           | F1 Macro: 0.4280 | Jaccard: 0.3900
 5. PosRemoval.SentenceEmbedding.ThresholdSim | F1 Macro: 0.4230 | Jaccard: 0.3642
 6. StopwordRemove.Tfidf.ThresholdSim        | F1 Macro: 0.4180 | Jaccard: 0.3800
 7. PosRemoval.Tfidf.ThresholdSim            | F1 Macro: 0.3947 | Jaccard: 0.3633
 8. None.Tfidf.ThresholdSim                  | F1 Macro: 0.3947 | Jaccard: 0.3633
 9. StopwordRemove.SentenceEmbedding.ThresholdSim | F1 Macro: 0.3923 | Jaccard: 0.3387
10. StopwordRemove.BagOfWords.SimpleNNLabeler | F1 Macro: 0.3880 | Jaccard: 0.3583
11. Lemmatize.Tfidf.ThresholdSim             | F1 Macro: 0.3663 | Jaccard: 0.3350
12. TokenFilter.Tfidf.BartTagLabeler         | F1 Macro: 0.3433 | Jaccard: 0.3367
13. Tok