# 5. Review Features: Polarity

## Setup

In [2]:
import ast
import nltk
import numpy as np
import pandas as pd
import re
from sklearn.dummy import DummyClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, auc, classification_report, ConfusionMatrixDisplay, confusion_matrix, precision_recall_curve, roc_curve
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.naive_bayes import ComplementNB, MultinomialNB
from sklearn.svm import LinearSVC

In [3]:
nltk.download('stopwords')
from nltk.corpus import stopwords
languages = [
    'arabic', 'azerbaijani', 'bengali', 'danish', 'dutch', 'english',
    'finnish', 'french', 'german', 'greek', 'hungarian', 'indonesian',
    'italian', 'kazakh', 'nepali', 'norwegian', 'portuguese', 'romanian',
    'russian', 'slovene', 'spanish', 'swedish', 'tajik', 'turkish'
]
english_stopwords = set(stopwords.words('english'))
all_stopwords = set()
for language in languages:
    all_stopwords |= set(stopwords.words(language))

[nltk_data] Downloading package stopwords to C:\Users\Conor Mac
[nltk_data]     Amhlaoibh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
DIR_SAMPLE = '../data/samples/'
PATH_SAMPLES = DIR_SAMPLE + 'review_sentiment/%s.csv'
DIR_RESULT = '../data/results/bert/'
PATH_RESULTS_HP = DIR_RESULT + 'review_polarity/hyperparams/eng_eq_any_100000_%d_train.txt'
PATH_RESULTS_OPT = DIR_RESULT + 'review_polarity/optimal/%s_10_%s.txt'

## Data Reading and Writing

In [5]:
def preprocess_text(text, is_english=True):
    if is_english:
        text = text.lower()
        stopword_list = english_stopwords
    else:
        stopword_list = all_stopwords
    text = re.sub(r'[^\w\s]', ' ', text, re.UNICODE)
    text = ' '.join([
        word
        for word in text.split()
        if word not in stopword_list
    ])
    text = re.sub(r'\s+', ' ', text).strip()
    return text

In [6]:
def read_data(sample_name, train_size=0.9, seed=None):
    df = pd.read_csv(PATH_SAMPLES % sample_name)[['polarity', 'text']].reset_index(drop=True)
    if sample_name.startswith('eng'):
        df['text'] = df['text'].apply(lambda text: preprocess_text(text, is_english=True))
    else:
        df['text'] = df['text'].apply(lambda text: preprocess_text(text, is_english=False))
    train, validation = train_test_split(df, train_size=train_size, random_state=seed)
    return train, validation

## Hyperparameter Tuning

In [None]:
sample_name = 'eng_eq_any_100000'
seed = 1337
train, validation = read_data(sample_name, seed=seed)
Xt, Yt = train['text'].tolist(), train['polarity'].tolist()
Xv, Yv = validation['text'].tolist(), validation['polarity'].tolist()

In [None]:
# fit TFIDF models first
ngram_ranges = [(1, 2), (1, 3), (1, 4)]
Xt_tfidfs, Xv_tfidfs = [], []
for ngram_range in ngram_ranges:
    tfidf = TfidfVectorizer(ngram_range=ngram_range, binary=True, smooth_idf=False)
    Xt_tfidfs.append(tfidf.fit_transform(Xt))
    Xv_tfidfs.append(tfidf.transform(Xv))

In [None]:
results = {}

In [None]:
results['bl'] = {}
bl_freq = DummyClassifier(strategy='most_frequent')
scores = cross_val_score(bl_freq, Xt, Yt, scoring='accuracy', cv=5)
mean, std = round(scores.mean(), 4), round(scores.std(), 4)
results['bl']['freq'] = (mean, std)
bl_rand = DummyClassifier(strategy='uniform')
scores = cross_val_score(bl_rand, Xt, Yt, scoring='accuracy', cv=5)
mean, std = round(scores.mean(), 4), round(scores.std(), 4)
results['bl']['rand'] = (mean, std)

In [None]:
for i, ngram_range in enumerate(ngram_ranges):
    print(f'=== TFIDF: ngram_range=(1,{ngram_range[1]}) ===')
    Xt_tfidf = Xt_tfidfs[i]
    Xv_tfidf = Xv_tfidfs[i]
    results[ngram_range] = {}
    # MultinomialNB
    alphas = [0.01, 0.1, 1, 10]
    results[ngram_range]['mnb'] = {}
    for alpha in alphas:
        print(f'>>> MultinomialNB (alpha={alpha}) <<<')
        model = MultinomialNB(alpha=alpha)
        scores = cross_val_score(model, Xt_tfidf, Yt, scoring='accuracy', cv=5)
        mean, std = round(scores.mean(), 4), round(scores.std(), 4)
        print(f'mean={mean}, std={std}')
        results[ngram_range]['mnb'][alpha] = (mean, std)
    # ComplementNB
    alphas = [0.01, 0.1, 1, 10]
    results[ngram_range]['cnb'] = {}
    for alpha in alphas:
        print(f'>>> ComplementNB (alpha={alpha}) <<<')
        model = ComplementNB(alpha=alpha)
        scores = cross_val_score(model, Xt_tfidf, Yt, scoring='accuracy', cv=5)
        mean, std = round(scores.mean(), 4), round(scores.std(), 4)
        print(f'mean={mean}, std={std}')
        results[ngram_range]['cnb'][alpha] = (mean, std)
    # SGDClassifier
    alphas = [0.0001, 0.00001, 0.000001]
    results[ngram_range]['sgd'] = {}
    for alpha in alphas:
        print(f'>>> SGDClassifier (alpha={alpha}) <<<')
        model = SGDClassifier(penalty='l2', alpha=alpha, max_iter=100)
        scores = cross_val_score(model, Xt_tfidf, Yt, scoring='accuracy', cv=5)
        mean, std = round(scores.mean(), 4), round(scores.std(), 4)
        print(f'mean={mean}, std={std}')
        results[ngram_range]['sgd'][alpha] = (mean, std)
    # LinearSVC
    Cs = [0.01, 0.1, 1, 10]
    results[ngram_range]['lsvc'] = {}
    for C in Cs:
        print(f'>>> LinearSVC (C={C}) <<<')
        model = LinearSVC(penalty='l2', C=C, max_iter=100)
        scores = cross_val_score(model, Xt_tfidf, Yt, scoring='accuracy', cv=5)
        mean, std = round(scores.mean(), 4), round(scores.std(), 4)
        print(f'mean={mean}, std={std}')
        results[ngram_range]['lsvc'][C] = (mean, std)

In [None]:
results

## Plotting Setup

In [7]:
import matplotlib.pyplot as plt
import numpy as np
from cycler import cycler
from matplotlib.patches import ConnectionPatch
from matplotlib.ticker import AutoMinorLocator, ScalarFormatter

In [8]:
FONT_SIZE_S = 15
FONT_SIZE_L = 18
TICK_DIR = 'in'
TICK_SIZE_S = 3.0
TICK_SIZE_L = 5.0

def init_plt():
    plt.style.use('default')
    plt.rcParams['text.usetex'] = True
    plt.rcParams['font.size'] = FONT_SIZE_S
    plt.rcParams['legend.fontsize'] = FONT_SIZE_S
    plt.rcParams['xtick.direction'] = TICK_DIR
    plt.rcParams['ytick.direction'] = TICK_DIR
    plt.rcParams['xtick.major.size'] = TICK_SIZE_L
    plt.rcParams['xtick.minor.size'] = TICK_SIZE_S
    plt.rcParams['ytick.major.size'] = TICK_SIZE_L
    plt.rcParams['ytick.minor.size'] = TICK_SIZE_S
    plt.rcParams['axes.linewidth'] = 0.8
    plt.rcParams['axes.formatter.limits'] = (-9, 10)
    plt.rcParams['legend.handlelength'] = 2.0
    plt.rcParams['savefig.dpi'] = 400
    plt.rcParams['savefig.bbox'] = 'tight'

def init_ax(ax, only_vertical=False):
    if not only_vertical:
        ax.xaxis.set_minor_locator(AutoMinorLocator())
    ax.yaxis.set_minor_locator(AutoMinorLocator())
    return ax

def remove_log_ticks(ax):
    plt.rcParams['xtick.minor.size'] = 0

init_plt()

In [9]:
DIR_FIGS = '../figures/04_features/review_polarity/'

## Plot Base Hyperparameter Results

In [None]:
def convert_result_to_data(result):
    X, Y, E = [], [], []
    for x, (y, e) in result.items():
        X.append(x)
        Y.append(y)
        E.append(e)
    return X, Y, E

def plot_base_hp_results(results):
    # line styles
    # set up subplots
    fig = plt.figure(figsize=(9, 6))
    plt.subplots_adjust(bottom=0, left=0, top=1, right=1, wspace=0.2)
    p1 = fig.add_subplot(2, 2, 1)
    p2 = fig.add_subplot(2, 2, 2)
    p3 = fig.add_subplot(2, 2, (3, 4))
    styles = (cycler('marker', ['^','o', 'D', 'x', '2']) * cycler('color', ['#aaa', '#111', '#666']))
    p1.set_prop_cycle(styles)
    p2.set_prop_cycle(styles)
    p3.set_prop_cycle(styles)
    # plot actual results
    for label in ['mnb', 'cnb', 'sgd', 'lsvc']:
        for N in [2, 3, 4]:
            X, Y, E = convert_result_to_data(results[(1, N)][label])
            p1.plot(X, Y, label=f'${N}$-gram {label.upper()}')
            p2.plot(X, Y, label=f'${N}$-gram {label.upper()}')
            p3.plot(X, Y, label=f'${N}$-gram {label.upper()}')
    # plot baseline results
    X_rng = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1]
    for label in ['freq', 'rand']:
        Y = [results['bl'][label][0]] * len(X_rng)
        E = [results['bl'][label][1]] * len(X_rng)
        label_name = {'freq':'Frequent','rand':'Random'}[label]
        p1.plot(X_rng, Y, label=f'Baseline {label_name}')
        p2.plot(X_rng, Y, label=f'Baseline {label_name}')
        p3.plot(X_rng, Y, label=f'Baseline {label_name}')
    # plot configuration
    # plot 1
    p1.set_xlim([7e-7, 1.5e-4])
    p1.set_ylim([0.7, 0.85])
    p1.set_xscale('log')
    p1.set_ylabel('Accuracy')
    init_ax(p1, only_vertical=True)
    remove_log_ticks(p1)
    # plot 2
    p2.set_xlim([7e-3, 1.5e1])
    p2.set_ylim([0.7, 0.85])
    p2.set_xscale('log')
    init_ax(p2, only_vertical=True)
    remove_log_ticks(p2)
    # plot 3
    p3.set_xlim([7e-7, 1.5e1])
    p3.set_ylim([0.4, 1.0])
    p3.set_xscale('log')
    p3.set_xlabel(f'Hyperparameter ($\\alpha, C$)')
    p3.set_ylabel('Accuracy')
    init_ax(p3, only_vertical=True)
    remove_log_ticks(p3)
    # connect plots
    p3.fill_between((1e-6, 1e-4), 0.7, 0.85, facecolor='#eaeaea')
    p3.fill_between((1e-2, 1e1), 0.7, 0.85, facecolor='#eaeaea')
    fig.add_artist(ConnectionPatch(xyA=(7e-7, 0.7), coordsA=p1.transData, xyB=(1e-6, 0.85), coordsB=p3.transData, color='#aaa'))
    fig.add_artist(ConnectionPatch(xyA=(1.5e-4, 0.7), coordsA=p1.transData, xyB=(1e-4, 0.85), coordsB=p3.transData, color='#aaa'))
    fig.add_artist(ConnectionPatch(xyA=(7e-3, 0.7), coordsA=p2.transData, xyB=(1e-2, 0.85), coordsB=p3.transData, color='#aaa'))
    fig.add_artist(ConnectionPatch(xyA=(1.5e1, 0.7), coordsA=p2.transData, xyB=(1e1, 0.85), coordsB=p3.transData, color='#aaa'))
    # finalise
    p2.legend(loc='upper left', bbox_to_anchor=(1.025, 1.025))
    plt.savefig(DIR_FIGS + f'plot_hyperparams_base.png')
    plt.show()

In [None]:
plot_base_hp_results(results)

## Plot Bert Hyperparameter Results

In [None]:
def load_bert_train_results(filename):
    with open(filename, 'r') as f:
        data = ast.literal_eval(f.readlines()[1])
    return data

In [None]:
def plot_bert_hp_results():
    # set up plot
    styles = (cycler('color', ['#aaa', '#111', '#666']) * cycler('marker', ['2', 'x']))
    ax = plt.gca()
    ax.set_prop_cycle(styles)
    # load and plot results
    batch_sizes = [16, 32, 64]
    num_epochs = [2, 3, 4]
    i = 0
    for batch_size in batch_sizes:
        Y1, Y2 = [], []
        for num_epoch in num_epochs:
            data = load_bert_train_results(PATH_RESULTS_HP % i)
            Y1.append(data['accuracy'][-1])
            Y2.append(data['val_accuracy'][-1])
            i += 1
        plt.plot(num_epochs, Y1, label=f'Training ($n = {batch_size}$)')
        plt.plot(num_epochs, Y2, label=f'Validation ($n = {batch_size}$)')
    # plot configuration
    plt.xticks(num_epochs)
    plt.ylim([0.8, 1.0])
    plt.xlabel('Number of epochs')
    plt.ylabel('Accuracy')
    # change legend order
    hs, ls = ax.get_legend_handles_labels()
    ixs = [1, 3, 5, 0, 2, 4]
    plt.legend([hs[ix] for ix in ixs], [ls[ix] for ix in ixs], loc='upper left', bbox_to_anchor=(1, 1))
    # save and show
    plt.savefig(DIR_FIGS + f'plot_hyperparams_bert.png')
    plt.show()

In [None]:
plot_bert_hp_results()

## Full Results

In [13]:
def load_bert_test_results(sample_name):
    path_train = PATH_RESULTS_OPT % (sample_name, 'train')
    path_test = PATH_RESULTS_OPT % (sample_name, 'test')
    with open(path_train, 'r') as f:
        data = ast.literal_eval(f.readlines()[1])
        train_acc = data['accuracy'][-1]
        val_acc = data['val_accuracy'][-1]
    with open(path_test, 'r') as f:
        Yt, Yp, Yproba = map(ast.literal_eval, f.readlines())
    return train_acc, val_acc, Yt, Yp, np.array(Yproba)

In [None]:
def get_base_results_for_sample(sample_name, seed=None):
    # load sample data
    train, validation = read_data(sample_name, seed=seed)
    Xt, Yt = train['text'].tolist(), train['polarity'].tolist()
    Xv, Yv = validation['text'].tolist(), validation['polarity'].tolist()
    # vectorise sample data
    tfidf3 = TfidfVectorizer(ngram_range=(1, 3), binary=True, smooth_idf=False)
    Xt_tfidf3 = tfidf3.fit_transform(Xt)
    Xv_tfidf3 = tfidf3.transform(Xv)
    tfidf4 = TfidfVectorizer(ngram_range=(1, 4), binary=True, smooth_idf=False)
    Xt_tfidf4 = tfidf4.fit_transform(Xt)
    Xv_tfidf4 = tfidf4.transform(Xv)
    # init results
    results = {}
    # get results for baseline
    model_bl = DummyClassifier(strategy='most_frequent')
    model_bl.fit(Xt, Yt)
    results['bl'] = model_bl.score(Xv, Yv)
    print('>>>', 'bl =', results['bl'])
    # get results for MultinomialNB
    model_mnb = MultinomialNB(alpha=1)
    model_mnb.fit(Xt_tfidf4, Yt)
    results['mnb'] = model_mnb.score(Xv_tfidf4, Yv)
    print('>>>', 'mnb =', results['mnb'])
    # get results for ComplementNB
    model_cnb = ComplementNB(alpha=1)
    model_cnb.fit(Xt_tfidf4, Yt)
    results['cnb'] = model_cnb.score(Xv_tfidf4, Yv)
    print('>>>', 'cnb =', results['cnb'])
    # get results for SGDClassifier
    model_sgd = SGDClassifier(penalty='l2', alpha=1e-5)
    model_sgd.fit(Xt_tfidf3, Yt)
    results['sgd'] = model_sgd.score(Xv_tfidf3, Yv)
    print('>>>', 'sgd =', results['sgd'])
    # get results for LinearSVC
    model_lsvc = LinearSVC(penalty='l2', C=1)
    model_lsvc.fit(Xt_tfidf3, Yt)
    results['lsvc'] = model_lsvc.score(Xv_tfidf3, Yv)
    print('>>>', 'lsvc =', results['lsvc'])
    # load pre-gathered BERT results
    _, _, Yt, Yp, _ = load_bert_test_results(sample_name)
    results['bert'] = accuracy_score(Yt, Yp)
    print('>>>', 'bert =', results['bert'])
    return results

In [10]:
sample_names = [
    'eng_any_any_100000', 'eng_any_long_100000', 'eng_any_short_100000',
    'eng_eq_any_100000', 'eng_eq_long_100000', 'eng_eq_short_100000',
    'any_any_any_100000', 'any_any_long_100000', 'any_any_short_100000',
    'any_eq_any_100000', 'any_eq_long_100000', 'any_eq_short_100000',
]
seed = 1337
test_results = {}

In [None]:
for sample_name in sample_names:
    print(f'=== {sample_name} ===')
    test_results[sample_name] = get_base_results_for_sample(sample_name, seed=seed)

In [None]:
test_results

## Plot Sample Scores

In [None]:
def plot_sample_scores(sample_name, results):
    keys_to_labels = {
        'bl': 'Baseline', 'mnb': 'MNB', 'cnb': 'CNB',
        'sgd': 'SGD', 'lsvc': 'SVC', 'bert': 'BERT'
    }
    X, Y, labels = [], [], []
    for i, (key, value) in enumerate(results[sample_name].items()):
        X.append(i)
        Y.append(value)
        labels.append(keys_to_labels[key])
    plt.rcParams['xtick.direction'] = 'out'
    ax = plt.gca()
    ax = init_ax(ax, only_vertical=True)
    plt.bar(X, Y, color='darkgrey')
    ymin = 0.4 if results[sample_name]['bl'] < 0.55 else 0.6
    plt.ylim([ymin, 1.0])
    plt.xticks(X, labels)
    plt.ylabel('Accuracy')
    plt.xlabel('Model')
    plt.savefig(DIR_FIGS + f'bars_optimal_{sample_name}.png')
    plt.show()

In [None]:
for sample_name in sample_names:
    plot_sample_scores(sample_name, test_results)

## Plot ROC and Confusion Matrices

In [17]:
def plot_sample_roc_cm(sample_name, seed=None, class_reports=False):
    # load sample data
    train, validation = read_data(sample_name, seed=seed)
    Xt, Yt = train['text'].tolist(), train['polarity'].tolist()
    Xv, Yv = validation['text'].tolist(), validation['polarity'].tolist()
    # vectorise sample data
    tfidf3 = TfidfVectorizer(ngram_range=(1, 3), binary=True, smooth_idf=False)
    Xt_tfidf3 = tfidf3.fit_transform(Xt)
    Xv_tfidf3 = tfidf3.transform(Xv)
    tfidf4 = TfidfVectorizer(ngram_range=(1, 4), binary=True, smooth_idf=False)
    Xt_tfidf4 = tfidf4.fit_transform(Xt)
    Xv_tfidf4 = tfidf4.transform(Xv)
    # get baseline results
    model_bl = DummyClassifier(strategy='most_frequent')
    model_bl.fit(Xt, Yt)
    Ypred_bl = model_bl.predict(Xv)
    Yp_bl = model_bl.predict_proba(Xv)[:,1]
    fpr_bl, tpr_bl, _ = roc_curve(Yv, Yp_bl, pos_label=1)
    prec_bl, rec_bl, _ = precision_recall_curve(Yv, Yp_bl, pos_label=1)
    # get model results depending on sample
    if sample_name in ['eng_any_long_100000', 'any_any_any_100000', 'any_any_long_100000']:
        comp_label = 'SVC'
        model_comp = LinearSVC(penalty='l2', C=1, max_iter=100)
        model_comp.fit(Xt_tfidf3, Yt)
        Ypred_comp = model_comp.predict(Xv_tfidf3)
        Yp_comp = model_comp.decision_function(Xv_tfidf3)
    elif sample_name in ['any_eq_short_100000']:
        comp_label = 'CNB'
        model_comp = ComplementNB(alpha=1)
        model_comp.fit(Xt_tfidf4, Yt)
        Ypred_comp = model_comp.predict(Xv_tfidf4)
        Yp_comp = model_comp.predict_proba(Xv_tfidf4)[:,1]
    else:
        comp_label = 'SGD'
        model_comp = SGDClassifier(penalty='l2', alpha=1e-5)
        model_comp.fit(Xt_tfidf3, Yt)
        Ypred_comp = model_comp.predict(Xv_tfidf3)
        Yp_comp = model_comp.decision_function(Xv_tfidf3)
    fpr_comp, tpr_comp, _ = roc_curve(Yv, Yp_comp, pos_label=1)
    prec_comp, rec_comp, _ = precision_recall_curve(Yv, Yp_comp, pos_label=1)
    auc_comp = round(auc(fpr_comp, tpr_comp), 3)
    # load pre-gathered bert results
    _, _, Yt_bert, Yp_bert, Yp_proba = load_bert_test_results(sample_name)
    fpr_bert, tpr_bert, _ = roc_curve(Yt_bert, Yp_proba[:,1], pos_label=1)
    prec_bert, rec_bert, _ = precision_recall_curve(Yt_bert, Yp_proba[:,1], pos_label=1)
    auc_bert = round(auc(fpr_bert, tpr_bert), 3)
    # print classification reports
    if class_reports:
        print(classification_report(Yv, Ypred_bl, zero_division=0))
        print(classification_report(Yv, Ypred_comp, zero_division=0))
        print(classification_report(Yv, Yp_bert, zero_division=0))
        return
    # plot roc curves
    plt.plot(fpr_bert, tpr_bert, linestyle='--', color='#666', label=f'BERT (AUC $= {auc_bert}$)')
    plt.plot(fpr_comp, tpr_comp, linestyle='--', color='#111', label=f'{comp_label} (AUC $= {auc_comp}$)')
    plt.plot(fpr_bl, tpr_bl, linestyle='--', color='#aaa', label='Baseline')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
    plt.savefig(DIR_FIGS + f'roc_optimal_{sample_name}.png')
    plt.show()
    # plot precision/recall
    plt.plot(rec_bert, prec_bert, linestyle='--', color='#666', label=f'BERT')
    plt.plot(rec_comp, prec_comp, linestyle='--', color='#111', label=f'{comp_label}')
    plt.plot(rec_bl, prec_bl, linestyle='--', color='#aaa', label=f'Baseline')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
    plt.savefig(DIR_FIGS + f'prec_optimal_{sample_name}.png')
    plt.show()
    # confusion matrices
    plt.rcParams['xtick.direction'] = 'out'
    plt.rcParams['ytick.direction'] = 'out'
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 3), sharey=True)
    plt.subplots_adjust(bottom=0, left=0, top=1, right=1, wspace=-0.4)
    def empty_func(_, ax=None): return
    temp = fig.colorbar
    fig.colorbar = empty_func
    ax1.set_title('BERT')
    ax2.set_title(f'{comp_label}')
    cm_bert = confusion_matrix(Yt_bert, Yp_bert, labels=[0, 1])
    disp_bert = ConfusionMatrixDisplay(cm_bert, display_labels=['Negative', 'Positive'])
    disp_bert.plot(cmap='Greys', ax=ax1)
    fig.colorbar = temp
    cm_comp = confusion_matrix(Yv, Ypred_comp, labels=[0, 1])
    disp_comp = ConfusionMatrixDisplay(cm_comp, display_labels=['Negative', 'Positive'])
    disp_comp.plot(cmap='Greys', ax=ax2)
    plt.savefig(DIR_FIGS + f'cm_optimal_{sample_name}.png')
    plt.show()
    init_plt()

In [18]:
for sample_name in sample_names:
    print(f'=== {sample_name} ===')
    plot_sample_roc_cm(sample_name, seed=seed, class_reports=True)

=== eng_any_any_100000 ===
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1530
           1       0.85      1.00      0.92      8470

    accuracy                           0.85     10000
   macro avg       0.42      0.50      0.46     10000
weighted avg       0.72      0.85      0.78     10000

              precision    recall  f1-score   support

           0       0.68      0.55      0.61      1530
           1       0.92      0.95      0.94      8470

    accuracy                           0.89     10000
   macro avg       0.80      0.75      0.77     10000
weighted avg       0.88      0.89      0.89     10000

              precision    recall  f1-score   support

           0       0.14      0.10      0.12      1530
           1       0.85      0.89      0.87      8470

    accuracy                           0.77     10000
   macro avg       0.49      0.50      0.49     10000
weighted avg       0.74      0.77      0.75    