In [1]:
from transformers import TFBertForSequenceClassification, BertTokenizer, AutoTokenizer, TFAutoModelForSequenceClassification
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import glob
from tqdm.notebook import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import IPython

import sys
sys.path.append("..")
from data_preparation.data_preparation_sentiment import Example, convert_examples_to_tf_dataset, make_batches
import utils.utils as utils

In [2]:
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpu_devices[0], True)

In [3]:
import logging
logging.getLogger("transformers.tokenization_utils_base").setLevel(logging.ERROR)

In [4]:
data_dir = "../data/sentiment/"
results_path = "../results/balanced_length/results_sentiment_balanced_length.xlsx"
basic_stats = pd.read_excel("../data_exploration/sentiment_basic_stats.xlsx")
en_ref = basic_stats.loc[basic_stats["language"] == "English", "test_avg_tokens"].values[0]

code_dicts = utils.make_lang_code_dicts()
code_to_name = code_dicts["code_to_name"]
name_to_code = code_dicts["name_to_code"]

# Model parameters
model_name = "bert-base-multilingual-cased"
max_length = 512
batch_size = 64

# Model creation
model = TFBertForSequenceClassification.from_pretrained(model_name)
tokenizer = BertTokenizer.from_pretrained(model_name)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing TFBertForSequenceClassification: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier', 'dropout_37']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
def balance_lengths(test, lengths, target_mean, tokenizer):
    test["lengths"] = lengths
    test = test.sort_values("lengths")
    n = test.shape[0]
    
    if target_mean < test["lengths"].mean():
        while test["lengths"].mean() > target_mean:
            test = test.drop(test.index[-1])
    else:
        while test["lengths"].mean() < target_mean:
            test = test.drop(test.index[0])
        
    lost = n - test.shape[0]
    print("Examples lost:", lost)
    print("Examples remaining:", test.shape[0])
    test = test.drop("lengths", axis=1)
    return test, lost

In [6]:
rel_lengths = pd.read_excel("../data_exploration/relative_lengths.xlsx")

In [8]:
path = "../data/sentiment/"

for weights_filepath in tqdm(glob.glob("E:/TFM_CCIL/checkpoints/*/*_sentiment.hdf5")):
    training_lang = weights_filepath.split("\\")[1]
    
    # Load weights
    model.load_weights(weights_filepath)
    print("\nUsing weights from", weights_filepath)
    
    # Evaluation
    sentiment_eval = []

    for lang in tqdm(os.listdir(path)):
        if lang not in ["tr", "ja", "ru"]:
            print(lang)
            
            # Load and preprocess
            test = pd.read_csv(path + lang + "/test.csv", header=None)
            test.columns = ["sentiment", "review"]
            lengths = test["review"].apply(lambda x: len(tokenizer.encode(x)))
            rel_dif = rel_lengths.loc[rel_lengths["Language"] == code_to_name[lang], 
                                      "Relative Difference (%)"].values[0] / 100
            target_mean = (1 + rel_dif) * en_ref
            test, lost = balance_lengths(test, lengths, target_mean, tokenizer)
            if test.shape[0] == 0 or lost == 0:
                continue

            # Convert to TF dataset
            test_dataset = convert_examples_to_tf_dataset([(Example(text=text, category_index=label)) for label, 
                                                           text in test.values], 
                                                          tokenizer, max_length=max_length)
            test_dataset, test_batches = make_batches(test_dataset, batch_size, repetitions=1, shuffle=False)

            # Predict
            preds = model.predict(test_dataset, steps=np.ceil(test.shape[0] / batch_size), verbose=1)
            clean_preds = preds[0].argmax(axis=-1)

            # Metrics
            accuracy = accuracy_score(test["sentiment"].values, clean_preds)
            precision = precision_score(test["sentiment"].values, clean_preds, average="macro", zero_division=0)
            recall = recall_score(test["sentiment"].values, clean_preds, average="macro")
            f1 = f1_score(test["sentiment"].values, clean_preds, average="macro")
            sentiment_eval.append((lang, accuracy, precision, recall, f1))
            
    # Build table
    sentiment_eval = np.array(sentiment_eval, dtype=object)
    table = pd.DataFrame({"Language": sentiment_eval[:,0],
                          "Accuracy": sentiment_eval[:,1],
                          "Macro_Precision": sentiment_eval[:,2],
                          "Macro_Recall": sentiment_eval[:,3],
                          "Macro_F1": sentiment_eval[:,4]})
    table["Language"] = table["Language"].apply(lambda x: code_to_name[x])
    file = open("../data_exploration/sentiment_table.txt", "r")
    lang_order = [line.split("&")[1].strip() for line in file.readlines()]
    lang_order = [lang for lang in lang_order if lang not in ["Turkish", "Japanese", "Russian"]]
    table["sort"] = table["Language"].apply(lambda x: lang_order.index(x))
    table = table.sort_values(by=["sort"]).drop("sort", axis=1).reset_index(drop=True)
    
    # Update results file
    if os.path.isfile(results_path):
        results = pd.read_excel(results_path, sheet_name=None)
    else:
        results = dict.fromkeys(table.columns[1:].values, pd.DataFrame({"Language": table["Language"].values}))
    
    with pd.ExcelWriter(results_path) as writer:
        full_training_lang = code_to_name[training_lang]
        for sheet_name, df in results.items():
            # Add each the column for each metric in the corresponding sheet
            df[full_training_lang] = table[sheet_name]
            df.to_excel(writer, index=False, sheet_name=sheet_name)

HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


Using weights from E:/TFM_CCIL/checkpoints\ar\bert-base-multilingual-cased_sentiment.hdf5


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

ar
Examples lost: 4810
Examples remaining: 5266
bg
Examples lost: 1193
Examples remaining: 480
en
Examples lost: 0
Examples remaining: 1821
eu
Examples lost: 145
Examples remaining: 82
fi
Examples lost: 245
Examples remaining: 152
he
Examples lost: 138
Examples remaining: 2354
hr
Examples lost: 128
Examples remaining: 309
ko
Examples lost: 203
Examples remaining: 723
sk
Examples lost: 23
Examples remaining: 1041
th
Examples lost: 70
Examples remaining: 2274
vi
Examples lost: 7
Examples remaining: 678
zh
Examples lost: 2523
Examples remaining: 2990


Using weights from E:/TFM_CCIL/checkpoints\bg\bert-base-multilingual-cased_sentiment.hdf5


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

ar
Examples lost: 4810
Examples remaining: 5266
bg
Examples lost: 1193
Examples remaining: 480
en
Examples lost: 0
Examples remaining: 1821
eu
Examples lost: 145
Examples remaining: 82
fi
Examples lost: 245
Examples remaining: 152
he
Examples lost: 138
Examples remaining: 2354
hr
Examples lost: 128
Examples remaining: 309
ko
Examples lost: 203
Examples remaining: 723
sk
Examples lost: 23
Examples remaining: 1041
th
Examples lost: 70
Examples remaining: 2274
vi
Examples lost: 7
Examples remaining: 678
zh
Examples lost: 2523
Examples remaining: 2990


Using weights from E:/TFM_CCIL/checkpoints\en\bert-base-multilingual-cased_sentiment.hdf5


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

ar
Examples lost: 4810
Examples remaining: 5266
bg
Examples lost: 1193
Examples remaining: 480
en
Examples lost: 0
Examples remaining: 1821
eu
Examples lost: 145
Examples remaining: 82
fi
Examples lost: 245
Examples remaining: 152
he
Examples lost: 138
Examples remaining: 2354
hr
Examples lost: 128
Examples remaining: 309
ko
Examples lost: 203
Examples remaining: 723
sk
Examples lost: 23
Examples remaining: 1041
th
Examples lost: 70
Examples remaining: 2274
vi
Examples lost: 7
Examples remaining: 678
zh
Examples lost: 2523
Examples remaining: 2990


Using weights from E:/TFM_CCIL/checkpoints\eu\bert-base-multilingual-cased_sentiment.hdf5


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

ar
Examples lost: 4810
Examples remaining: 5266
bg
Examples lost: 1193
Examples remaining: 480
en
Examples lost: 0
Examples remaining: 1821
eu
Examples lost: 145
Examples remaining: 82
fi
Examples lost: 245
Examples remaining: 152
he
Examples lost: 138
Examples remaining: 2354
hr
Examples lost: 128
Examples remaining: 309
ko
Examples lost: 203
Examples remaining: 723
sk
Examples lost: 23
Examples remaining: 1041
th
Examples lost: 70
Examples remaining: 2274
vi
Examples lost: 7
Examples remaining: 678
zh
Examples lost: 2523
Examples remaining: 2990


Using weights from E:/TFM_CCIL/checkpoints\fi\bert-base-multilingual-cased_sentiment.hdf5


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

ar
Examples lost: 4810
Examples remaining: 5266
bg
Examples lost: 1193
Examples remaining: 480
en
Examples lost: 0
Examples remaining: 1821
eu
Examples lost: 145
Examples remaining: 82
fi
Examples lost: 245
Examples remaining: 152
he
Examples lost: 138
Examples remaining: 2354
hr
Examples lost: 128
Examples remaining: 309
ko
Examples lost: 203
Examples remaining: 723
sk
Examples lost: 23
Examples remaining: 1041
th
Examples lost: 70
Examples remaining: 2274
vi
Examples lost: 7
Examples remaining: 678
zh
Examples lost: 2523
Examples remaining: 2990


Using weights from E:/TFM_CCIL/checkpoints\he\bert-base-multilingual-cased_sentiment.hdf5


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

ar
Examples lost: 4810
Examples remaining: 5266
bg
Examples lost: 1193
Examples remaining: 480
en
Examples lost: 0
Examples remaining: 1821
eu
Examples lost: 145
Examples remaining: 82
fi
Examples lost: 245
Examples remaining: 152
he
Examples lost: 138
Examples remaining: 2354
hr
Examples lost: 128
Examples remaining: 309
ko
Examples lost: 203
Examples remaining: 723
sk
Examples lost: 23
Examples remaining: 1041
th
Examples lost: 70
Examples remaining: 2274
vi
Examples lost: 7
Examples remaining: 678
zh
Examples lost: 2523
Examples remaining: 2990


Using weights from E:/TFM_CCIL/checkpoints\hr\bert-base-multilingual-cased_sentiment.hdf5


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

ar
Examples lost: 4810
Examples remaining: 5266
bg
Examples lost: 1193
Examples remaining: 480
en
Examples lost: 0
Examples remaining: 1821
eu
Examples lost: 145
Examples remaining: 82
fi
Examples lost: 245
Examples remaining: 152
he
Examples lost: 138
Examples remaining: 2354
hr
Examples lost: 128
Examples remaining: 309
ko
Examples lost: 203
Examples remaining: 723
sk
Examples lost: 23
Examples remaining: 1041
th
Examples lost: 70
Examples remaining: 2274
vi
Examples lost: 7
Examples remaining: 678
zh
Examples lost: 2523
Examples remaining: 2990


Using weights from E:/TFM_CCIL/checkpoints\ko\bert-base-multilingual-cased_sentiment.hdf5


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

ar
Examples lost: 4810
Examples remaining: 5266
bg
Examples lost: 1193
Examples remaining: 480
en
Examples lost: 0
Examples remaining: 1821
eu
Examples lost: 145
Examples remaining: 82
fi
Examples lost: 245
Examples remaining: 152
he
Examples lost: 138
Examples remaining: 2354
hr
Examples lost: 128
Examples remaining: 309
ko
Examples lost: 203
Examples remaining: 723
sk
Examples lost: 23
Examples remaining: 1041
th
Examples lost: 70
Examples remaining: 2274
vi
Examples lost: 7
Examples remaining: 678
zh
Examples lost: 2523
Examples remaining: 2990


Using weights from E:/TFM_CCIL/checkpoints\sk\bert-base-multilingual-cased_sentiment.hdf5


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

ar
Examples lost: 4810
Examples remaining: 5266
bg
Examples lost: 1193
Examples remaining: 480
en
Examples lost: 0
Examples remaining: 1821
eu
Examples lost: 145
Examples remaining: 82
fi
Examples lost: 245
Examples remaining: 152
he
Examples lost: 138
Examples remaining: 2354
hr
Examples lost: 128
Examples remaining: 309
ko
Examples lost: 203
Examples remaining: 723
sk
Examples lost: 23
Examples remaining: 1041
th
Examples lost: 70
Examples remaining: 2274
vi
Examples lost: 7
Examples remaining: 678
zh
Examples lost: 2523
Examples remaining: 2990


Using weights from E:/TFM_CCIL/checkpoints\th\bert-base-multilingual-cased_sentiment.hdf5


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

ar
Examples lost: 4810
Examples remaining: 5266
bg
Examples lost: 1193
Examples remaining: 480
en
Examples lost: 0
Examples remaining: 1821
eu
Examples lost: 145
Examples remaining: 82
fi
Examples lost: 245
Examples remaining: 152
he
Examples lost: 138
Examples remaining: 2354
hr
Examples lost: 128
Examples remaining: 309
ko
Examples lost: 203
Examples remaining: 723
sk
Examples lost: 23
Examples remaining: 1041
th
Examples lost: 70
Examples remaining: 2274
vi
Examples lost: 7
Examples remaining: 678
zh
Examples lost: 2523
Examples remaining: 2990


Using weights from E:/TFM_CCIL/checkpoints\vi\bert-base-multilingual-cased_sentiment.hdf5


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

ar
Examples lost: 4810
Examples remaining: 5266
bg
Examples lost: 1193
Examples remaining: 480
en
Examples lost: 0
Examples remaining: 1821
eu
Examples lost: 145
Examples remaining: 82
fi
Examples lost: 245
Examples remaining: 152
he
Examples lost: 138
Examples remaining: 2354
hr
Examples lost: 128
Examples remaining: 309
ko
Examples lost: 203
Examples remaining: 723
sk
Examples lost: 23
Examples remaining: 1041
th
Examples lost: 70
Examples remaining: 2274
vi
Examples lost: 7
Examples remaining: 678
zh
Examples lost: 2523
Examples remaining: 2990


Using weights from E:/TFM_CCIL/checkpoints\zh\bert-base-multilingual-cased_sentiment.hdf5


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

ar
Examples lost: 4810
Examples remaining: 5266
bg
Examples lost: 1193
Examples remaining: 480
en
Examples lost: 0
Examples remaining: 1821
eu
Examples lost: 145
Examples remaining: 82
fi
Examples lost: 245
Examples remaining: 152
he
Examples lost: 138
Examples remaining: 2354
hr
Examples lost: 128
Examples remaining: 309
ko
Examples lost: 203
Examples remaining: 723
sk
Examples lost: 23
Examples remaining: 1041
th
Examples lost: 70
Examples remaining: 2274
vi
Examples lost: 7
Examples remaining: 678
zh
Examples lost: 2523
Examples remaining: 2990




In [9]:
results = pd.read_excel(results_path, sheet_name=None)

In [11]:
unbalanced_results = pd.read_excel("../results/results_sentiment.xlsx", sheet_name=None)

In [12]:
new_results = {}
for sheet_name in results:
    new_results[sheet_name] = utils.order_table(pd.concat([results[sheet_name], 
                                         unbalanced_results[sheet_name].loc[~unbalanced_results[sheet_name]["Language"].isin(
                                             results[sheet_name]["Language"].values
                                         )]], ignore_index=True))

In [14]:
with pd.ExcelWriter(results_path) as writer:
    for sheet_name, df in new_results.items():
        df.to_excel(writer, index=False, sheet_name=sheet_name)

Recalculate baselines

In [15]:
baselines = []

for lang in tqdm(os.listdir(path)):
    if lang not in ["tr", "ja", "ru"]:
        # Load and preprocess
        test = pd.read_csv(path + lang + "/test.csv", header=None)
        test.columns = ["sentiment", "review"]
        lengths = test["review"].apply(lambda x: len(tokenizer.encode(x)))
        rel_dif = rel_lengths.loc[rel_lengths["Language"] == code_to_name[lang], 
                                  "Relative Difference (%)"].values[0] / 100
        target_mean = (1 + rel_dif) * en_ref
        test, lost = balance_lengths(test, lengths, target_mean, tokenizer)
        
        # Metrics
        y_true = test["sentiment"].values
        y_pred = [test["sentiment"].mode()[0]] * len(y_true)
        acc = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, average="macro", zero_division=0)
        recall = recall_score(y_true, y_pred, average="macro")
        f1 = f1_score(y_true, y_pred, average="macro")
        baselines.append((code_to_name[lang], acc, precision, recall, f1))

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

Examples lost: 4810
Examples remaining: 5266
Examples lost: 1193
Examples remaining: 480
Examples lost: 0
Examples remaining: 1821
Examples lost: 145
Examples remaining: 82
Examples lost: 245
Examples remaining: 152
Examples lost: 138
Examples remaining: 2354
Examples lost: 128
Examples remaining: 309
Examples lost: 203
Examples remaining: 723
Examples lost: 23
Examples remaining: 1041
Examples lost: 70
Examples remaining: 2274
Examples lost: 7
Examples remaining: 678
Examples lost: 2523
Examples remaining: 2990



In [16]:
baselines = pd.DataFrame(np.array(baselines), columns=["Language", "Accuracy", "Macro_Precision", "Macro_Recall", "Macro_F1"])
baselines.iloc[:, 1:] = baselines.iloc[:, 1:].astype(float)
baselines = utils.order_table(baselines)
baselines

Unnamed: 0,Language,Accuracy,Macro_Precision,Macro_Recall,Macro_F1
0,Bulgarian,0.73125,0.365625,0.5,0.422383
1,English,0.500824,0.250412,0.5,0.333699
2,Slovak,0.874159,0.43708,0.5,0.466427
3,Croatian,0.838188,0.419094,0.5,0.455986
4,Chinese,0.6,0.3,0.5,0.375
5,Vietnamese,0.514749,0.257375,0.5,0.339825
6,Thai,0.599824,0.299912,0.5,0.374931
7,Finnish,0.828947,0.414474,0.5,0.453237
8,Basque,0.804878,0.402439,0.5,0.445946
9,Korean,0.586445,0.293223,0.5,0.36966


In [17]:
sentiment_baselines_path = "../results/balanced_length/baselines_sentiment_balanced_length.xlsx"

with pd.ExcelWriter(sentiment_baselines_path) as writer:
    for metric in baselines.columns[1:]:
        baselines[["Language", metric]].rename(columns={metric: "Baseline"}).to_excel(writer, index=False, sheet_name=metric)