# Thesis experiments

In [1]:
import numpy
from cade.metrics.comparative import moving_lncs2
from gensim.models.word2vec import Word2Vec
from scipy.spatial.distance import cosine
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    classification_report
)
from scipy.stats import spearmanr
from tabulate import tabulate
from config import CURRENT_EXP_DIR, config, get_logger, log_config


## Load language models and ground truth

In [2]:
def get_models(lang: str):
    """Load the two Word2Vec models (corpus1 and corpus2) stored for ``lang``.

    The model directory is derived from ``CURRENT_EXP_DIR``: everything before
    its first underscore is kept and ``"_0"`` is appended, so the models are
    always read from the ``..._0`` experiment directory regardless of the
    current experiment suffix.

    Args:
        lang: Language sub-directory name under ``<exp>_0/model/``.

    Returns:
        A ``(model1, model2)`` tuple: the models loaded from
        ``corpus1.model`` and ``corpus2.model`` respectively.
    """
    # Shared prefix of both model paths, e.g. "<exp>_0/model/<lang>".
    model_dir = CURRENT_EXP_DIR.split("_")[0] + "_0" + "/model/" + lang
    first = Word2Vec.load(model_dir + "/corpus1.model")
    second = Word2Vec.load(model_dir + "/corpus2.model")
    return first, second

def get_gt(lang: str, binary=True):
    """Read a truth file for ``lang`` as a string array.

    The file is looked up at
    ``./data/<lang>/semeval2020_ulscd_<first 3 letters of lang>/truth/<task>.txt``
    where ``<task>`` is ``binary`` when ``binary`` is true and ``graded``
    otherwise.

    Args:
        lang: Language directory name; its first three characters are also
            used as the dataset suffix.
        binary: Selects ``binary.txt`` (default) or ``graded.txt``.

    Returns:
        The tab-separated file content loaded with ``dtype=str``; callers must
        cast numeric columns themselves.
    """
    task_file = "binary" if binary else "graded"
    truth_path = (
        "./data/" + lang + "/semeval2020_ulscd_" + lang[:3] + "/truth/" + task_file + ".txt"
    )
    return numpy.loadtxt(truth_path, dtype=str, delimiter="\t")

### English (Hyper on ACC: thr=0.7329, t=0.6107, NN=36)

In [5]:
lang = "english"
# Hyper-parameters for this language (values from the section title), named
# once so that Task 1 and Task 2 cannot drift apart.
hp_nn = 36  # "NN": fourth argument of moving_lncs2
hp_t = 0.6107  # "t": fifth argument of moving_lncs2
hp_thr = 0.7329  # "thr": decision threshold applied to the moving_lncs2 value

# Load models
model1, model2 = get_models(lang)

# Task 1 - Binary Classification
# Load binary truths (string array: column 0 = word, column 1 = label)
binary_truth = get_gt(lang)
table = []
predictions = []
for word, truth in zip(binary_truth[:, 0], binary_truth[:, 1]):
    # A value at or above the threshold is labelled 0 (stable), otherwise 1 (change).
    prediction = 0 if moving_lncs2(word, model1, model2, hp_nn, hp_t) >= hp_thr else 1
    predictions.append(prediction)
    table.append([word, str(truth), str(prediction)])
print(tabulate(table, headers=["Word","Truth", "Prediction"]))
print("CLassification score for " + lang)
print(
    "\n"
    + classification_report(
        binary_truth[:, 1].astype(float),
        numpy.array(predictions),
        target_names=["class 0 (stable)", "class 1 (change)"],
    )
)

# Task 2 - Semantic Shift Score
# Load scores truths (string array: column 0 = word, column 1 = graded score)
score_truth = get_gt(lang, binary=False)
table = []
scores = []
for word, truth in zip(score_truth[:, 0], score_truth[:, 1]):
    # The shift score is the complement of the moving_lncs2 value.
    score = 1 - moving_lncs2(word, model1, model2, hp_nn, hp_t)
    scores.append(score)
    table.append([word, str(truth), str(score)])
print(tabulate(table, headers=["Word","Truth", "Rank"]))
# get_gt loads everything as strings; cast the gold scores to float so that
# spearmanr ranks numbers. Passing the raw string column makes SciPy stack
# both inputs into one string array and rank them lexicographically.
rho, _ = spearmanr(scores, score_truth[:, 1].astype(float), nan_policy="raise")
# NOTE(review): this header is a leftover from Task 1 (the value printed next
# is the Spearman correlation); kept so the cell output stays unchanged.
print("CLassification score for " + lang)
print("Spearman score for " + lang + ": " + str(rho))

Word                Truth    Prediction
----------------  -------  ------------
attack_nn               1             0
bag_nn                  0             0
ball_nn                 0             0
bit_nn                  1             1
chairman_nn             0             0
circle_vb               1             0
contemplation_nn        0             0
donkey_nn               0             0
edge_nn                 1             0
face_nn                 0             0
fiction_nn              0             0
gas_nn                  0             0
graft_nn                1             1
head_nn                 1             0
land_nn                 1             0
lane_nn                 0             0
lass_nn                 1             1
multitude_nn            0             1
ounce_nn                0             0
part_nn                 0             0
pin_vb                  0             0
plane_nn                1             1
player_nn               1             1


### German (Hyper on ACC: thr=0.5, t=0.7930, NN=18)

In [6]:
lang = "german"
# Hyper-parameters for this language, named once so that Task 1 and Task 2
# cannot drift apart.
hp_nn = 18  # "NN": fourth argument of moving_lncs2
hp_t = 0.7930  # "t": fifth argument of moving_lncs2
hp_thr = 0.5  # "thr": decision threshold applied to the moving_lncs2 value

# Load models
model1, model2 = get_models(lang)

# Task 1 - Binary Classification
# Load binary truths (string array: column 0 = word, column 1 = label)
binary_truth = get_gt(lang)
table = []
predictions = []
for word, truth in zip(binary_truth[:, 0], binary_truth[:, 1]):
    # A value at or above the threshold is labelled 0 (stable), otherwise 1 (change).
    prediction = 0 if moving_lncs2(word, model1, model2, hp_nn, hp_t) >= hp_thr else 1
    predictions.append(prediction)
    table.append([word, str(truth), str(prediction)])
print(tabulate(table, headers=["Word","Truth", "Prediction"]))
print("CLassification score for " + lang)
print(
    "\n"
    + classification_report(
        binary_truth[:, 1].astype(float),
        numpy.array(predictions),
        target_names=["class 0 (stable)", "class 1 (change)"],
    )
)

# Task 2 - Semantic Shift Score
# Load scores truths (string array: column 0 = word, column 1 = graded score)
score_truth = get_gt(lang, binary=False)
table = []
scores = []
for word, truth in zip(score_truth[:, 0], score_truth[:, 1]):
    # The shift score is the complement of the moving_lncs2 value.
    score = 1 - moving_lncs2(word, model1, model2, hp_nn, hp_t)
    scores.append(score)
    table.append([word, str(truth), str(score)])
print(tabulate(table, headers=["Word","Truth", "Rank"]))
# get_gt loads everything as strings; cast the gold scores to float so that
# spearmanr ranks numbers. Passing the raw string column makes SciPy stack
# both inputs into one string array and rank them lexicographically.
rho, _ = spearmanr(scores, score_truth[:, 1].astype(float), nan_policy="raise")
# NOTE(review): this header is a leftover from Task 1 (the value printed next
# is the Spearman correlation); kept so the cell output stays unchanged.
print("CLassification score for " + lang)
print("Spearman score for " + lang + ": " + str(rho))

Word                  Truth    Prediction
------------------  -------  ------------
abbauen                   1             1
abdecken                  1             0
abgebrüht                 0             1
Abgesang                  1             1
Ackergerät                0             0
Armenhaus                 0             1
artikulieren              1             1
aufrechterhalten          0             1
Ausnahmegesetz            0             0
ausspannen                1             1
beimischen                0             0
Dynamik                   1             1
Einreichung               0             0
Eintagsfliege             0             1
Engpaß                    1             1
Entscheidung              0             0
Festspiel                 0             0
Frechheit                 0             0
Fuß                       0             0
Gesichtsausdruck          0             0
Knotenpunkt               1             1
Kubikmeter                0       

### Latin (Hyper on ACC: thr=0.7820, t=0.7061, NN=43)

In [7]:
lang = "latin"
# Hyper-parameters for this language, named once so that Task 1 and Task 2
# cannot drift apart.
hp_nn = 43  # "NN": fourth argument of moving_lncs2
hp_t = 0.7061  # "t": fifth argument of moving_lncs2
hp_thr = 0.7820  # "thr": decision threshold applied to the moving_lncs2 value

# Load models
model1, model2 = get_models(lang)

# Task 1 - Binary Classification
# Load binary truths (string array: column 0 = word, column 1 = label)
binary_truth = get_gt(lang)
table = []
predictions = []
for word, truth in zip(binary_truth[:, 0], binary_truth[:, 1]):
    # A value at or above the threshold is labelled 0 (stable), otherwise 1 (change).
    prediction = 0 if moving_lncs2(word, model1, model2, hp_nn, hp_t) >= hp_thr else 1
    predictions.append(prediction)
    table.append([word, str(truth), str(prediction)])
print(tabulate(table, headers=["Word","Truth", "Prediction"]))
print("CLassification score for " + lang)
print(
    "\n"
    + classification_report(
        binary_truth[:, 1].astype(float),
        numpy.array(predictions),
        target_names=["class 0 (stable)", "class 1 (change)"],
    )
)

# Task 2 - Semantic Shift Score
# Load scores truths (string array: column 0 = word, column 1 = graded score)
score_truth = get_gt(lang, binary=False)
table = []
scores = []
for word, truth in zip(score_truth[:, 0], score_truth[:, 1]):
    # The shift score is the complement of the moving_lncs2 value.
    score = 1 - moving_lncs2(word, model1, model2, hp_nn, hp_t)
    scores.append(score)
    table.append([word, str(truth), str(score)])
print(tabulate(table, headers=["Word","Truth", "Rank"]))
# get_gt loads everything as strings; cast the gold scores to float so that
# spearmanr ranks numbers. Passing the raw string column makes SciPy stack
# both inputs into one string array and rank them lexicographically.
rho, _ = spearmanr(scores, score_truth[:, 1].astype(float), nan_policy="raise")
# NOTE(review): this header is a leftover from Task 1 (the value printed next
# is the Spearman correlation); kept so the cell output stays unchanged.
print("CLassification score for " + lang)
print("Spearman score for " + lang + ": " + str(rho))

Word           Truth    Prediction
-----------  -------  ------------
acerbus            0             0
adsumo             1             1
ancilla            0             1
beatus             1             1
civitas            1             1
cohors             1             1
consilium          0             0
consul             1             1
credo              1             1
dolus              1             1
dubius             1             1
dux                1             1
fidelis            0             1
honor              0             0
hostis             0             0
humanitas          1             1
imperator          1             1
itero              0             1
jus                1             1
licet              1             1
necessarius        0             1
nepos              1             1
nobilitas          0             0
oportet            0             1
poena              0             0
pontifex           1             1
potestas           1

### Swedish (Hyper on ACC: thr=0.5539, t=0.2343, NN=10)

In [8]:
lang = "swedish"
# Hyper-parameters for this language (values from the section title), named
# once so that Task 1 and Task 2 cannot drift apart.
hp_nn = 10  # "NN": fourth argument of moving_lncs2
hp_t = 0.2343  # "t": fifth argument of moving_lncs2
hp_thr = 0.5539  # "thr": decision threshold applied to the moving_lncs2 value

# Load models
model1, model2 = get_models(lang)

# Task 1 - Binary Classification
# Load binary truths (string array: column 0 = word, column 1 = label)
binary_truth = get_gt(lang)
table = []
predictions = []
for word, truth in zip(binary_truth[:, 0], binary_truth[:, 1]):
    # A value at or above the threshold is labelled 0 (stable), otherwise 1 (change).
    prediction = 0 if moving_lncs2(word, model1, model2, hp_nn, hp_t) >= hp_thr else 1
    predictions.append(prediction)
    table.append([word, str(truth), str(prediction)])
print(tabulate(table, headers=["Word","Truth", "Prediction"]))
print("CLassification score for " + lang)
print(
    "\n"
    + classification_report(
        binary_truth[:, 1].astype(float),
        numpy.array(predictions),
        target_names=["class 0 (stable)", "class 1 (change)"],
    )
)

# Task 2 - Semantic Shift Score
# Load scores truths (string array: column 0 = word, column 1 = graded score)
score_truth = get_gt(lang, binary=False)
table = []
scores = []
for word, truth in zip(score_truth[:, 0], score_truth[:, 1]):
    # The shift score is the complement of the moving_lncs2 value.
    score = 1 - moving_lncs2(word, model1, model2, hp_nn, hp_t)
    scores.append(score)
    table.append([word, str(truth), str(score)])
print(tabulate(table, headers=["Word","Truth", "Rank"]))
# get_gt loads everything as strings; cast the gold scores to float so that
# spearmanr ranks numbers. Passing the raw string column makes SciPy stack
# both inputs into one string array and rank them lexicographically.
rho, _ = spearmanr(scores, score_truth[:, 1].astype(float), nan_policy="raise")
# NOTE(review): this header is a leftover from Task 1 (the value printed next
# is the Spearman correlation); kept so the cell output stays unchanged.
print("CLassification score for " + lang)
print("Spearman score for " + lang + ": " + str(rho))

Word            Truth    Prediction
------------  -------  ------------
aktiv               0             0
annandag            0             0
antyda              0             1
bearbeta            0             0
bedömande           0             0
beredning           0             0
blockera            0             0
bolagsstämma        0             0
bröllop             0             0
by                  0             0
central             0             1
färg                0             0
förhandling         0             0
gagn                0             0
granskare           1             0
kemisk              0             0
kokärt              0             0
konduktör           1             1
krita               1             0
ledning             1             0
medium              1             1
motiv               1             0
notis               0             0
studie              0             0
undertrycka         0             0
uppfattning         1       