In [2]:
import json
from scipy.stats import kendalltau

# Loading Sentences

In [18]:
# Load the sentences that the score lists below are paired with.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so that
# non-ASCII characters in the sentence text decode the same way on every
# platform instead of depending on the locale's default encoding.
with open('sentences.json', 'r', encoding='utf-8') as file:
    sentences = json.load(file)

# Loading all scores

In [17]:
# Score files to load, one per scoring configuration.
files = [
    'swer_scores_case_sensitive.json',
    'swer_scores_case_insensitive.json',
    'swer_scores_no_punctuation.json',
    'swer_scores_stopwords.json',
    'swer_scores_binarystep.json',
    'swer_scores_case_insensitive_no_punctuation.json',
    'swer_scores_case_insensitive_no_punctuation_binarystep.json',
]

# Parsed contents of every score file, keyed by the file name with its
# ".json" suffix stripped (e.g. 'swer_scores_case_sensitive').
data = {}

for file in files:
    config_name = file[:-len('.json')]
    with open(file) as f:
        data[config_name] = json.load(f)

# Grouping Sentences by Score

In [19]:
def group_by_severity(sentences, scores, low_threshold=0.15, high_threshold=0.3):
    """Split (sentence, score) pairs into low / medium / high severity groups.

    Sentences and scores are paired positionally with ``zip``, so pairing
    stops at the shorter of the two inputs.

    Args:
        sentences: iterable of sentences.
        scores: iterable of scores, one per sentence.
        low_threshold: scores less than or equal to this go to the low group.
        high_threshold: scores above ``low_threshold`` and less than or equal
            to this go to the medium group; everything else goes to high.

    Returns:
        A tuple ``(low, medium, high)`` of lists of ``(sentence, score)``
        tuples, each list preserving input order.
    """
    buckets = ([], [], [])  # index 0 = low, 1 = medium, 2 = high
    for pair in zip(sentences, scores):
        value = pair[1]
        if value <= low_threshold:
            severity = 0
        elif value <= high_threshold:
            # the first branch already ruled out value <= low_threshold
            severity = 1
        else:
            severity = 2
        buckets[severity].append(pair)
    return buckets

In [28]:
# For every scoring configuration, bucket the sentences by each model's
# scores. The 'wer', 'roberta' and 'sbert' entries are left out of the grouping.
sentences_grouped = {}
for param, model_scores in data.items():
    grouped = sentences_grouped[param] = {}
    for model, scores in model_scores.items():
        if model in ('wer', 'roberta', 'sbert'):
            continue
        grouped[model] = group_by_severity(sentences, scores)

In [93]:
# Persist the grouping; JSON stores the (low, medium, high) tuples as arrays.
with open("sentences_grouped.json", "w") as out_file:
    json.dump(sentences_grouped, out_file)

In [34]:
# One line per (configuration, model): the number of sentences that landed in
# the low, medium and high groups respectively.
for param, grouped_models in sentences_grouped.items():
    for model, (low, medium, high) in grouped_models.items():
        print(param, model, len(low), len(medium), len(high))

swer_scores_case_sensitive norbert 489 41 23
swer_scores_case_sensitive norbert2 455 67 31
swer_scores_case_sensitive nb-bert 419 95 39
swer_scores_case_insensitive norbert 512 21 20
swer_scores_case_insensitive norbert2 482 48 23
swer_scores_case_insensitive nb-bert 451 74 28
swer_scores_no_punctuation norbert 503 27 23
swer_scores_no_punctuation norbert2 506 25 22
swer_scores_no_punctuation nb-bert 483 42 28
swer_scores_stopwords norbert 455 63 35
swer_scores_stopwords norbert2 383 116 54
swer_scores_stopwords nb-bert 344 140 69
swer_scores_binarystep norbert 516 27 10
swer_scores_binarystep norbert2 424 90 39
swer_scores_binarystep nb-bert 421 89 43
swer_scores_case_insensitive_no_punctuation norbert 526 7 20
swer_scores_case_insensitive_no_punctuation norbert2 522 12 19
swer_scores_case_insensitive_no_punctuation nb-bert 505 22 26
swer_scores_case_insensitive_no_punctuation_binarystep norbert 533 14 6
swer_scores_case_insensitive_no_punctuation_binarystep norbert2 512 19 22
swer_sc

# Correlation

In [37]:
# Reference WER score lists used by the correlation cells below: the "wer"
# entry of the case-sensitive and of the case-insensitive score file in `data`.
wer = data["swer_scores_case_sensitive"]["wer"]
wer_case_insensitive = data["swer_scores_case_insensitive"]["wer"]

## Correlation with WER

In [44]:
# Kendall's tau (rounded to two decimals) between `wer` and each model's
# scores, for every configuration; only models present in `sentences_grouped`
# are reported.
for param, model_scores in data.items():
    for model in sentences_grouped[param]:
        tau = kendalltau(model_scores[model], wer).correlation
        print(param, "-", model, ":", round(tau, 2))

swer_scores_case_sensitive - norbert : 0.78
swer_scores_case_sensitive - norbert2 : 0.73
swer_scores_case_sensitive - nb-bert : 0.72
swer_scores_case_insensitive - norbert : 0.59
swer_scores_case_insensitive - norbert2 : 0.6
swer_scores_case_insensitive - nb-bert : 0.6
swer_scores_no_punctuation - norbert : 0.61
swer_scores_no_punctuation - norbert2 : 0.59
swer_scores_no_punctuation - nb-bert : 0.59
swer_scores_stopwords - norbert : 0.59
swer_scores_stopwords - norbert2 : 0.57
swer_scores_stopwords - nb-bert : 0.57
swer_scores_binarystep - norbert : 0.28
swer_scores_binarystep - norbert2 : 0.59
swer_scores_binarystep - nb-bert : 0.56
swer_scores_case_insensitive_no_punctuation - norbert : 0.41
swer_scores_case_insensitive_no_punctuation - norbert2 : 0.41
swer_scores_case_insensitive_no_punctuation - nb-bert : 0.41
swer_scores_case_insensitive_no_punctuation_binarystep - norbert : 0.25
swer_scores_case_insensitive_no_punctuation_binarystep - norbert2 : 0.36
swer_scores_case_insensitive_

## Correlation with case insensitive WER

In [45]:
# Same report as for `wer`, but correlating against `wer_case_insensitive`:
# Kendall's tau (rounded to two decimals) per configuration and grouped model.
for param, model_scores in data.items():
    for model in sentences_grouped[param]:
        tau = kendalltau(model_scores[model], wer_case_insensitive).correlation
        print(param, "-", model, ":", round(tau, 2))

swer_scores_case_sensitive - norbert : 0.62
swer_scores_case_sensitive - norbert2 : 0.72
swer_scores_case_sensitive - nb-bert : 0.76
swer_scores_case_insensitive - norbert : 0.87
swer_scores_case_insensitive - norbert2 : 0.85
swer_scores_case_insensitive - nb-bert : 0.85
swer_scores_no_punctuation - norbert : 0.41
swer_scores_no_punctuation - norbert2 : 0.45
swer_scores_no_punctuation - nb-bert : 0.5
swer_scores_stopwords - norbert : 0.55
swer_scores_stopwords - norbert2 : 0.61
swer_scores_stopwords - nb-bert : 0.64
swer_scores_binarystep - norbert : 0.26
swer_scores_binarystep - norbert2 : 0.73
swer_scores_binarystep - nb-bert : 0.73
swer_scores_case_insensitive_no_punctuation - norbert : 0.62
swer_scores_case_insensitive_no_punctuation - norbert2 : 0.62
swer_scores_case_insensitive_no_punctuation - nb-bert : 0.62
swer_scores_case_insensitive_no_punctuation_binarystep - norbert : 0.37
swer_scores_case_insensitive_no_punctuation_binarystep - norbert2 : 0.51
swer_scores_case_insensitive

## Correlation with HWER

In [87]:
# Scores for the survey sentences; the cells below look them up by model name.
with open("survey_sentences_swer.json") as swer_file:
    survey_sentences_swer = json.load(swer_file)

In [88]:
# Survey results: a sequence of per-sentence records, each carrying an "hwer" value.
with open("hwer_results.json", encoding="utf-8") as hwer_file:
    survey_data = json.load(hwer_file)

In [90]:
# Convert each survey record's "hwer" value into a score: scale it by 0.2 and
# subtract the result from 1.
# NOTE(review): presumably "hwer" is on a 0-5 scale, so this maps it to an
# inverted 0-1 score — confirm against the survey definition.
hwer_scores = []
for sent in survey_data:
    hwer_scores.append(1 - (sent["hwer"] * 0.2))
print(hwer_scores)

# Kendall's tau between the HWER-derived scores and each model's scores.
for model, model_scores in survey_sentences_swer.items():
    tau = kendalltau(hwer_scores, model_scores).correlation
    print(model, round(tau, 2))


[0.496, 0.6279999999999999, 0.1259999999999999, 0.04800000000000004, 0.576, 0.6639999999999999, 0.5559999999999999, 0.518, 0.17200000000000004, 0.46399999999999997, 0.019999999999999907, 0.18399999999999994, 0.37, 0.19999999999999996, 0.02400000000000002, 0.18599999999999994, 0.14800000000000002, 0.708, 0.43399999999999994, 0.02200000000000002, 0.030000000000000027, 0.15399999999999991, 0.040000000000000036, 0.124, 0.3599999999999999, 0.22999999999999998, 0.10799999999999998, 0.05799999999999994, 0.18599999999999994]
norbert 0.34
norbert2 0.36
nb-bert 0.36


In [85]:
# Build the rows for the LaTeX table: every survey record plus the three model
# scores, all rendered as percentage strings ending in a LaTeX-escaped "\%".
#
# The rows are shallow copies, so `survey_data` itself is left untouched.
# Mutating it in place would turn sentence["hwer"] into a string, which makes
# this cell non-idempotent (a second run would repeat that string 20 times)
# and breaks any cell that still needs the numeric "hwer" value.
latex_rows = [dict(sentence) for sentence in survey_data]

# zip() stops at the shortest input; rows without a matching score are
# written out unchanged.
for row, norbert_score, norbert2_score, nbbert_score in zip(
    latex_rows,
    survey_sentences_swer["norbert"],
    survey_sentences_swer["norbert2"],
    survey_sentences_swer["nb-bert"],
):
    # "\\%" is a backslash followed by a percent sign; spelling it "\%" yields
    # the same value but is an invalid escape sequence that Python warns about.
    row["norbert"] = str(round(norbert_score * 100, 2)) + "\\%"
    row["norbert2"] = str(round(norbert2_score * 100, 2)) + "\\%"
    row["nb-bert"] = str(round(nbbert_score * 100, 2)) + "\\%"
    row["hwer"] = str(row["hwer"] * 20) + "\\%"

with open("latex_hwer_table_data.json", "w") as file:
    json.dump(latex_rows, file)

# ----

In [10]:
# Load the case-sensitive score file and print the mean score of the
# "sbert", "roberta" and "wer" entries, in that order.
with open("swer_scores_case_sensitive.json") as file:
    swer_scores_case_sensitive = json.load(file)

for metric in ("sbert", "roberta", "wer"):
    metric_scores = swer_scores_case_sensitive[metric]
    print(sum(metric_scores) / len(metric_scores))



0.06540322612920357
0.010196644173007778
0.2274163638823016


In [11]:
# Print the mean of the "wer" entry of the case-insensitive score file.
# The data gets its own, correctly named variable: binding it to
# `swer_scores_case_sensitive` would overwrite the case-sensitive scores, and
# any cell run afterwards that reads that name would silently use the wrong data.
with open("swer_scores_case_insensitive.json") as file:
    swer_scores_case_insensitive = json.load(file)
    print(sum(swer_scores_case_insensitive["wer"]) / len(swer_scores_case_insensitive["wer"]))

0.14822124528094788


{'wer': [0.2857142857142857,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  0.35294117647058826,
  0.36538461538461536,
  0.25925925925925924,
  0.25,
  0.21428571428571427,
  0.0,
  0.0,
  0.125,
  0.0,
  0.19444444444444445,
  0.07692307692307693,
  0.0,
  0.15789473684210525,
  0.0625,
  0.0,
  0.16666666666666666,
  0.07692307692307693,
  0.09803921568627451,
  0.4,
  0.0,
  0.375,
  0.0,
  0.13513513513513514,
  0.047619047619047616,
  0.0,
  0.0,
  0.0967741935483871,
  0.0,
  0.125,
  0.09090909090909091,
  0.3333333333333333,
  0.25,
  0.4,
  0.4,
  0.0,
  0.16666666666666666,
  0.0,
  0.0,
  0.1,
  0.0,
  0.11538461538461539,
  0.1,
  0.08333333333333333,
  0.06666666666666667,
  0.2549019607843137,
  0.043478260869565216,
  0.125,
  0.09090909090909091,
  0.11764705882352941,
  0.07692307692307693,
  0.0,
  0.08333333333333333,
  0.09803921568627451,
  0.11538461538461539,
  0.13636363636363635,
  0.037037037037037035,
  0.2222222222222222,
  0.0,
  0.13043478260869565,
  0.0625

In [10]:
# Kendall's tau (full result, including p-value) between the "wer" scores and
# every other entry of `swer_scores_case_sensitive`.
# The dict is iterated directly, in insertion order, rather than through a set:
# set ordering of strings varies between interpreter runs, which would make
# the printed order differ on every kernel restart.
for key in swer_scores_case_sensitive:
    if key == "wer":
        continue
    print(key, kendalltau(swer_scores_case_sensitive[key], swer_scores_case_sensitive["wer"]))

sbert KendalltauResult(correlation=0.11147811583222061, pvalue=0.00010523575770451013)
roberta KendalltauResult(correlation=0.5606387689173574, pvalue=5.099240725349598e-85)
norbert2 KendalltauResult(correlation=0.7273961197371905, pvalue=8.772200906983484e-142)
nb-bert KendalltauResult(correlation=0.7187732477620512, pvalue=1.72715979771197e-138)
norbert KendalltauResult(correlation=0.7769511830667104, pvalue=1.787095823302926e-161)
