# ALBERT on SemEval
---

Training Time for 100 Epochs: 77m44.215s

Training Data Size: 5749

Batch Size: 32

In [1]:
import pandas as pd
import tensorflow.compat.v1 as tf
import csv
import re
import plotly.graph_objects as go

In [2]:
# Tab-separated model predictions; read below with an 'index' column and a
# 'prediction' column.
submit_filename = "./sts-b_submit_results.tsv"
# Tab-separated test set; field 4 of every row is parsed as the gold score.
test_filename = "./original/sts-test.tsv"
# Evaluation log; scanned line by line for eval_accuracy, global_step and loss.
eval_filename = "./eval_results.txt"

## Test Accuracy (Pearson Correlation)

In [3]:
# Model predictions: tab-separated, indexed by the file's 'index' column.
submit = pd.read_csv(submit_filename, sep="\t", index_col='index')

# Gold data: keep every row as a list of raw tab-separated fields.
# quotechar=None is passed so that quote characters inside the sentences are
# not given any special meaning by the CSV reader.
with tf.gfile.Open(test_filename, 'r') as raw:
    lines = list(csv.reader(raw, delimiter='\t', quotechar=None))

# Field 4 of each row is parsed as the gold similarity score.
test_scores = [float(record[4]) for record in lines]

# Coerce predictions to float before extracting them as a plain list.
submit.prediction = submit.prediction.apply(float)
submit_scores = submit.prediction.values.tolist()

# Series.corr aligns both Series on their index, so a length mismatch would
# silently produce a correlation over the overlapping prefix only. Fail
# loudly instead of reporting a number computed on partial data.
assert len(test_scores) == len(submit_scores), (
    "gold/prediction length mismatch: %d vs %d"
    % (len(test_scores), len(submit_scores))
)

test = pd.Series(test_scores)
result = pd.Series(submit_scores)

# Last expression of the cell: displayed as the cell output.
test.corr(result, method='pearson')

0.8546741918666437

## Test Accuracy (Binary Accuracy, Scores Thresholded at 3)

In [4]:
def scale_scores(scores, margin=3):
    """Binarize scores against a threshold.

    Args:
        scores: Iterable of numeric scores.
        margin: Threshold value; a score greater than or equal to it maps
            to 1, anything below maps to 0.

    Returns:
        A list of 0/1 integers, one per input score, in input order.
    """
    labels = []
    for value in scores:
        if value >= margin:
            labels.append(1)
        else:
            labels.append(0)
    return labels

# Binarize gold and predicted scores with scale_scores' default threshold.
# Dedicated names keep the gold/prediction roles explicit and leave the
# continuous `test` / `result` Series from the Pearson cell untouched.
test_binary = pd.Series(scale_scores(test_scores))
result_binary = pd.Series(scale_scores(submit_scores))

# Fraction of pairs whose binarized prediction matches the binarized gold
# label (mean of a boolean Series); displayed as the cell output.
(test_binary == result_binary).mean()

0.8433647570703409

#### ALBERT Results
Pearson correlation: **0.8546741918666437**

Binary accuracy (scores thresholded at 3; fraction of test pairs, ≈ 84.3%): **0.8433647570703409**

---
##### STS Benchmark


https://ixa2.si.ehu.es/stswiki/index.php/STSbenchmark

## Training Progress

In [14]:
# Load the evaluation log as a list of lines.
with open(eval_filename, 'r') as raw:
    eval_logs = raw.readlines()

# Matches one "<key> <number>" pair, with an optional '=' or ':' between them.
# NOTE(review): assumes the log stores metrics as e.g. "loss = 0.123" -
# confirm against the actual eval_results.txt.
#  * The look-behind / look-ahead force an exact key match, so "eval_loss"
#    or "loss_scale" are not mistaken for "loss".
#  * Only the number directly attached to the key is captured, so several
#    metrics on one line or digits elsewhere on the line cannot leak in.
#  * The optional exponent keeps values such as "1e-05" in one piece.
METRIC_PATTERN = re.compile(
    r"(?<!\w)(eval_accuracy|global_step|loss)(?!\w)\s*[=:]?\s*"
    r"([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)"
)

# Raw (string) values per metric, in file order.
metric_values = {"eval_accuracy": [], "global_step": [], "loss": []}
for line in eval_logs:
    for key, value in METRIC_PATTERN.findall(line):
        metric_values[key].append(value)

eval_acc = metric_values["eval_accuracy"]
eval_step = metric_values["global_step"]
global_loss = metric_values["loss"]

# Numeric views used for plotting. Steps go through float first so a value
# written as "1000.0" still converts to an int.
acc = [float(i) for i in eval_acc]
step = [int(float(i)) for i in eval_step]
loss = [float(i) for i in global_loss]

In [15]:
# Validation accuracy plotted against the global training step.
fig = go.Figure(data=[go.Scatter(x=step, y=acc)])
fig.update_layout(
    title="Training Validation Convergence",
    xaxis_title="Global steps",
    yaxis_title="Val accuracy",
)
fig.show()