In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by
# path from this Colab runtime (the dev-set CSV below lives under MyDrive).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
from scipy.stats import pearsonr, spearmanr
from sklearn import metrics as sklearn_metrics

In [None]:
# Load the dev-set score table from the mounted Drive folder.
# Later cells read the columns 'true_real_label', 'predict_real_label',
# 'true_binary_label' and 'predict_binary_label' from this frame.
fname = 'dev_set_score.csv'
dev_score_path = f'/content/drive/MyDrive/Colab Notebooks/wanted_preonboarding/assignments/sts/{fname}'
# index_col = 0: use the first CSV column as the DataFrame index rather than as data.
dev_df = pd.read_csv(dev_score_path, index_col = 0)

In [None]:
def pearson_and_spearman(labels, preds):
    """Correlate predictions with labels using Pearson and Spearman.

    Args:
        labels: Reference values; passed unchanged to the scipy routines.
        preds: Predicted values; passed unchanged to the scipy routines.

    Returns:
        dict with three entries, in this order:
            "pearson"   - Pearson correlation coefficient,
            "spearmanr" - Spearman rank correlation coefficient,
            "corr"      - arithmetic mean of the two coefficients.
    """
    # Each scipy routine returns (statistic, p-value); only the statistic
    # (index 0) is kept.
    estimators = (("pearson", pearsonr), ("spearmanr", spearmanr))
    scores = {key: estimate(preds, labels)[0] for key, estimate in estimators}
    scores["corr"] = (scores["pearson"] + scores["spearmanr"]) / 2
    return scores


def f1_pre_rec(labels, preds):
    """Compute macro-averaged precision, recall and F1 for class predictions.

    Args:
        labels: Ground-truth class labels.
        preds: Predicted class labels.

    Returns:
        dict with keys "precision", "recall" and "f1" (in that order), each
        holding the corresponding scikit-learn score with average="macro".
    """
    scorers = (
        ("precision", sklearn_metrics.precision_score),
        ("recall", sklearn_metrics.recall_score),
        ("f1", sklearn_metrics.f1_score),
    )
    return {key: scorer(labels, preds, average="macro") for key, scorer in scorers}


def compute_metrics(metric_name, labels, preds):
    """Dispatch to the metric family selected by ``metric_name``.

    Args:
        metric_name: Either "pearson_and_spearman" or "f1_pre_rec".
        labels: Reference values / ground-truth labels.
        preds: Predictions; must have the same length as ``labels``.

    Returns:
        The dict produced by the selected metric function.

    Raises:
        AssertionError: If ``preds`` and ``labels`` differ in length.
        KeyError: If ``metric_name`` is not one of the supported names.
    """
    # The length check runs before the name check, so a mismatched pair
    # fails here even when the metric name is also unknown.
    assert len(preds) == len(labels)

    if metric_name == "pearson_and_spearman":
        return pearson_and_spearman(labels, preds)
    if metric_name == "f1_pre_rec":
        return f1_pre_rec(labels, preds)
    raise KeyError(metric_name)

In [None]:
## pearson & spearman
# Correlation between the real-valued gold scores and the model's real-valued
# predictions on the dev set, collected into `corrs` and printed.
corrs = {}
corr = compute_metrics(
    metric_name="pearson_and_spearman",
    labels=dev_df['true_real_label'],
    preds=dev_df['predict_real_label'],
)
corrs.update(corr)
print(corrs)

{'pearson': 0.9309178133583659, 'spearmanr': 0.9271314692546173, 'corr': 0.9290246413064915}


In [None]:
# Macro-averaged classification scores for the binary labels.
# compute_metrics("f1_pre_rec", ...) returns all three scores in one dict, so
# it is evaluated once and indexed, instead of being recomputed per metric.
binary_scores = compute_metrics(
    "f1_pre_rec",
    dev_df['true_binary_label'],
    dev_df['predict_binary_label'],
)
f1 = binary_scores['f1']
precision = binary_scores['precision']
recall = binary_scores['recall']

In [None]:
# Report the macro-averaged binary-classification scores computed in the
# previous cell; the labels are space-padded so the colons line up.
print(f"f1 score  : {f1}")
print(f"precision : {precision}")
print(f"recall    : {recall}")

f1 score  : 0.8780258519388955
precision : 0.8797082937262357
recall    : 0.8886439647309212
