In [14]:
import os
import pickle

import pandas as pd
from pprint import pprint
from transformers.modelcard import parse_log_history

In [2]:
##### INPUTS #####
# These four values select which run's saved artifacts the cells below load:
# files are looked up under output/<dataset_name>/seed_<seed>/ and are named
# after <model_name>_<encoding>.
dataset_name = "race_pp_4000"  # sub-directory of `output`
seed = 123  # selects the `seed_<seed>` sub-directory
model_name = "DistilBERT"  # first part of the artifact name
encoding = "question_all"  # second part of the artifact name

## Evaluation metrics (train and test)

In [3]:
def get_metrics(dataset_name: str, model_name: str, encoding: str, seed: int) -> pd.DataFrame:
    """Load the saved evaluation metrics of one run as a two-column table.

    The file read is
    ``output/<dataset_name>/seed_<seed>/eval_metrics_<model_name>_<encoding>.csv``,
    resolved relative to the current working directory.

    Returns:
        The CSV contents transposed: the CSV's column names end up in a
        ``metric`` column and its first data row in a ``value`` column.
    """
    run_dir = os.path.join("output", dataset_name, "seed_" + str(seed))
    file_name = "".join(("eval_metrics_", model_name, "_", encoding, ".csv"))

    wide = pd.read_csv(os.path.join(run_dir, file_name))
    # After transposing, the former header becomes the index; reset_index()
    # moves it into a regular column named 'index'.
    tall = wide.T.reset_index()
    return tall.rename(columns={"index": "metric", 0: "value"})

# Load the metrics of the run selected in the inputs cell; the bare name on
# the last line makes the notebook render the resulting table.
metrics = get_metrics(
    dataset_name=dataset_name,
    model_name=model_name,
    encoding=encoding,
    seed=seed,
)
metrics

Unnamed: 0,metric,value
0,test_mean_absolute_error,0.193194
1,train_mean_absolute_error,0.141083
2,test_root_mean_squared_error,0.447134
3,train_root_mean_squared_error,0.379803
4,test_r2_score,0.449773
5,train_r2_score,0.783625
6,test_spearman_rho,0.738857
7,train_spearman_rho,0.889146
8,test_pearson_rho,0.742001
9,train_pearson_rho,0.88895


## Training logs

In [21]:
def get_train_logs(dataset_name: str, model_name: str, encoding: str, seed: int) -> list:
    """Load the pickled training log history of one run.

    The file read is
    ``output/<dataset_name>/seed_<seed>/<model_name>_<encoding>/train_logs.pickle``,
    resolved relative to the current working directory.

    Returns:
        The unpickled object, unchanged. For the run shown in this notebook
        it is a list of per-step log dicts (the input that
        ``parse_log_history`` is called with), not a single dict.

    Raises:
        FileNotFoundError: if the pickle file does not exist.
    """
    output_dir = os.path.join('output', dataset_name, 'seed_' + str(seed))
    log_path = os.path.join(output_dir, model_name + '_' + encoding, "train_logs.pickle")

    # SECURITY: unpickling can execute arbitrary code. Only load log files
    # written by your own training runs, never files from an untrusted source.
    with open(log_path, 'rb') as handle:
        return pickle.load(handle)

# Load the raw training log history of the run selected in the inputs cell
# and pretty-print it for inspection.
logs = get_train_logs(
    dataset_name=dataset_name,
    model_name=model_name,
    encoding=encoding,
    seed=seed,
)
pprint(logs)

[{'epoch': 0.67,
  'learning_rate': 6.666666666666667e-06,
  'loss': 0.2432,
  'step': 500},
 {'epoch': 1.0,
  'eval_loss': 0.16513565182685852,
  'eval_r_squared': 0.55,
  'eval_runtime': 14.9092,
  'eval_samples_per_second': 375.541,
  'eval_steps_per_second': 23.476,
  'step': 750},
 {'epoch': 1.0,
  'step': 750,
  'total_flos': 794790217728000.0,
  'train_loss': 0.22412510172526043,
  'train_runtime': 116.5916,
  'train_samples_per_second': 102.923,
  'train_steps_per_second': 6.433}]


In [19]:
# Split the raw log history into the three pieces parse_log_history returns,
# then pretty-print each one under a "=== name ===" banner.
train_log, lines, eval_results = parse_log_history(logs)

for title, parsed in (
    ("train_log", train_log),
    ("lines", lines),
    ("eval_results", eval_results),
):
    print("=" * 3, title, "=" * 3)
    pprint(parsed)

=== train_log ===
{'epoch': 1.0,
 'step': 750,
 'total_flos': 794790217728000.0,
 'train_loss': 0.22412510172526043,
 'train_runtime': 116.5916,
 'train_samples_per_second': 102.923,
 'train_steps_per_second': 6.433}
=== lines ===
[{'Epoch': 1.0,
  'R Squared': 0.55,
  'Step': 750,
  'Training Loss': 0.2432,
  'Validation Loss': 0.16513565182685852}]
=== eval_results ===
{'Loss': 0.16513565182685852, 'R Squared': 0.55}
