In [1]:
# Enable IPython's autoreload extension; mode 2 picks up edits to imported
# modules live, so project code can change without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import json

from nsc.api import tables
from nsc.api import utils
from nsc.utils import io

from spell_checking import BENCHMARK_DIR

In [3]:
def get_table_data(file_path: str, task: str) -> list:
    """Turn one runtime-stats JSON file into rows for the runtime table.

    The JSON document is indexed by model name; every entry must unpack into
    exactly two numbers, ``(runtime, file_size)``. Presumably the runtime is
    in seconds and the file size in bytes, matching the "Runtime in s" and
    "kB/s" headers used in this notebook -- TODO confirm against the code
    that writes these files.

    Args:
        file_path: Path of the JSON stats file to read.
        task: Label placed in the first column of every returned row.

    Returns:
        One ``[task, model, runtime, throughput]`` list per model, ordered by
        sorted model name. ``runtime`` and ``throughput`` are strings with one
        decimal place; throughput is ``(file_size / 1000) / runtime``.

    Raises:
        ZeroDivisionError: If a model's runtime is zero.
    """
    with open(file_path, "r") as stats_file:
        stats = json.load(stats_file)

    def build_row(name):
        # Each entry is a (runtime, file_size) pair.
        seconds, size = stats[name]
        throughput = (size / 1000) / seconds
        return [task, name, format(seconds, ".1f"), format(throughput, ".1f")]

    return [build_row(name) for name in sorted(stats)]

In [5]:
# Single header row of the runtime table.
headers = [["Task", "Model", "Runtime in s", "kB/s"]]
# Table body: one [task, model, runtime, kB/s] row per model.
data = []
# One flag per row of `data`; True is set on the last row of each stats file
# (presumably telling generate_table to draw a separator there -- confirm
# against nsc.api.tables).
horizontal_lines = []


def _add_group(file_path: str, task: str) -> list:
    """Load one stats file and append its rows and line flags to the table.

    Args:
        file_path: Path of the JSON runtime-stats file.
        task: Task label used for every row of this group.

    Returns:
        The rows that were appended to ``data`` (possibly empty).
    """
    rows = get_table_data(file_path, task)
    data.extend(rows)
    # Guard against an empty group: `[False] * -1 + [True]` would evaluate to
    # `[True]`, adding a flag with no matching row and shifting every later
    # flag by one position.
    if rows:
        horizontal_lines.extend([False] * (len(rows) - 1) + [True])
    return rows


sed_words_data = _add_group("runtime_stats/sed_words.json", "SEDS/SEDW")
sec_data = _add_group("runtime_stats/sec_stats.json", "SEC")
sec_neuspell_data = _add_group("runtime_stats/sec_neuspell.json", "SEC")
sec_with_sed_data = _add_group("runtime_stats/sec_with_sed_stats.json", r"SEDW $\rightarrow$ SEC")

# Every row must have exactly one line flag.
assert len(horizontal_lines) == len(data)

# Write the LaTeX version to the benchmark directory ...
latex_table = tables.generate_table(
    headers,
    data,
    horizontal_lines=horizontal_lines,
    fmt="latex"
)
utils.save_text_file(os.path.join(BENCHMARK_DIR, "test", "runtime_tables", "runtimes.tex"), [latex_table])
# ... and show the same table as markdown in the notebook output.
print(tables.generate_table(
    headers,
    data,
    horizontal_lines=horizontal_lines,
    fmt="markdown"
))

| Task | Model | Runtime in s | kB/s |
| :-- | --: | --: | --: |
| SEDS/SEDW | gnn | 6.6 | 26.4 |
| SEDS/SEDW | gnn\textsuperscript{+} | 6.8 | 25.8 |
| SEDS/SEDW | tokenization repair\textsuperscript{+} | 8.7 | 20.1 |
| SEDS/SEDW | transformer | 3.5 | 50.6 |
| SEDS/SEDW | transformer\textsuperscript{+} | 3.7 | 47.6 |
| SEC | tokenization repair\textsuperscript{++} | 40.6 | 4.3 |
| SEC | transformer | 65.1 | 2.7 |
| SEC | transformer with tokenization repair | 71.1 | 2.5 |
| SEC | transformer word | 28.0 | 6.3 |
| SEC | neuspell bert | 18.1 | 9.6 |
| SEDW $\rightarrow$ SEC | gnn\textsuperscript{+} $\rightarrow$ transformer | 48.1 | 3.6 |
| SEDW $\rightarrow$ SEC | gnn\textsuperscript{+} $\rightarrow$ transformer word | 17.2 | 10.2 |
| SEDW $\rightarrow$ SEC | tokenization repair\textsuperscript{++} | 20.0 | 8.8 |
