# Results

This notebook processes log files to plot result graphs (used for the thesis).

In [None]:
import awe.utils
awe.utils.init_notebook()

In [None]:
import collections
import json
import math
import os

import pandas as pd
import plotnine
import yaml

import awe.training.params
import awe.training.versioning

In [None]:
# Shared look for every figure in this notebook: the `theme_538` base theme
# with both the plot and the panel backgrounds blanked out.
THEME = plotnine.theme_538() + plotnine.theme(
    panel_background=plotnine.element_blank(),
    plot_background=plotnine.element_blank(),
)

## Per-label results

This part assumes that the logdir (`log/` in repository root by default)
contains results of all final cross-validation experiments.

In [None]:
# Keep only the versions whose cross-validation results file exists on disk.
versions = []
for candidate in awe.training.versioning.Version.get_all():
    if os.path.exists(candidate.get_results_path('crossval')):
        versions.append(candidate)
versions

In [None]:
# Load the training params of each selected version (same order as `versions`).
params = list(map(awe.training.params.Params.load_version, versions))

In [None]:
# Load the cross-validation results of every selected version.
def read_results(version: awe.training.versioning.Version):
    """Return the parsed JSON content of the version's `crossval` results file."""
    results_path = version.get_results_path('crossval')
    with open(results_path, mode='r', encoding='utf-8') as results_file:
        return json.loads(results_file.read())
results = list(map(read_results, versions))

In [None]:
# Gather data for the plot.
F1_PREFIX = 'f1/label_'
F1_EM_PREFIX = 'f1_em/label_'
def transform_label(label: str):
    """Shorten a per-label metric key to a bare label name.

    The `f1/label_` and `f1_em/label_` prefixes are stripped (in that order),
    then everything from the first remaining underscore onwards is dropped.
    A string without an underscore is returned as is after prefix removal.
    """
    without_prefix = label.removeprefix(F1_PREFIX).removeprefix(F1_EM_PREFIX)
    # `partition` yields the whole string as the head when there is no '_'.
    head, _, _ = without_prefix.partition('_')
    return head
def get_target(p: awe.training.params.Params, k: str):
    """Return the facet title for metric key `k` of an experiment with params `p`.

    SWDE experiments are titled by their vertical; every other experiment is
    titled as the Apify product dataset, with exact-match (`f1_em/label_...`)
    keys told apart from plain F1 keys.
    """
    if p.dataset == awe.training.params.Dataset.swde:
        return f'SWDE {p.vertical} (F1)'
    is_exact_match = k.startswith(F1_EM_PREFIX)
    return 'Apify product (EM)' if is_exact_match else 'Apify product (F1)'
# One row per (experiment, per-label metric); scores are rounded percentages.
# Rows are ordered by model name, descending.
df = pd.DataFrame([
    dict(
        label=transform_label(key),
        target=get_target(run_params, key),
        model='ours' if run_params.load_visuals else 'baseline',
        f1=round(stats['mean'] * 100),
        std=round(stats['std'] * 100),
    )
    for run_params, run_results in zip(params, results)
    for key, stats in run_results.items()
    if key.startswith((F1_PREFIX, F1_EM_PREFIX))
]).sort_values(by='model', ascending=False)
df

In [None]:
# Side-by-side bars: one bar per model for every label.
label_bars = plotnine.geom_col(stat='identity', position='dodge')
# The numeric score is drawn on top of each bar, dodged like the bars.
label_values = plotnine.geom_text(
    plotnine.aes(label='f1'),
    position=plotnine.position_dodge(width=0.9),
    size=8,
    va='bottom',
    color='#555555',
)
# Tweaks layered on top of the shared theme: no axis titles, legend on top
# without a title, extra spacing between facets.
label_theme_overrides = plotnine.theme(
    axis_title_x=plotnine.element_blank(),
    axis_title_y=plotnine.element_blank(),
    subplots_adjust={'hspace': 0.4, 'wspace': 0.2},
    legend_position='top',
    legend_title=plotnine.element_blank(),
)
plot = (
    plotnine.ggplot(df, plotnine.aes(x='label', y='f1', fill='model'))
    + label_bars
    + label_values
    # One facet per target; each facet keeps its own set of labels on x.
    + plotnine.facet_wrap('target', scales='free_x')
    # Upper limit above 100 — presumably headroom for the value labels.
    + plotnine.ylim(0, 105)
    + plotnine.scale_fill_manual(('#999999', '#0072B2'))
    + THEME
    + label_theme_overrides
)
plot

In [None]:
# Write the figure held in `plot` to `data/labels.pdf`.
plot.save('data/labels.pdf')

## Results table

This part generates the rows of the results table in the thesis
from the actual experiment logs.
It makes the same assumptions as the previous section.

In [None]:
# Generate rows with our results for the thesis (`tab/results.tex`).
F1_PAGE = 'f1/page'
F1_EM_PAGE = 'f1_em/page'
def repr_result(result: dict[str, float]):
    """Format a result as a LaTeX inline-math "mean plus-minus std" snippet.

    `result` must provide the keys `mean` and `std`; both are multiplied by
    100 and rounded to whole numbers before formatting.
    """
    value = round(result['mean'] * 100)
    std = round(result['std'] * 100)
    # Raw f-string: `\p` is not a valid escape sequence, so a non-raw literal
    # triggers an invalid-escape warning (and is slated to become an error).
    return rf'${value} \pm {std}$'
def find_result(metric: str, ours: bool, dataset: str = 'swde', vertical: str = None):
    """Return `metric` from the first experiment matching the given filters.

    Experiments are scanned in the order of the notebook-level `params` /
    `results` lists.

    Args:
        metric: Key to read from the matching experiment's results.
        ours: Required value of the experiment's `load_visuals` flag.
        dataset: Required dataset; a falsy value disables this filter.
        vertical: Required vertical; a falsy value (the default) disables
            this filter.

    Raises:
        LookupError: If no experiment matches. (A bare `next()` would leak an
            opaque `StopIteration` here instead.)
        KeyError: If the matching experiment has no `metric` entry.
    """
    # NOTE(review): `p.dataset` is compared against an enum member elsewhere
    # in this notebook; the string comparison below presumably relies on that
    # enum being string-valued — confirm.
    for p, r in zip(params, results):
        if (
            (not dataset or dataset == p.dataset) and
            (not vertical or vertical == p.vertical) and
            p.load_visuals == ours
        ):
            return r[metric]
    raise LookupError(
        f'No crossval results found for metric={metric!r}, ours={ours!r}, '
        f'dataset={dataset!r}, vertical={vertical!r}.'
    )
def mean_with_std(result_1: dict[str, float], result_2: dict[str, float]):
    """Combine two `{'mean', 'std'}` results into a single one.

    The combined mean is the arithmetic mean of the two means; the combined
    std is the square root of the average of the two variances.
    """
    averaged_variance = (result_1['std'] ** 2 + result_2['std'] ** 2) / 2
    averaged_mean = (result_1['mean'] + result_2['mean']) / 2
    return {'mean': averaged_mean, 'std': math.sqrt(averaged_variance)}
# Build one table row per model: baseline first, then ours.
table_rows = []
for ours in (False, True):
    # The SWDE per-vertical results are looked up once and reused for the
    # combined "mean" column.
    auto = find_result(F1_PAGE, ours, vertical='auto')
    job = find_result(F1_PAGE, ours, vertical='job')
    table_rows.append({
        'model': 'ours' if ours else 'baseline',
        'year': '2022' if ours else '',
        'auto': repr_result(auto),
        'job': repr_result(job),
        'mean': repr_result(mean_with_std(auto, job)),
        'apify (F1)': repr_result(find_result(F1_PAGE, ours, dataset='apify')),
        'apify (EM)': repr_result(find_result(F1_EM_PAGE, ours, dataset='apify')),
    })
df = pd.DataFrame(table_rows)
df

In [None]:
# Print each table row as LaTeX source (cells separated by `&`, row ended by
# `\\`), ready to be pasted into the thesis.
for _, row in df.iterrows():
    # Every cell is followed by one space, so the joined line already ends
    # with the space that precedes the row terminator.
    padded_cells = [f'{cell!s} ' for cell in row]
    print('& '.join(padded_cells) + '\\\\')

## SWDE visuals

This part generates a plot with counts of visual pages in the SWDE dataset.
It assumes the existence of the manually created file `visual_pages.yaml`
(which can be obtained from the
[`swde-visual` GitHub repository](https://github.com/jjonescz/swde-visual)).

In [None]:
# `visual_pages.yaml` is published at https://github.com/jjonescz/swde-visual.
with open('data/swde/visual_pages.yaml', encoding='utf-8') as visual_pages_file:
    swde_visual = yaml.safe_load(visual_pages_file)

In [None]:
# Tally the categories found in each vertical; 'error' is counted as 'none'.
visual_counts = {}
for vertical, websites in swde_visual.items():
    tally = collections.Counter()
    for status in websites.values():
        tally['none' if status == 'error' else status] += 1
    visual_counts[vertical] = tally
visual_counts

In [None]:
# Flatten the per-vertical counters into long format:
# one row per (vertical, category) pair with its count.
df = pd.DataFrame([
    dict(vertical=vertical_name, category=category_name, websites=n_websites)
    for vertical_name, category_counts in visual_counts.items()
    for category_name, n_websites in category_counts.items()
])
df

In [None]:
# Fill colours and legend labels of the visual categories.
# NOTE(review): `labels` and `values` are positional lists — confirm they line
# up with the category order plotnine derives from the data.
category_fill = plotnine.scale_fill_manual(
    labels=['full', 'partial', 'none'],
    values=['#009E73', '#E69F00', '#D55E00'],
)
# Tweaks layered on top of the shared theme: no x-axis title, no legend title.
visuals_theme_overrides = plotnine.theme(
    axis_title_x=plotnine.element_blank(),
    legend_title=plotnine.element_blank(),
)
plot = (
    plotnine.ggplot(df, plotnine.aes(x='vertical', y='websites', fill='category'))
    + category_fill
    # One column per vertical, filled by category.
    + plotnine.geom_col(stat='identity')
    # NOTE(review): a discrete y scale limited to 1..10 — presumably to get
    # integer ticks for the website counts; confirm against the rendered plot.
    + plotnine.scale_y_discrete(limits=range(1, 11))
    + THEME
    + visuals_theme_overrides
)
plot

In [None]:
# Write the figure held in `plot` to `data/visuals.pdf`.
plot.save('data/visuals.pdf')