# Apply A11y Metrics

In [1]:
import pandas as pd
import os
from constants import EVALUATION_DATE_FOLDER
import re
import numpy as np

## Add Potential Failures

In [2]:
# Load the WAVE evaluation results of both portals.
ddf = pd.read_csv(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, 'data-portal_evaluation.csv'))
jdf = pd.read_csv(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, 'journal-portal_evaluation.csv'))

# Union of the column names found in either evaluation table
_columns = list(set(ddf.columns) | set(jdf.columns))

# For now, only look at the "error" columns: keep what follows the `error_` prefix,
# sorted alphabetically.
ERROR_CATEGORIES = sorted(
    column.split('error_')[1]
    for column in _columns
    if 'error' in column
)

ERROR_CATEGORIES

['alt_area_missing',
 'alt_input_missing',
 'alt_link_missing',
 'alt_map_missing',
 'alt_missing',
 'alt_spacer_missing',
 'aria_menu_broken',
 'aria_reference_broken',
 'blink',
 'button_empty',
 'count',
 'heading_empty',
 'label_empty',
 'label_missing',
 'label_multiple',
 'language_missing',
 'link_empty',
 'link_skip_broken',
 'longdesc_invalid',
 'marquee',
 'meta_refresh',
 'th_empty',
 'title_invalid']

Now, refer to the API documentation to identify the relevant elements for each issue (e.g., `<img>` for `alt_missing`):
- https://wave.webaim.org/api/docs?format=html

- **alt_area_missing**: count `<area>` elements
- **alt_input_missing**: count `<input type="image">` elements
- **alt_link_missing**: count `<img>` elements
- **alt_map_missing**: count `<img usemap="something">` elements
- **alt_missing**: count `<img>` elements
- **alt_spacer_missing**: count `<img>` elements
- **aria_menu_broken**: count any element with `role="menu"`
- **aria_reference_broken**: count any element with either `aria-labelledby="something"` or `aria-describedby="something"`
- **blink**: count `<blink>` elements ("A non-empty `<blink>` element or other text has CSS text-decoration:blink styling.", so we may need to count any text elements as well) 
- **button_empty**: count `<button>`, `<input type="submit">`, `<input type="button">`, and `<input type="reset">` elements
- **heading_empty**: count H1, H2, H3, H4, H5, and H6
- **label_empty**: count `<label>` under `<form>`
- **label_missing**: count `<input>` (except types of image, submit, reset, button, or hidden), `<select>`, and `<textarea>`
- **label_multiple**: count `<label>`
- **language_missing**: always `1`
- **link_empty**: count `a`
- **link_skip_broken**: count `a`
- **marquee**: do not need to do count, since the ratio is always `1`
- **meta_refresh**: do not need to do count, since the ratio is always 1
- **th_empty**: count `<th>`
- **title_invalid**: always `1`

In [3]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

In [16]:
def count_num_potential_failures(url):
    """Count the elements of a page that could trigger each WAVE error category.

    The page is loaded in headless Chrome, its rendered source is parsed with
    BeautifulSoup, and the candidate elements ("potential points of failure")
    are counted per category.

    Parameters
    ----------
    url : str
        Address of the page to load.

    Returns
    -------
    dict or None
        Maps each category name (without the ``error_`` prefix) to its count.
        Categories for which nothing is counted (``language_missing``,
        ``marquee``, ``meta_refresh``, ``title_invalid``) hold the placeholder
        ``-1``. ``None`` is returned, after printing a message, when the page
        cannot be loaded.
    """
    PAGE_LOAD_TIMEOUT = 5  # seconds; used for the implicit wait and the page-load timeout
    SETTLE_TIMEOUT = 0.5   # seconds; explicit wait after the page has been requested

    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--dns-prefetch-disable")
    browser = webdriver.Chrome(options=options)

    # The outer try/finally guarantees the browser is shut down on every path,
    # including the early return below; otherwise each unreachable URL would
    # leave a Chrome process behind.
    try:
        try:
            browser.implicitly_wait(PAGE_LOAD_TIMEOUT)
            browser.set_page_load_timeout(PAGE_LOAD_TIMEOUT)
            browser.get(url)
        except Exception:
            # `Exception` rather than a bare `except`, so that interrupting the
            # kernel (KeyboardInterrupt) is not reported as an unreachable URL.
            print("Cannot get url: {}".format(url))
            return None

        try:
            # The implicit wait did not work as expected, so explicitly wait for
            # an element that should never exist; the resulting timeout is the
            # normal outcome and only serves as a short delay.
            element_present = EC.presence_of_element_located((By.ID, 'random-string-cannot-find'))
            WebDriverWait(browser, SETTLE_TIMEOUT).until(element_present)
        except TimeoutException:
            pass

        soup = BeautifulSoup(browser.page_source, "html.parser")
    finally:
        browser.quit()

    def count(*args, **kwargs):
        """Number of elements matched by ``soup.find_all`` with the given filters."""
        return len(soup.find_all(*args, **kwargs))

    num_area = count("area")
    num_input_type_image = count("input", type="image")
    num_img = count("img")
    num_img_usemap = count("img", usemap=True)
    num_button = count("button")
    num_role_menu = count(role="menu")
    # Attribute filters must go through `attrs=`: a dict passed as the first
    # positional argument is treated as a collection of tag *names*, which
    # never matches and always yields 0.
    num_aria_labelledby = count(attrs={"aria-labelledby": True})
    num_aria_describedby = count(attrs={"aria-describedby": True})
    num_blink = count("blink")
    num_input_type_submit = count("input", type="submit")
    num_input_type_button = count("input", type="button")
    num_input_type_reset = count("input", type="reset")
    num_headings = count(re.compile('^h[1-6]$'))
    num_label_under_form = sum(len(form.find_all("label")) for form in soup.find_all("form"))
    # The filter receives None for <input> without a `type` attribute, so those are counted too.
    num_labelable_input = count("input", type=lambda x: x not in ["image", "submit", "reset", "button", "hidden"])
    num_select = count("select")
    num_textarea = count("textarea")
    num_label = count("label")
    num_a = count("a")
    num_th = count("th")

    return {
        "alt_area_missing": num_area,
        "alt_input_missing": num_input_type_image,
        "alt_link_missing": num_img,
        "alt_map_missing": num_img_usemap,
        "alt_missing": num_img,
        "alt_spacer_missing": num_img,
        "aria_menu_broken": num_role_menu,
        "aria_reference_broken": num_aria_describedby + num_aria_labelledby,
        "blink": num_blink,
        "button_empty": num_button + num_input_type_submit + num_input_type_button + num_input_type_reset,
        "heading_empty": num_headings,
        "label_empty": num_label_under_form,
        "label_missing": num_labelable_input + num_select + num_textarea,
        "label_multiple": num_label,
        "language_missing": -1,
        # TODO: An anchor element has an href attribute, but contains no text (or only spaces) and no images with alternative text.
        # So, check href
        "link_empty": num_a,
        "link_skip_broken": num_a,
        "marquee": -1,
        "meta_refresh": -1,
        "th_empty": num_th,
        "title_invalid": -1,
    }

In [17]:
# Smoke test on a single URL: displays the per-category count dict, or prints
# "Cannot get url: ..." (and shows nothing) when the page cannot be loaded.
count_num_potential_failures('http://trnadb.bioinf.uni-leipzig.de')


Cannot get url: http://trnadb.bioinf.uni-leipzig.de


In [18]:
from tqdm.notebook import tqdm
from pathlib import Path

In [19]:
# df = pd.read_csv(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, 'data-portal_potential_failures.csv'))
# df.rename(columns={'data_filled': 'success'}, inplace=True)
# df['success'] = df['success'].apply(lambda x: 1 if x else 0)
# df.to_csv(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, 'data-portal_potential_failures.csv'), index=False)

In [25]:
RESOURCE = 'data-portal'
NUM_FILTER = 0  # > 0 limits the run to the first NUM_FILTER pages (debugging); 0 processes everything

# Checkpoint file: rewritten after every page so an interrupted crawl can be resumed.
file = Path(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, f'{RESOURCE}_potential_failures.csv'))

# Loaded unconditionally so that `df_pages` exists no matter which branch below runs.
df_pages = pd.read_csv(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, f'{RESOURCE}_pages.csv'))

if file.exists():
    # Resume: continue from the saved progress. Rows whose `success` is not 0
    # (1 = counted, -1 = page could not be loaded) are skipped by the loop below.
    df_saved = pd.read_csv(file)
    df = df_saved.copy()
else:
    # Fresh start: one row per evaluated page, enriched with the page table
    # (which provides the `url` column read in the loop below).
    df = pd.read_csv(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, f'{RESOURCE}_evaluation.csv'))
    df = df[['page_id']]
    df = df.merge(df_pages, on='page_id', how='left')
    df['success'] = 0

# For debugging
if NUM_FILTER > 0:
    df = df.head(NUM_FILTER)

for row_idx, row in tqdm(df.iterrows(), total=df.shape[0], desc='Processing data'):
    if row.success != 0:
        continue

    res = count_num_potential_failures(row.url)

    if res is None:
        df.loc[row_idx, 'success'] = -1
    else:
        for category, count in res.items():
            df.loc[row_idx, f'error_{category}'] = count

        df.loc[row_idx, 'success'] = 1

    # Save after every page so that progress survives an interruption.
    df.to_csv(file, index=False)

Processing data:   0%|          | 0/4461 [00:00<?, ?it/s]

Cannot get url: http://fantom.gsc.riken.jp/
Cannot get url: https://mirtarbase.cuhk.edu.cn/
Cannot get url: http://rfam.org/
Cannot get url: https://img.jgi.doe.gov/cgi-bin/w/main.cgi
Cannot get url: https://old.tcmsp-e.com/tcmsp.php
Cannot get url: http://www.hgmd.org
Cannot get url: https://unite.ut.ee/
Cannot get url: http://www.cgga.org.cn
Cannot get url: http://www.iprox.org
Cannot get url: http://117.50.127.228/CellMarker/
Cannot get url: http://www.wormbase.org/
Cannot get url: https://ngdc.cncb.ac.cn


KeyboardInterrupt: 

$$
A3 = 1 - \prod_{b} (1 - F_b)^{\frac{B_{pb}}{N_{pb}} + \frac{B_{pb}}{B_p}}
$$

Equation 3 presents the formula for computing the A3 metric, where $b$ is the barrier (checkpoint violation), $B_{pb}$ is the total number of actual points of failure of checkpoint $b$ in page $p$, $N_{pb}$ is the total number of potential points of failure of checkpoint $b$ in page $p$, $B_p$ is the total number of actual points of failure in page $p$ over all checkpoints, and $F_b$ is the severity of barrier $b$ (this weight is calculated by simple heuristics, by combining the results of an automatic evaluation and manual testing, or from the feedback of disabled users [22]). The authors of this metric performed an experimental study to compare the results of A3 and UWEM and to understand the differences between them. A checkpoint weight of 0.05 was used for all checkpoints, assuming that all of them have the same importance. The experiment was conducted with a group of six disabled users who evaluated six web pages. After applying both metrics, the authors concluded that A3 outperformed UWEM in the experiment [11].


In [None]:
# Categories included in the A3 metric. Each name must appear only once: a
# repeated entry produces duplicate columns and counts that category twice in
# both B_p and the final product.
SELECT_ERROR_CATEGORIES = [
    'alt_missing',
    'alt_input_missing',
    'alt_area_missing',
    'alt_map_missing',
    'aria_menu_broken',
    'aria_reference_broken',
    'button_empty',
    'heading_empty',
    'label_empty',
    'label_missing',
    # 'link_empty', # TODO: need to fix this value
    'th_empty'
]
# list(set(ERROR_CATEGORIES) - set(['count', 'language_missing', 'marquee', 'meta_refresh', 'title_invalid']))

_error_columns = [f'error_{c}' for c in SELECT_ERROR_CATEGORIES]

original = pd.read_csv(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, f'{RESOURCE}_evaluation.csv'))

# temp1: actual failures per page and category (B_pb), from the evaluation results.
temp1 = original[['page_id'] + _error_columns]
if NUM_FILTER > 0:
    # Only truncate in debug mode; `head(0)` would return an empty frame.
    temp1 = temp1.head(NUM_FILTER)
temp1 = temp1.set_index('page_id').fillna(0)

# temp2: potential failures per page and category (N_pb), from the element counts.
temp2 = df[['page_id'] + _error_columns].set_index('page_id')

Bp = temp1.sum(axis=1)  # total actual failures per page
# 0/0 and pages/categories missing on either side give NaN, which is treated as ratio 0.
# NOTE(review): B_pb > 0 with N_pb == 0 gives inf here, which ends up as A3 = 1 - confirm this is intended.
Bpb_over_Npb = (temp1 / temp2).fillna(0)
# Pages without any selected error have B_p == 0, so this is NaN for them; the
# product below skips NaN factors, which results in A3 = 0 for those pages.
Bpb_over_Bp = temp1.div(Bp, axis=0)

Bpb_over_Npb_plus_Bpb_over_Bp = Bpb_over_Npb + Bpb_over_Bp

Fb = 0.05  # same severity weight for every category

# (1 - F_b) ** (B_pb / N_pb + B_pb / B_p), element-wise for every page and category
Bpb_over_Npb_plus_Bpb_over_Bp = (1 - Fb) ** Bpb_over_Npb_plus_Bpb_over_Bp

a3 = 1 - Bpb_over_Npb_plus_Bpb_over_Bp.prod(axis=1)

a3


In [27]:
# Persist the per-page A3 values, keeping the `page_id` index and a header row.
_a3_csv_path = os.path.join('..', 'output', EVALUATION_DATE_FOLDER, f'{RESOURCE}_a3.csv')
a3.to_csv(_a3_csv_path, index=True, header=True)

NameError: name 'a3' is not defined

In [None]:
# Inspect the per-page error counts taken from the evaluation results (numerator of the A3 ratios).
temp1

In [None]:
# Inspect the per-page potential-failure counts (denominator of the B_pb / N_pb ratio).
temp2

In [None]:
# Preview the first 11 rows of the page table that is merged onto the evaluation results.
df_pages.head(11)

## Check Data (Debug)

In [None]:
# Test: assemble one frame per evaluated data-portal page with its resource
# metadata, A3 score and page complexity.
df = pd.read_csv(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, 'data-portal_evaluation.csv'))

# Add `id` of resources and `page_type` of pages
df_pages = pd.read_csv(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, 'data-portal_pages.csv'))
df = df.merge(df_pages[['id', 'page_id', 'page_type']], on='page_id', how='left')

# Add metadata of resources
df_meta = pd.read_csv(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, 'data-portal_metadata.csv'))
# df_meta.drop(columns=['url'], inplace=True)
df = df.merge(df_meta, on='id', how='left')

# Add Metric
df_a3 = pd.read_csv(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, 'data-portal_a3.csv'))
df = df.merge(df_a3, on='page_id', how='left')
# The A3 series was saved without a name, so its CSV column is called '0'.
df = df.rename(columns={'0': 'a3'})
df = df[df.a3.notna()]

# Complexity = total number of potential failure points of a page.
# The file name is spelled out like the others in this cell, so the cell does
# not mix resources if `RESOURCE` was changed elsewhere in the notebook.
df_complexity = pd.read_csv(os.path.join('..', 'output', EVALUATION_DATE_FOLDER, 'data-portal_potential_failures.csv'))
df_complexity = (
    df_complexity
    .set_index('page_id')
    # Only the count columns: this leaves out the `success` status flag, which is not a count.
    .filter(regex='^error_')
    # Categories that are not counted are stored as -1; do not let them reduce the total.
    .clip(lower=0)
    .sum(axis=1)
    .rename('complexity')
    .reset_index()
)

df = df.merge(df_complexity, on='page_id', how='left')
df

In [None]:
# Preview the first three rows of the merged debug frame.
df.head(3)

In [None]:
import altair as alt

In [None]:
# Number of pages whose A3 score lies strictly between 0 and 1.
_a3_strictly_inside = (df.a3 > 0) & (df.a3 < 1)
int(_a3_strictly_inside.sum())

In [None]:
# Bar chart: number of pages per A3 value, with a log-scaled count axis.
_a3_x = alt.X('a3:Q')
_count_y = alt.Y('count()').scale(type='log')

alt.Chart(df).mark_bar().encode(_a3_x, _count_y)

In [None]:
# Scatter plot of A3 against page complexity (log scale), restricted to pages
# with a positive complexity and an A3 score strictly between 0 and 1.
_scatter_rows = (df.complexity > 0) & (df.a3 > 0) & (df.a3 < 1)

alt.Chart(df[_scatter_rows]).mark_point().encode(
    alt.X('a3:Q'),
    alt.Y('complexity:Q').scale(type='log'),
    alt.Tooltip(['short_name', 'page_id']),
)

In [None]:
# Mean A3 (top) and mean complexity (bottom) per founding year, for pages with
# an A3 score strictly between 0 and 1.
_by_year_rows = df[(df.a3 < 1) & (df.a3 > 0)]

_mean_a3_by_year = alt.Chart(_by_year_rows).mark_bar().encode(
    alt.X('founded_year:O'),
    alt.Y('mean(a3):Q'),
)
_mean_complexity_by_year = alt.Chart(_by_year_rows).mark_bar().encode(
    alt.X('founded_year:O'),
    alt.Y('mean(complexity):Q'),
)

_mean_a3_by_year & _mean_complexity_by_year

In [None]:
COUNTRY_N = 20

# Pages with an A3 score strictly between 0 and 1.
_a3_inside = df[(df.a3 < 1) & (df.a3 > 0)]

# The COUNTRY_N countries with the most such pages. `value_counts()` is already
# sorted by descending count, and reading the labels from its index does not
# depend on how `reset_index()` names the columns in a given pandas version.
TOP_COUNTRY_BY_COUNT = _a3_inside.country.value_counts().head(COUNTRY_N).index.tolist()

_top_country_rows = _a3_inside[_a3_inside.country.isin(TOP_COUNTRY_BY_COUNT)]

# Mean A3 (left) and mean complexity (right) per country.
alt.Chart(
    _top_country_rows
).mark_bar().encode(
    alt.Y('country', sort='-x'),
    alt.X('mean(a3):Q'),
) | alt.Chart(
    _top_country_rows
).mark_bar().encode(
    alt.Y('country', sort='-x'),
    alt.X('mean(complexity):Q'),
    # Only the grouping field and aggregates here: an un-aggregated field such
    # as `short_name` would become an extra group-by key and split each
    # country's bar into one mark per resource.
    tooltip=['country', 'mean(a3)', 'mean(complexity)']
)