# Keyboard accessibility analysis

## Installs & Loads

In [1]:
# %pip install numpy pandas matplotlib seaborn scikit-learn

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import json

In [3]:
# Path of the JSON file that is parsed into `data` in the next cell.
file = "keyboard-accessibility-results.json"

In [None]:
# Parse the results file into `data` (indexed per tool record further down).
# The context manager closes the file handle right away instead of leaving it
# open until garbage collection; JSON is read as UTF-8 regardless of locale.
with open(file, encoding="utf-8") as results_file:
    data = json.load(results_file)

## Save all metadata info from each issue for future reference

In [None]:
# Lookup of rule metadata keyed by axe rule id, built from the first record
# only. NOTE(review): this presumes every record reports the same rule set.
all_ids = {}

data0 = data[0]['axeResults']

# Keys removed from each result so that only the remaining metadata is stored.
# Filtering (rather than pop) tolerates results that lack one of these keys and
# avoids rebinding the builtin `id` at notebook level.
_RUN_SPECIFIC_KEYS = ('id', 'impact', 'nodes')

for bucket in ('inapplicable', 'passes', 'incomplete', 'violations'):
    for issue in data0[bucket]:
        all_ids[issue['id']] = {
            key: value
            for key, value in issue.items()
            if key not in _RUN_SPECIFIC_KEYS
        }

In [None]:
# data[0]

## Find matrix of tools x issues for N/A, pass, incomplete, violation

In [None]:
# Tool names, in the same order as the records in `data`.
tools = [record['name'] for record in data]
# Rule ids from `all_ids`, sorted ascending.
issues = sorted(all_ids)

In [None]:
# For every tool, the rule ids found in each of the four axe result buckets.
tool_overview = {}
for tool_name, record in zip(tools, data):
    axe_results = record['axeResults']
    tool_overview[tool_name] = {
        bucket: [issue['id'] for issue in axe_results[bucket]]
        for bucket in ("inapplicable", "passes", "incomplete", "violations")
    }

In [None]:
# Build the tool x issue status matrix. Precedence when a rule id shows up in
# several buckets: violation > pass > incomplete. Everything else (listed as
# inapplicable, or not reported at all) is shown as 'N/A'.
rows = {}
for tool, tool_data in tool_overview.items():
    # Sets give constant-time membership tests inside the per-issue loop.
    violation_ids = set(tool_data['violations'])
    pass_ids = set(tool_data['passes'])
    incomplete_ids = set(tool_data['incomplete'])

    row = {}
    for issue_id in issues:
        if issue_id in violation_ids:
            row[issue_id] = 'violation'
        elif issue_id in pass_ids:
            row[issue_id] = 'pass'
        elif issue_id in incomplete_ids:
            row[issue_id] = 'incomplete'
        else:
            row[issue_id] = 'N/A'
    rows[tool] = row

# One row per tool, one column per issue id.
df_matrix = pd.DataFrame.from_dict(rows, orient='index')
df_matrix.index.name = 'Tool'

In [None]:
# Preview the first ten rows of the tool x issue status matrix.
df_matrix.head(10)

In [None]:
# Number of 'N/A' cells per issue column; a column is "all N/A" when that
# number equals the number of rows (tools).
na_counts = df_matrix.eq('N/A').sum()
n_tools = len(df_matrix)
all_na_columns = [column for column, count in na_counts.items() if count == n_tools]

print("Columns where all tools are N/A:")
print(all_na_columns)


In [None]:
from matplotlib.colors import ListedColormap
import matplotlib.patches as mpatches

# Colour used for each status; the dict order also defines the integer code
# (0..3) that each status is given below.
color_map = {
    'N/A': "#ffffff",
    'pass': "#30d336",
    'incomplete': '#ffeb3b',
    'violation': "#f82719"
}

categories = list(color_map.keys())
cat_to_num = {cat: i for i, cat in enumerate(categories)}
# Encode the statuses column by column with Series.map. DataFrame.replace from
# strings to ints depends on implicit object downcasting, which recent pandas
# releases deprecate (FutureWarning); an explicit map avoids that.
df_num = df_matrix.apply(lambda column: column.map(cat_to_num)).astype(int)
cmap = ListedColormap([color_map[cat] for cat in categories])

# Heatmap of the encoded status matrix: one row per tool, one column per issue.
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(
    df_num,
    cmap=cmap,
    # Pin the colour scale to the full code range. Without vmin/vmax seaborn
    # scales to the min/max present in the data, so a missing category at
    # either end would shift every colour (e.g. 'pass' drawn in red).
    vmin=0,
    vmax=len(categories) - 1,
    cbar=False,
    linewidths=0.5,
    linecolor='gray',
    ax=ax,
)

# The colour bar is disabled, so a patch legend explains the four colours.
legend_handles = [mpatches.Patch(color=color, label=label) for label, color in color_map.items()]
ax.legend(
    handles=legend_handles,
    title="AXE Result",
    bbox_to_anchor=(1.05, 1),
    loc='upper left',
    fontsize=14,
    title_fontsize=16
)

plt.setp(ax.get_xticklabels(), rotation=90, fontsize=12)
plt.setp(ax.get_yticklabels(), rotation=0, fontsize=12)
ax.set_xlabel("AXE Issue", fontsize=20)
ax.set_ylabel("Tool", fontsize=20)
ax.set_title("AXE Accessibility Analysis", fontsize=30)

plt.show()

## Compare issues to INSCIDAR issues

In [None]:
# Issue ids from the INSCIDAR CSV (column 'ID') and the rule ids collected
# from the axe results, for the set comparisons that follow.
df_issues = pd.read_csv("inscidar_issues.csv")
issues_inscidar = df_issues['ID'].tolist()
issues_axe = all_ids.keys()

In [None]:
# Ids listed in the INSCIDAR CSV that do not occur among the axe rule ids.
set(issues_inscidar) - set(issues_axe)


In [None]:
# Axe rule ids that are not listed in the INSCIDAR CSV.
set(issues_axe) - set(issues_inscidar)


## Do we have all tools?

In [None]:
# Load the CSV of candidate tools; the 'Name' and 'Data App URL' columns are used below.
df_tofinddataapp = pd.read_csv("To Find Data App URLs - uniques_by_url_manually_pruned.csv")

In [None]:
# Preview the first ten rows of the loaded CSV.
df_tofinddataapp.head(10)

In [None]:
# Keep only the rows that have a value in 'Data App URL', then show the
# resulting (rows, columns) shape.
df_tofinddataapp_valid = df_tofinddataapp.dropna(subset=["Data App URL"])
df_tofinddataapp_valid.shape

It seems that 20 tools are missing. Which ones are they?

In [None]:
# Check for duplicates: each line prints True when every name is unique.
# A list never compares equal to a set (the old comparison always printed
# False), so compare the number of entries with the number of distinct entries.
dataapp_names = list(df_tofinddataapp_valid['Name'])
print(len(dataapp_names) == len(set(dataapp_names)))
print(len(tools) == len(set(tools)))

In [None]:
# find duplicates
from collections import Counter

# Occurrence counts per name, for the CSV rows with a URL and for the tools
# that have axe results.
counts_dataapp = Counter(df_tofinddataapp_valid['Name'])
counts_tools = Counter(tools)

# Names that occur more than once, in first-seen order.
duplicates_dataapp = [name for name in counts_dataapp if counts_dataapp[name] > 1]
duplicates_tools = [name for name in counts_tools if counts_tools[name] > 1]

print(duplicates_dataapp)
print(duplicates_tools)

In [None]:
# Names with a data app URL that do not appear among the tools with axe results.
set(df_tofinddataapp_valid['Name']) - set(tools)

In [None]:
# Tools with axe results whose name is not among the rows that have a data app URL.
set(tools) - set(df_tofinddataapp_valid['Name'])

In [None]:
# Tools with axe results whose name does not appear anywhere in the CSV (with or without URL).
set(tools) - set(df_tofinddataapp['Name'])

So there are quite a few valid data app URLs for which we do not have AXE results, probably because these produced some other error.

There are 3 tools for which we do have AXE results even though they were not part of the data app URLs. How?

## Failure rates

In [None]:
# Column names for the per-tool, per-issue report table.
header = [
    'tool_name',
    'url',
    'issue_id',
    'issue_desc',
    'issue_impact',
    'issue_help',
    'issue_url',
    'violations',
    'passes',
    'total_checks',
    'failure_rate',
]

In [None]:
def _issue_record(tool_name, url, issue, passes=0, violations=0):
    """Build one report row for `issue` as reported for the tool `tool_name`.

    `passes` and `violations` are the node counts to store for this issue.
    """
    return {
        'tool_name': tool_name,
        'url': url,
        'issue_id': issue['id'],
        'issue_desc': issue['description'],
        'issue_impact': issue['impact'],
        'issue_help': issue['help'],
        'issue_url': issue['helpUrl'],
        'passes': passes,
        'violations': violations,
    }


# Explicit column list so the frame has the expected columns even when there
# are no rows at all (otherwise the count columns below raise a KeyError).
_REPORT_COLUMNS = [
    'tool_name', 'url', 'issue_id', 'issue_desc', 'issue_impact',
    'issue_help', 'issue_url', 'passes', 'violations',
]

rows = []

for data_i in data:
    tool_name = data_i['name']
    # The URL used to be read but never stored; it is now kept on every row.
    url = data_i['url']
    axeResults = data_i['axeResults']

    # One record per issue id for this tool.
    issue_dict = {}

    for issue in axeResults['passes']:
        issue_dict[issue['id']] = _issue_record(
            tool_name, url, issue, passes=len(issue['nodes'])
        )

    for issue in axeResults['violations']:
        record = issue_dict.get(issue['id'])
        if record is None:
            issue_dict[issue['id']] = _issue_record(
                tool_name, url, issue, violations=len(issue['nodes'])
            )
        else:
            # The same rule both passed and failed on different nodes: keep the
            # pass count and take the violation count and impact from here.
            record['violations'] = len(issue['nodes'])
            record['issue_impact'] = issue['impact']

    rows.extend(issue_dict.values())

df_reports = pd.DataFrame(rows, columns=_REPORT_COLUMNS)
df_reports['total_checks'] = df_reports['passes'] + df_reports['violations']
# Zero totals become NaN first, so the rate is NaN instead of a division by zero.
df_reports['failure_rate'] = df_reports['violations'] / df_reports['total_checks'].replace(0, np.nan)


In [None]:
# Preview the first ten report rows.
df_reports.head(10)

Similar to INSCIDAR

In [None]:
# Alias, not a copy: `reports` and `df_reports` are the same DataFrame object,
# so an in-place change made through one name is visible through the other.
reports = df_reports

In [None]:
# Columns that describe the issue itself.
ISSUE_COLUMNS = [
    'issue_id',
    'issue_desc',
    'issue_impact',
    'issue_help',
    'issue_url',
    # Currently disabled entries:
    # 'issue_name',
    # 'issue_filter',
    # 'issue_overall_impact',
    # 'issue_note_overall_impact_hdv',
    # 'issue_severity',
    # 'issue_data_related',
    # 'issue_data_related_rule',
    # 'issue_pour_category',
    # 'issue_wcag_level',
    # 'issue_difficulty_to_fix',
    # 'issue_missing_label_related'
]

# Page metadata = every column of `reports` that is neither an issue column
# nor one of the count / rate columns.
_NON_PAGE_COLUMNS = set(ISSUE_COLUMNS) | {'violations', 'passes', 'total_checks', 'failure_rate'}
PAGE_METADATA_COLUMNS = [column for column in reports.columns if column not in _NON_PAGE_COLUMNS]

In [None]:
# Show which columns were classified as page metadata.
PAGE_METADATA_COLUMNS

In [None]:
# Sum the counts per page, keeping groups whose key contains missing values.
count_columns = ['violations', 'passes', 'total_checks']
reports_by_page = (
    reports
    .groupby(PAGE_METADATA_COLUMNS, dropna=False)[count_columns]
    .sum()
    .reset_index()
)

# Failure rate per page: violations as a fraction of all checks.
reports_by_page['failure_rate'] = reports_by_page['violations'] / reports_by_page['total_checks']

In [None]:
# Preview the first ten per-page aggregates.
reports_by_page.head(10)

In [None]:
# Per-page aggregates ordered from lowest to highest failure rate.
reports_by_page_sorted = reports_by_page.sort_values("failure_rate")

In [None]:
# Unweighted mean of the per-page failure rates (each page counts once).
average_tool_failure_rate = reports_by_page['failure_rate'].mean()
average_tool_failure_rate

In [None]:
# Lowest and highest per-page failure rate.
page_failure_rates = reports_by_page['failure_rate']
print(page_failure_rates.min())
print(page_failure_rates.max())

In [None]:
# Horizontal bar chart of the failure rate per tool, in the sorted order.
bar_labels = reports_by_page_sorted["tool_name"]
bar_widths = reports_by_page_sorted["failure_rate"]

plt.figure(figsize=(8, 6))
plt.barh(bar_labels, bar_widths)
plt.title("Accessibility Failure Rate by Tool")
plt.xlabel("Failure Rate")
plt.ylabel("Tool Name")
plt.xticks(size=8)
plt.yticks(size=5)
plt.show()


In [None]:
# The 30 tools with the highest failure rate. The frame is sorted ascending
# and sort_values places NaN last, so rows without a failure rate are dropped
# first; otherwise they would take slots in the tail and show up as empty bars.
top_30 = reports_by_page_sorted.dropna(subset=["failure_rate"]).tail(30)

plt.figure(figsize=(8, 6))
plt.barh(top_30["tool_name"], top_30["failure_rate"])
plt.xlabel("Failure Rate")
plt.ylabel("Tool Name")
plt.title("Accessibility Failure Rate by Tool for Top 30 Tools")
plt.xticks(size=8)
plt.yticks(size=8)
plt.show()

In [None]:
# Sum the counts per issue id across all tools.
issue_count_columns = ['violations', 'passes', 'total_checks']
reports_by_issue = (
    reports
    .groupby(['issue_id'], dropna=False)[issue_count_columns]
    .sum()
    .reset_index()
)

In [None]:
# Preview the first ten per-issue aggregates.
reports_by_issue.head(10)

In [None]:
# Issues ordered by total number of violations, highest first.
reports_by_issue_sorted = reports_by_issue.sort_values(by="violations", ascending=False)
# Only the last expression of a cell is rendered automatically, so the top-10
# table is shown explicitly; as a bare mid-cell expression it was discarded.
display(reports_by_issue_sorted.head(10))

# Mean number of violations per issue id.
average_violations = reports_by_issue['violations'].mean()
average_violations

## Violations

In [None]:
# Find the impact (severity) of each violated issue.
# Is the severity the same for every tool, or can it differ per issue id?
mapping_issue_impact = {}

# Collect every impact value reported for each violated issue id.
for data_i in data:
    for issue in data_i['axeResults']['violations']:
        mapping_issue_impact.setdefault(issue['id'], set()).add(issue['impact'])

# Report any issue id that was seen with more than one impact.
for issue_id, impacts in mapping_issue_impact.items():
    if len(impacts) > 1:
        print(f"Issue ID {issue_id} has multiple impacts: {impacts}")

# Collapse each set to a single impact. When nothing is printed above, every
# set has exactly one element and this simply unwraps it. If an issue ever has
# several impacts, the most severe one is chosen, so the result does not depend
# on set iteration order.
_severity_rank = {'critical': 0, 'serious': 1, 'moderate': 2, 'minor': 3}
mapping_issue_impact = {
    issue_id: min(impacts, key=lambda impact: _severity_rank.get(impact, len(_severity_rank)))
    for issue_id, impacts in mapping_issue_impact.items()
}

In [None]:
# Show the issue id -> impact mapping.
mapping_issue_impact

In [None]:
# Group by impact.
# The impact is attached to a derived frame instead of being written into
# `reports`: that name aliases `df_reports`, and an extra column there would
# be picked up as page metadata if the PAGE_METADATA_COLUMNS cell were re-run.
reports_with_impact = reports.assign(impact=reports['issue_id'].map(mapping_issue_impact))

# Issue ids missing from the mapping get a NaN impact; dropna=False keeps
# them as their own group.
reports_by_impact = (
    reports_with_impact
    .groupby(['impact'], dropna=False)
    .agg({
        'violations': 'sum',
        'passes': 'sum',
        'total_checks': 'sum',
    })
    .reset_index()
)

# Display the groups from most to least severe.
impact_order = {'critical': 0, 'serious': 1, 'moderate': 2, 'minor': 3}
reports_by_impact.sort_values(by="impact", key=lambda impact: impact.map(impact_order), ascending=True)

## Keyboard specific issues

In [None]:
# Tags used to select the keyboard-specific issues.
categoriesToCheck = [
    "cat.keyboard",
    "wcag131",
    "wcag211",
    "wcag213",
    "wcag221",
    "wcag241",
    "wcag332",
    "wcag412",
    # axe doesn't seem to test for the ones below
    "wcag132",
    "wcag1413",
    "wcag212",
    "wcag214",
    "wcag243",
    "wcag247",
    "wcag251",
    "wcag253",
    "wcag321",
    "wcag322",
    "wcag325",
]

In [None]:
# Issue ids whose tag list shares at least one tag with `categoriesToCheck`.
keyboard_issues = [
    issue_id
    for issue_id, metadata in all_ids.items()
    if not set(metadata["tags"]).isdisjoint(categoriesToCheck)
]

In [None]:
# Total number of known issues versus the number selected as keyboard related.
print(len(all_ids))
print(len(keyboard_issues))

In [None]:
# Impact per keyboard issue; ids missing from the impact mapping are labelled "unknown".
keyboard_issues_impact = {
    issue: mapping_issue_impact.get(issue, "unknown")
    for issue in keyboard_issues
}
keyboard_issues_impact

Should we rerun all with just these issues? What to report?