In [9]:
import pandas as pd
import altair as alt
import json
from theme import apply_theme
# Altair caps the number of rows a chart may embed (5000 by default, per the
# note below). Lift the cap up front: the frames charted in this notebook are
# larger than that. The trailing `;` keeps the call's return value out of the
# cell output.
alt.data_transformers.disable_max_rows(); # Allow using rows more than 5000

# Visualize Evaluation Results

In [3]:
# Parse the accessibility reports; the handle is only needed while reading.
with open('../output/a11y-reports.json', 'r') as f:
    reports = json.load(f)

# One summary record per portal: identifying fields plus the issue count of
# each category. Reports whose status flag equals False are left out; the
# comparison is deliberately against False itself, so a missing/null flag
# does not drop the report.
data = pd.DataFrame.from_records([
    {
        'dbId': entry['dbId'],
        'shortName': entry['shortName'],
        'url': entry['url'],
        'error': entry['report']['categories']['error']['count'],
        'contrast': entry['report']['categories']['contrast']['count'],
        'alert': entry['report']['categories']['alert']['count'],
    }
    for entry in reports
    if entry['report']['status']['success'] != False
])

# From here on `error` means "errors + contrast issues"; the separate
# `contrast` column is kept alongside it.
data['error'] = data['error'] + data['contrast']

# Number of successfully evaluated portals; used as the denominator for the
# proportions computed elsewhere in the notebook.
N = len(data)

data

Unnamed: 0,dbId,shortName,url,error,contrast,alert
0,3061,DAVID,https://david.ncifcrf.gov,39,37,38
1,184,KEGG,https://www.kegg.jp,3,1,5
2,6569,cBioPortal,http://cbioportal.org,1202,490,159
3,317,ENCODE,https://www.encodeproject.org/,11,3,2
4,62,STRING,https://string-db.org/,55,26,33
...,...,...,...,...,...,...
2931,6315,Trace Archive,https://www.ncbi.nlm.nih.gov/Traces/trace.cgi,0,0,0
2932,6317,Retroviruses,https://www.ncbi.nlm.nih.gov/genome/viruses/re...,1,0,7
2933,6318,TPA,https://www.ncbi.nlm.nih.gov/genbank/tpa,2,2,11
2934,4894,plaza 4.0,https://bioinformatics.psb.ugent.be/plaza,31,29,20


## Overview

In [4]:
# One entry per chart: (label used in titles, column of `data`, bar colour).
metric = [
    ('errors', 'error', '#CC7DAA'),
    # ('contrast issues', 'contrast', '#D6641E'),
    ('alerts', 'alert', '#E6A01B'),
]

# The colour is bound to `bar_color` rather than `hex`: a loop variable named
# `hex` would stay bound after the loop and shadow the builtin `hex()` for
# every later cell.
for (name, field, bar_color) in metric:
    # Each channel uses its matching class (alt.X for x, alt.Y for y).
    # Portals are ordered by descending bar height via sort='-y'.
    plot_alert = alt.Chart(data).mark_bar().encode(
        x=alt.X('shortName:N', sort='-y', title='Data portals', axis=alt.Axis(labelAngle=300)),
        y=alt.Y(f'{field}:Q', scale=alt.Scale(type='log'), title=f'The number of {name}'),
        color=alt.value(bar_color)
    ).transform_filter(
        # Zero cannot be placed on a log scale, so portals without any issue
        # of this kind are left out of the chart.
        alt.datum[field] > 0
    ).properties(
        title=f'The Number of "{name.capitalize()}" By Data Portals'
    )

    if name == 'errors':
        # Reference line for the errors chart only; the value is taken from
        # https://webaim.org/projects/million/
        mean = 43.7
        r = alt.Chart().mark_rule(color='black', strokeDash=[10, 10]).encode(y=alt.datum(mean))
        t = alt.Chart().mark_text(color='black', strokeDash=[10, 10], angle=0, dy=-10).encode(y=alt.datum(mean), text=alt.datum('WAVE Average'))
        plot_alert += (r + t)

    plot_alert = apply_theme(plot_alert)
    # plot_alert.save('report-error.png') # does not work
    # Explicit display() is required: inside a loop there is no "last
    # expression" for the notebook to render.
    plot_alert.display()

## Detailed issues

In [5]:
# Parse the accessibility reports; the handle is only needed while reading.
with open('../output/a11y-reports.json', 'r') as f:
    reports = json.load(f)

# Issue categories to flatten, in the order their rows are emitted per portal.
metrics = ['error', 'contrast', 'alert']

# Long-format table: one row per (portal, category, issue id) with its count.
details = []
for report in reports:
    # The success flag belongs to the whole report, so it is checked once per
    # report instead of once per category. The comparison is against False
    # itself so that a missing/null flag does not drop the report.
    if report['report']['status']['success'] == False:
        continue

    for m in metrics:
        # `items` is a mapping; only its values are needed, since each value
        # carries its own `id` and `count`.
        for item in report['report']['categories'][m]['items'].values():
            details.append({
                'dbId': report['dbId'],
                'shortName': report['shortName'],
                'url': report['url'],
                'type': m,
                'name': item['id'],
                'count': item['count'],
            })

details = pd.DataFrame.from_records(details)
details

Unnamed: 0,dbId,shortName,url,type,name,count
0,3061,DAVID,https://david.ncifcrf.gov,error,label_missing,1
1,3061,DAVID,https://david.ncifcrf.gov,error,language_missing,1
2,3061,DAVID,https://david.ncifcrf.gov,contrast,contrast,37
3,3061,DAVID,https://david.ncifcrf.gov,alert,alt_suspicious,15
4,3061,DAVID,https://david.ncifcrf.gov,alert,h1_missing,1
...,...,...,...,...,...,...
21659,5369,MetaCyc,http://MetaCyc.org,alert,noscript,1
21660,5369,MetaCyc,http://MetaCyc.org,alert,javascript_jumpmenu,25
21661,5369,MetaCyc,http://MetaCyc.org,alert,text_small,12
21662,5369,MetaCyc,http://MetaCyc.org,alert,table_layout,30


In [10]:
# Heatmap of issue counts: portals along x, issue ids along y, one facet row
# per issue category.

# Portals are ordered by the colour channel (descending), i.e. by issue count.
portal_axis = alt.X(
    'shortName:N',
    sort='-color',
    axis=alt.Axis(labelAngle=270, grid=True, zindex=0),
)
issue_axis = alt.Y('name:N', title=None, axis=alt.Axis(grid=True, zindex=0))

# Log colour scale fixed to [1, 100]; with clamp=True, counts outside that
# range take the colour of the nearest end of the scale.
count_color = alt.Color(
    'count:Q',
    scale=alt.Scale(type='log', domain=[1, 100], clamp=True),
    legend=alt.Legend(gradientLength=300),
)
category_rows = alt.Row(
    'type:N',
    header=alt.Header(labelOrient='left', title=None),
    sort=['error', 'contrast', 'alert'],
)

plot = (
    alt.Chart(details)
    .mark_rect(stroke='white', opacity=1)
    .encode(x=portal_axis, y=issue_axis, color=count_color, row=category_rows)
    # Zero counts cannot be placed on the log colour scale.
    .transform_filter(alt.datum['count'] > 0)
    # Each facet row gets its own y scale, so it lists only its own issue ids.
    .resolve_scale(y='independent')
)
plot = apply_theme(plot)
plot

In [17]:
# (issue id, bar colour) pairs: a few issue ids are highlighted, the rest are
# drawn in light grey.
c = [
    ('alt_link_missing', 'lightgrey'),
    ('alt_missing', '#CC7DAA'),
    ('alt_spacer_missing', 'lightgrey'),
    ('aria_menu_broken', 'lightgrey'),
    ('aria_reference_broken', 'lightgrey'),
    ('button_empty', 'lightgrey'),
    ('contrast', 'lightgrey'),
    ('heading_empty', 'lightgrey'),
    ('label_empty', '#CC7DAA'),
    ('label_missing', '#CC7DAA'),
    ('label_multiple', 'lightgrey'),
    ('language_missing', 'lightgrey'),
    ('link_empty', 'lightgrey'),
    ('link_skip_broken', 'lightgrey'),
    ('th_empty', 'lightgrey'),
    ('title_invalid', 'lightgrey'),
]
issue_ids = [issue_id for issue_id, _ in c]
issue_colors = [color for _, color in c]

# Count the rows recorded for each issue id (alerts excluded) and express
# that count as a share of the N evaluated portals.
by_category = (
    details[details.type != 'alert']
    .groupby(by=['name'])
    .count()
    .assign(proportion=lambda counts: counts['count'] / N)
    .reset_index()
)

share_axis = alt.Y('proportion:Q', axis=alt.Axis(format='%'), scale=alt.Scale(domain=[0, 1]))
issue_axis = alt.X('name:N', sort='-y', axis=alt.Axis(labelAngle=300, grid=False, zindex=1))
issue_fill = alt.Color('name:N', scale=alt.Scale(domain=issue_ids, range=issue_colors), legend=None)

plot = (
    alt.Chart(by_category)
    .mark_bar(stroke='white', opacity=1)
    .encode(x=issue_axis, y=share_axis, color=issue_fill)
    .resolve_scale(y='independent')
    .properties(width=800)
)
plot = apply_theme(plot)
plot

In [12]:
# Split the evaluated portals into those with and without any error and
# chart the share of each group.
#
# `data.error` already includes the contrast count (it is folded in when
# `data` is built), so it is used as-is: adding `contrast` again here would
# count every contrast issue twice.
df = data.copy()
df['no_errors'] = (df.error == 0).map({True: 'no errors', False: 'with errors'})

# After count(), every remaining column holds the group size; `error` is the
# one used as "number of portals in this group".
df = df.groupby(by='no_errors').count()
df['percent'] = df.error / N
df = df.reset_index()

plot = alt.Chart(df).mark_bar().encode(
    x=alt.X('no_errors:N'),
    y=alt.Y('percent:Q', title=None, axis=alt.Axis(format='%')),
    # The domain is spelled out so each label keeps its colour even if one of
    # the two groups happens to be empty.
    color=alt.Color(
        'no_errors:N',
        legend=None,
        scale=alt.Scale(domain=['no errors', 'with errors'], range=['#3275B4', '#CC7DAA'])
    ),
    tooltip=[
        'error',
        'percent'
    ]
).properties(
    title='The % of Homepages with & without Any Errors',
    width=300
)

plot = apply_theme(plot)

plot