In [1]:
import warnings
import pandas as pd
import numpy as np
import sqlite3
import psycopg2
import sys
from sqlalchemy import create_engine
from scipy import stats

warnings.simplefilter(action="ignore", category=FutureWarning)

In [2]:
url = "postgresql+psycopg2://admin:secret@localhost:5432/accessibility_monitoring_app"
engine = create_engine(url)

In [3]:
pd.set_option("display.max_columns", None)

df = pd.read_sql("SELECT * FROM public.audits_checkresult;", engine)
df

Unnamed: 0,id,is_deleted,type,check_result_state,notes,audit_id,page_id,wcag_definition_id,retest_notes,retest_state
0,15,False,axe,error,Refers to the blue 'i' button in the 'Contact ...,1,1,69,,fixed
1,34,False,axe,error,° Turquoise and white\r\n° Red text for the cu...,1,2,23,,fixed
2,2,False,manual,no-error,,1,1,10,,not-retested
3,3,False,manual,no-error,,1,1,11,,not-retested
4,4,False,manual,no-error,,1,1,12,,not-retested
...,...,...,...,...,...,...,...,...,...,...
3734,3202,False,manual,error,* The user cannot keyboard tab to the arrows f...,157,1106,7,,not-retested
3735,3203,False,manual,error,* The 'Discover' submenu options receive tab f...,157,1106,9,,not-retested
3736,3737,False,manual,error,,157,1106,11,,not-retested
3737,3738,False,manual,error,,157,1106,13,,not-retested


In [4]:
wcag_definitions_df = pd.read_sql("SELECT * FROM public.audits_wcagdefinition;", engine)
wcag_definitions_df

Unnamed: 0,id,type,name,description,url_on_w3,report_boilerplate,date_start,date_end
0,1,pdf,WCAG 1.4.3 Contrast (Minimum),,https://www.w3.org/WAI/WCAG21/Understanding/co...,Poor colour contrast makes it difficult for so...,,
1,2,pdf,WCAG 2.4.2 Page titled,,https://www.w3.org/WAI/WCAG21/Understanding/pa...,PDF documents should have titles that describe...,,
2,3,pdf,WCAG 3.1.1 Language of Page,,https://www.w3.org/WAI/WCAG21/Understanding/la...,Assistive technologies are more accurate when ...,,
3,4,pdf,WCAG 1.3.1 Info and Relationships,In tables,https://www.w3.org/WAI/WCAG21/Understanding/in...,Information in tables must be shown in a way t...,,
4,6,pdf,WCAG 1.1.1 Non-text content,,https://www.w3.org/WAI/WCAG21/Understanding/no...,People with sight loss may not see an image cl...,,
...,...,...,...,...,...,...,...,...
72,43,axe,WCAG 1.3.5 Identify Input Purpose,Autocomplete attribute must be used correctly,https://www.w3.org/WAI/WCAG21/Understanding/id...,Autocomplete attribute must be used correctly....,,
73,52,axe,WCAG 2.4.2 Page Titled,Documents must contain a title element to aid ...,https://www.w3.org/WAI/WCAG21/Understanding/pa...,Pages should have titles that describe the top...,,
74,65,axe,"WCAG 4.1.2 Name, Role, Value",aria-roledescription must be on elements with ...,https://www.w3.org/WAI/WCAG21/Understanding/na...,Incorrect use of ARIA attributes can cause con...,,
75,29,axe,WCAG 1.1.1 Non-text Content,SVG images and graphics require accessible text,https://www.w3.org/WAI/WCAG21/Understanding/no...,People with sight loss may not be able to see ...,,


In [6]:
id_to_definition_dict = dict(zip(wcag_definitions_df.id, wcag_definitions_df.name))
df["wcag_definition"] = df.replace({"wcag_definition_id": id_to_definition_dict})["wcag_definition_id"]
df

Unnamed: 0,id,is_deleted,type,check_result_state,notes,audit_id,page_id,wcag_definition_id,retest_notes,retest_state,wcag_definition
0,15,False,axe,error,Refers to the blue 'i' button in the 'Contact ...,1,1,69,,fixed,"WCAG 4.1.2 Name, Role, Value"
1,34,False,axe,error,° Turquoise and white\r\n° Red text for the cu...,1,2,23,,fixed,WCAG 1.4.3 Contrast (minimum)
2,2,False,manual,no-error,,1,1,10,,not-retested,WCAG 1.4.4. Resize Text
3,3,False,manual,no-error,,1,1,11,,not-retested,WCAG 1.4.10 Reflow
4,4,False,manual,no-error,,1,1,12,,not-retested,WCAG 1.2.1 Audio-only and video-only (prerecor...
...,...,...,...,...,...,...,...,...,...,...,...
3734,3202,False,manual,error,* The user cannot keyboard tab to the arrows f...,157,1106,7,,not-retested,WCAG 2.1.1 Keyboard
3735,3203,False,manual,error,* The 'Discover' submenu options receive tab f...,157,1106,9,,not-retested,WCAG 2.4.7 Focus Visible
3736,3737,False,manual,error,,157,1106,11,,not-retested,WCAG 1.4.10 Reflow
3737,3738,False,manual,error,,157,1106,13,,not-retested,WCAG 1.2.2 Captions (prerecorded)


In [28]:
most_common_violations = df[df["retest_state"] != "not-retested"]["wcag_definition"].value_counts().where(lambda x : x > 10).dropna()
most_common_violations

WCAG 2.4.7 Focus Visible                                                 209.0
WCAG 4.1.2 Name, Role, Value                                             165.0
WCAG 2.1.1 Keyboard                                                      149.0
WCAG 1.4.3 Contrast (minimum)                                            138.0
WCAG 1.3.1 Info and Relationships                                         96.0
WCAG 2.4.4 Link Purpose (In Context) and WCAG 4.1.2 Name, Role, Value     93.0
WCAG 1.1.1 Non-text Content                                               41.0
WCAG 2.4.3 Focus Order                                                    31.0
WCAG 1.4.10 Reflow                                                        30.0
WCAG 1.4.3 Contrast (Minimum)                                             28.0
WCAG 2.4.1 Bypass Blocks and WCAG 4.1.2 Name, Role, Value                 28.0
WCAG 2.4.2 Page titled                                                    27.0
WCAG 2.1.2 No Keyboard Traps                        

In [39]:
fixed_errors = df[
    (df["check_result_state"] == "error")
    & (df["retest_state"] == "fixed")
]

unfixed_errors = df[
    (df["check_result_state"] == "error")
    & (df["retest_state"] == "not-fixed")
]

fixed_errors = fixed_errors[fixed_errors['wcag_definition'].isin(most_common_violations.index)]
unfixed_errors = unfixed_errors[unfixed_errors['wcag_definition'].isin(most_common_violations.index)]

In [40]:
fixed_errors["wcag_definition"].value_counts()

WCAG 2.4.7 Focus Visible                                                 155
WCAG 4.1.2 Name, Role, Value                                             130
WCAG 2.1.1 Keyboard                                                      118
WCAG 1.4.3 Contrast (minimum)                                            105
WCAG 2.4.4 Link Purpose (In Context) and WCAG 4.1.2 Name, Role, Value     79
WCAG 1.3.1 Info and Relationships                                         78
WCAG 1.1.1 Non-text Content                                               31
WCAG 2.4.3 Focus Order                                                    28
WCAG 1.4.10 Reflow                                                        25
WCAG 1.4.3 Contrast (Minimum)                                             23
WCAG 2.4.1 Bypass Blocks and WCAG 4.1.2 Name, Role, Value                 19
WCAG 2.4.2 Page titled                                                    19
WCAG 2.1.2 No Keyboard Traps                                              16

In [41]:
unfixed_errors["wcag_definition"].value_counts()

WCAG 2.4.7 Focus Visible                                                 54
WCAG 4.1.2 Name, Role, Value                                             35
WCAG 1.4.3 Contrast (minimum)                                            33
WCAG 2.1.1 Keyboard                                                      31
WCAG 1.3.1 Info and Relationships                                        18
WCAG 2.4.4 Link Purpose (In Context) and WCAG 4.1.2 Name, Role, Value    14
WCAG 1.1.1 Non-text Content                                              10
WCAG 2.4.1 Bypass Blocks and WCAG 4.1.2 Name, Role, Value                 9
WCAG 2.4.2 Page titled                                                    8
WCAG 1.3.1 Info and Relationships and WCAG 4.1.2 Name, Role, Value        8
WCAG 1.4.3 Contrast (Minimum)                                             5
WCAG 1.4.10 Reflow                                                        5
WCAG 2.4.3 Focus Order                                                    3
WCAG 2.1.2 N

In [42]:
# pd.DataFrame()
temp = pd.DataFrame(dict(unfixed_errors = unfixed_errors["wcag_definition"].value_counts(), fixed_errors = fixed_errors["wcag_definition"].value_counts()))
temp = temp.fillna(0)
temp["total"] = temp["unfixed_errors"] + temp["fixed_errors"]
temp["unfixed_errors_ratio"] =  temp["unfixed_errors"] / temp["total"]
temp["fixed_errors_ratio"] =  temp["fixed_errors"] / temp["total"]

temp

# unfixed_errors["wcag_definition"].value_counts().to_frame().join(fixed_errors["wcag_definition"].value_counts())

Unnamed: 0,unfixed_errors,fixed_errors,total,unfixed_errors_ratio,fixed_errors_ratio
WCAG 1.1.1 Non-text Content,10.0,31,41.0,0.243902,0.756098
WCAG 1.3.1 Info and Relationships,18.0,78,96.0,0.1875,0.8125
"WCAG 1.3.1 Info and Relationships and WCAG 4.1.2 Name, Role, Value",8.0,6,14.0,0.571429,0.428571
WCAG 1.4.10 Reflow,5.0,25,30.0,0.166667,0.833333
WCAG 1.4.3 Contrast (Minimum),5.0,23,28.0,0.178571,0.821429
WCAG 1.4.3 Contrast (minimum),33.0,105,138.0,0.23913,0.76087
WCAG 2.1.1 Keyboard,31.0,118,149.0,0.208054,0.791946
WCAG 2.1.2 No Keyboard Traps,2.0,16,18.0,0.111111,0.888889
"WCAG 2.2.2 Pause, Stop, Hide",2.0,10,12.0,0.166667,0.833333
"WCAG 2.4.1 Bypass Blocks and WCAG 4.1.2 Name, Role, Value",9.0,19,28.0,0.321429,0.678571


In [44]:
temp.sort_values("unfixed_errors_ratio")

Unnamed: 0,unfixed_errors,fixed_errors,total,unfixed_errors_ratio,fixed_errors_ratio
WCAG 4.1.1 Parsing,0.0,15,15.0,0.0,1.0
WCAG 2.4.3 Focus Order,3.0,28,31.0,0.096774,0.903226
WCAG 2.1.2 No Keyboard Traps,2.0,16,18.0,0.111111,0.888889
"WCAG 2.4.4 Link Purpose (In Context) and WCAG 4.1.2 Name, Role, Value",14.0,79,93.0,0.150538,0.849462
WCAG 1.4.10 Reflow,5.0,25,30.0,0.166667,0.833333
"WCAG 2.2.2 Pause, Stop, Hide",2.0,10,12.0,0.166667,0.833333
WCAG 1.4.3 Contrast (Minimum),5.0,23,28.0,0.178571,0.821429
WCAG 1.3.1 Info and Relationships,18.0,78,96.0,0.1875,0.8125
WCAG 2.1.1 Keyboard,31.0,118,149.0,0.208054,0.791946
"WCAG 4.1.2 Name, Role, Value",35.0,130,165.0,0.212121,0.787879


In [45]:
temp.sort_values("fixed_errors_ratio")

Unnamed: 0,unfixed_errors,fixed_errors,total,unfixed_errors_ratio,fixed_errors_ratio
"WCAG 1.3.1 Info and Relationships and WCAG 4.1.2 Name, Role, Value",8.0,6,14.0,0.571429,0.428571
"WCAG 2.4.1 Bypass Blocks and WCAG 4.1.2 Name, Role, Value",9.0,19,28.0,0.321429,0.678571
WCAG 2.4.2 Page titled,8.0,19,27.0,0.296296,0.703704
WCAG 2.4.7 Focus Visible,54.0,155,209.0,0.258373,0.741627
WCAG 1.1.1 Non-text Content,10.0,31,41.0,0.243902,0.756098
WCAG 1.4.3 Contrast (minimum),33.0,105,138.0,0.23913,0.76087
"WCAG 4.1.2 Name, Role, Value",35.0,130,165.0,0.212121,0.787879
WCAG 2.1.1 Keyboard,31.0,118,149.0,0.208054,0.791946
WCAG 1.3.1 Info and Relationships,18.0,78,96.0,0.1875,0.8125
WCAG 1.4.3 Contrast (Minimum),5.0,23,28.0,0.178571,0.821429
