## This notebook parses successful runs from the database and displays them in charts for easy study.

In [18]:
from pymongo import MongoClient
import pymongo

In [19]:
# Client for the MongoDB server on this machine (driver-default port).
connector = MongoClient(host="localhost")

In [20]:
# Handle to the "ccrawler" database that holds the stored runs.
db = connector.get_database("ccrawler")

In [21]:
# Collection with one document per run.
runs = db.get_collection("runs")

In [22]:
# Cursor over every run whose status is "runDone".
succ_runs = runs.find(filter={"status": "runDone"})

In [23]:
# Accumulators that the aggregation loop fills in; everything starts empty/zero.

# Plain counters.
totalSites = amntSitesMoreThanAYear = sitesWithMoreBtn = moreBtnRedir = 0

# Per-run collections (each name gets its own list object).
parsed_runs, perScreenFilled, perTickedBoxes = [], [], []

# Approve-button size relative to the "more" button.
confirmSizes = dict.fromkeys(("bigger", "same", "smaller"), 0)

# Readability histograms: category -> number of runs.
readabilityARI, readabilityFLESH = {}, {}

In [24]:
# Walk every successful run once and fold it into the accumulators created in
# the previous cell (totalSites, perScreenFilled, perTickedBoxes, confirmSizes,
# readabilityARI, readabilityFLESH, amntSitesMoreThanAYear, sitesWithMoreBtn,
# moreBtnRedir).
# NOTE: succ_runs is a database cursor and this cell only adds to the
# accumulators, so re-run the query cell and the accumulator cell before
# re-running this one.
SECONDS_PER_YEAR = 31556926  # A year in seconds.

for item in succ_runs:
    totalSites += 1
    notice = item["notice"]
    settings = item["settings"]

    # Share of the browser window covered by the notice, in percent.
    res = {}
    res["windowArea"] = item["browserSize"]["width"] * item["browserSize"]["height"]
    res["noticeArea"] = notice["size"]["height"] * notice["size"]["width"]
    if res["windowArea"] > 0:  # a zero-sized window would divide by zero
        res["noticeOfFull"] = round(res["noticeArea"] / res["windowArea"], 2)
        if res["noticeOfFull"] <= 1:
            # Round after scaling so values like 0.29 * 100 do not end up as
            # 28.999999999999996 in the histogram data.
            perScreenFilled.append(round(res["noticeOfFull"] * 100))

    # Share of checkboxes that are already ticked, in percent.
    if settings["totalCheckboxes"] > 0:
        tickedShare = round(settings["checkedCheckboxes"] / settings["totalCheckboxes"], 2)
        perTickedBoxes.append(round(tickedShare * 100))

    # Both buttons are looked up once and treated as optional everywhere, so a
    # missing key and an empty value are handled the same way.
    apprBtn = notice.get("apprBtn")
    moreBtn = notice.get("moreBtn")

    # Compare the area of the approve button with the area of the "more" button.
    if apprBtn and moreBtn:
        sizeOfAppr = apprBtn["size"]["width"] * apprBtn["size"]["height"]
        sizeOfMore = moreBtn["size"]["width"] * moreBtn["size"]["height"]
        if sizeOfAppr > sizeOfMore:
            confirmSizes["bigger"] += 1
        elif sizeOfAppr < sizeOfMore:
            confirmSizes["smaller"] += 1
        else:
            confirmSizes["same"] += 1

    # Readability ARI - by grade level (only the first listed level is counted).
    ariLevels = settings["readabilityARI"]["grade_levels"]
    if ariLevels:
        readabilityARI[ariLevels[0]] = readabilityARI.get(ariLevels[0], 0) + 1

    # Readability FLESH - by ease.
    fleshEase = settings["readabilityFLESH"]["ease"]
    if fleshEase:
        readabilityFLESH[fleshEase] = readabilityFLESH.get(fleshEase, 0) + 1

    # Lets see if we have cookies saved for more than a year.
    # NOTE(review): .timestamp() on a naive datetime assumes local time, while
    # PyMongo returns naive UTC datetimes by default - confirm how endedAt was
    # stored before relying on values close to the one-year threshold.
    endedScanSinceEpoch = item["endedAt"].timestamp()
    for cookie in item["startCookies"]:
        if "expiry" in cookie:
            # The cookie has an expiry date, when tho?
            secondsToExpire = cookie["expiry"] - endedScanSinceEpoch
            if secondsToExpire >= SECONDS_PER_YEAR:
                # One long-lived cookie is enough; count the site only once.
                amntSitesMoreThanAYear += 1
                break

    # Lets check if moreBtn is a redirect.
    if moreBtn:
        sitesWithMoreBtn += 1
        if moreBtn.get("redirect"):
            moreBtnRedir += 1
            

## Percentage of screen blocked by notice:

In [25]:
import pandas as pd
import plotly.express as px
# Histogram of the percentage of the browser window covered by each notice.
plotdata = pd.DataFrame(data={"perFilled": perScreenFilled})
fig = px.histogram(
    data_frame=plotdata,
    x="perFilled",
    nbins=30,
    labels=dict(perFilled="Percentage of screen filled by notice"),
).update_layout(
    title="Percentage of screen blocked by notice on first load:",
    xaxis_title="Percentage of screen filled by notice",
    yaxis_title="Amount of notices",
)
fig.show()


## Percentage of checkboxes pre ticked:

In [26]:
# Histogram of the percentage of checkboxes that are ticked when the settings load.
ticked_column = "Percentage Ticked Boxes / All Boxes on page"
plotdata = pd.DataFrame(data={ticked_column: perTickedBoxes})
fig = px.histogram(
    data_frame=plotdata,
    x=ticked_column,
    nbins=30,
).update_layout(
    title="Percentage of page checkboxes pre ticked on settings load:",
    xaxis_title="Percentage of pre ticked boxes compared to all boxes in settings",
    yaxis_title="Amount of notices",
)
fig.show()

## Consent Approve/More buttons compared between each other:

In [27]:
import plotly.graph_objects as go
# Bar chart: how often the approve button is larger than, equal to, or smaller
# than the "more" button. Maps each bar label to its confirmSizes key.
size_categories = {
    'Consent > More': "bigger",
    'Consent = More': "same",
    'Consent < More': "smaller",
}
fig = go.Figure(
    data=go.Bar(
        x=list(size_categories),
        y=[confirmSizes[key] for key in size_categories.values()],
    )
).update_layout(
    title="Consent approve/more buttons size compared:",
    xaxis_title="Size difference",
    yaxis_title="Amount of notices",
)
fig.show()

## Readability level by Flesch/ARI grade level:

In [28]:
# One row per FLESH ease category, with a single "amount" column holding its count.
data = pd.DataFrame(data=readabilityFLESH, index=["amount"]).T

In [29]:
# Bar chart of the FLESH counts currently held in `data`.
fig = px.bar(data_frame=data, y="amount").update_layout(
    title="Readability by FLESH grade level:",
    xaxis_title="Level of readability",
    yaxis_title="Amount of notices",
)
fig.show()

In [30]:
# One row per ARI grade level with its count, drawn as a bar chart.
data = pd.DataFrame(data=readabilityARI, index=["amount"]).T
fig = px.bar(data_frame=data, y="amount").update_layout(
    title="Readability by ARI grade level:",
    xaxis_title="Level of readability",
    yaxis_title="Amount of notices",
)
fig.show()

## Sites with cookies that saved for more than a year:

In [31]:
# Bar chart: sites that set at least one cookie expiring more than a year
# after the scan versus all remaining sites.
sites_without_long_cookie = totalSites - amntSitesMoreThanAYear
fig = go.Figure(
    data=go.Bar(
        x=['Cookies > 1 year expiry', 'No cookies > 1 year expiry'],
        y=[amntSitesMoreThanAYear, sites_without_long_cookie],
    )
).update_layout(
    title="Cookie expiry difference:",
    xaxis_title="Cookie expiry time",
    yaxis_title="Amount of notices",
)
fig.show()

## Does the site redirect the user when rejecting cookies:

In [32]:
# Bar chart: among sites with a "more" button, how many of those buttons are
# flagged as a redirect and how many are not.
more_btn_no_redirect = sitesWithMoreBtn - moreBtnRedir
fig = go.Figure(
    data=go.Bar(
        x=['Yes', 'No'],
        y=[moreBtnRedir, more_btn_no_redirect],
    )
).update_layout(
    title="Does the notice redirect to new page:",
    xaxis_title="Does redirection",
    yaxis_title="Amount of notices",
)
fig.show()