In [1]:
from collections import defaultdict, Counter
from multiprocessing import Pool
from pathlib import Path
from typing import Dict
import datetime
import json
import re

from durations_nlp import Duration
from durations_nlp.exceptions import InvalidTokenError
from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import urllib
import tldextract

from consent.consistency.cookie_pref_match import cookie_pref_match
from consent.consistency.util import FIG_DIR, get_scan_dirs, get_scan_root_dir
from consent.data.pref_menu_scan.cookie_decl_reader import read_cookie_decls_in_scans
from consent.data.pref_menu_scan.har_cookie_reader import read_postrej_sent_cookies_in_scans
# from consent.data.site_pref import read_site_prefs
# from consent.data.pref_menu_scan.cookie_pref_reader import read_cookie_prefs_in_scans
# from consent.data.pref_menu_scan.log_reader import read_logs_in_scans
# from consent.util.default_path import get_data_dir
# from ooutil.type_util import hashabledict
# from ooutil.url_util import get_suffixed_domain
# from ooutil.cookie_util import get_brower_cookies
# from ooutil.func_util import memoize

# data_dir = get_data_dir('2021-05-01/pref_menu_scan_accept')
SCAN_DIRS = get_scan_dirs('us')
SCAN_ROOT_DIR = get_scan_root_dir('us')
fig_dir = Path.home() / 'local_projects/consent/paper/sp22/figures'
assert fig_dir.exists() and fig_dir.is_dir()

In [2]:
# scanned_sites = [site_dir.name for site_dir in data_dir.glob('*') if site_dir.is_dir()]
# site_dirs = [data_dir / site for site in scanned_sites]
# print("Number of scanned sites:", len(scanned_sites)) # , scanned_sites)

# Get cookie durations in declarations

In [3]:
# Get cookie prefs = cookie decls + prefs
# cookie_prefs = read_cookie_prefs_in_scans(SCAN_DIRS)
# cookie_prefs.head()
# TODO: rerun with cookie decls only, we do not need cookie prefs
raw_cookie_decls = read_cookie_decls_in_scans(SCAN_DIRS)
raw_cookie_decls



Unnamed: 0,name,domain,duration,category_id,category,consent_mode,site,lib_name,pattern_name
0,__we_bucket_id,www.wework.com,365 days,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
1,OptanonAlertBoxClosed,wework.com,365 days,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
2,OptanonConsent,www.wework.com,365 days,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
3,__we_request_id,www.wework.com,Session,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
4,we_referring_domain,www.wework.com,14 days,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
...,...,...,...,...,...,...,...,...,...
222,bzfyEmbedLoadedOaip32bvTNuXnPSDlJaawA,connect.getflowbox.com,Session,unclassified,unclassified,,vespa.com,cookiebot,cc.js
223,bzfyEmbedLoadedSWDUo4NQQsuLK9D2Q7T3kQ,vespa.com,Session,unclassified,unclassified,,vespa.com,cookiebot,cc.js
224,bzfyEmbedLoadedSWDUo4NQQsuLK9D2Q7T3kQ,connect.getflowbox.com,Session,unclassified,unclassified,,vespa.com,cookiebot,cc.js
225,randomID,vespa.com,Session,unclassified,unclassified,,vespa.com,cookiebot,cc.js


In [4]:
# def _osolete_analysis():
#     pref_menu_data_dir = get_data_dir('2021-05-01/pref_menu_scan')
#     pref_menu_sites = [site_dir.name for site_dir in pref_menu_data_dir.glob('*') if site_dir.is_dir()]
#     pref_menu_site_prefs = read_site_prefs([pref_menu_data_dir / site for site in pref_menu_sites])
#     print("Number of sites in pref-menu scan:", len(pref_menu_sites)) # , scanned_sites)
#     pref_menu_cookielist_sites = pref_menu_site_prefs.site.unique()
#     print("Number of sites with cookie list in pref-menu scan:", len(pref_menu_cookielist_sites)) # , scanned_sites)
#     missings = set(pref_menu_cookielist_sites) - set(site_prefs.site.unique())
#     n_sites_with_sett = site_prefs.site.nunique()
#     print("Missing sites:", missings)
#     redundants = set(site_prefs.site.unique()) - set(pref_menu_cookielist_sites)
#     print("Redudant sites:", redundants)
    # assert len(missings) == len(redundants) == 0, f"There is some mismatch."

In [5]:
# site_prefs = site_prefs[site_prefs.duration != '']
assert len(raw_cookie_decls[raw_cookie_decls.duration.isna()]) == 0
assert len(raw_cookie_decls[raw_cookie_decls.duration == '']) ==0

In [6]:
print(f"Most common preference durations:")
pref_dur_counts = raw_cookie_decls.duration.value_counts().to_frame().reset_index()
pref_dur_counts['freq'] = pref_dur_counts['duration'] / len(raw_cookie_decls) * 100
pref_dur_counts.head(10)
pref_dur_counts = pref_dur_counts[['index', 'freq']].rename(columns={'index': 'Duration'}).head(5)  # , 'freq': 'Frequency (%)'
print(pref_dur_counts.to_latex(index=False, float_format="%.2f"))

Most common preference durations:
\begin{tabular}{lr}
\toprule
Duration &  freq \\
\midrule
 Session & 25.09 \\
  0 days & 21.96 \\
365 days & 11.46 \\
730 days &  4.51 \\
 30 days &  3.81 \\
\bottomrule
\end{tabular}



In [7]:
print(f"Preference durations:")
print(raw_cookie_decls['duration'].unique())

Preference durations:
['365 days' 'Session' '14 days' ... '5995 days' '18858 days' '4931 days']


In [8]:
'Session' in raw_cookie_decls[raw_cookie_decls.lib_name == 'onetrust']['duration'].unique()

True

In [9]:
cookielist_sites = sorted(raw_cookie_decls.site.unique().tolist())
n_cookielist_sites = len(cookielist_sites)
assert raw_cookie_decls.site.nunique() == n_cookielist_sites
# print("Sites with cookie list:", n_cookielist_sites, cookielist_sites)

# Read in the post-rejection cookies to find the actual cookie durations.

In [11]:

# raw_postrej_sent_cookies_file = data_dir / 'raw_postrej_sent_cookies.parquet'
# postrej_sent_cookies_file = data_dir / 'postrej_sent_cookies.parquet'
# if raw_postrej_sent_cookies_file.exists():
#     raw_postrej_sent_cookies = pd.read_parquet(raw_postrej_sent_cookies_file)
#     postrej_sent_cookies = pd.read_parquet(postrej_sent_cookies_file)
# else:
#     # postrej_sent_cookies = read_postrej_sent_cookies(site_dirs[:4])
#     raw_postrej_sent_cookies = parallel_read_postrej_sent_cookies(data_dir, keep_sent_cookie=True)
#     postrej_sent_cookies = raw_postrej_sent_cookies.drop_duplicates()
#     raw_postrej_sent_cookies.to_parquet(raw_postrej_sent_cookies_file); print(f"Written to {raw_postrej_sent_cookies_file}")
#     postrej_sent_cookies.to_parquet(postrej_sent_cookies_file); print(f"Written to {postrej_sent_cookies_file}")

# raw_postrej_sent_cookies.head(3)
overwrite = False
cookies_cache_file = SCAN_ROOT_DIR / 'scan.parquet'  # 'raw_postrej_sent_cookies.parquet'

if not overwrite and cookies_cache_file and cookies_cache_file.exists():
    sent_cookies = pd.read_parquet(cookies_cache_file)
else:
    sent_cookies = read_postrej_sent_cookies_in_scans(SCAN_DIRS)
    if cookies_cache_file: sent_cookies.to_parquet(cookies_cache_file); print(f"Written to {cookies_cache_file}")

print(f"Number sent cookies read: {len(sent_cookies):,d}")
sent_cookies.head(3)

Number sent cookies read: 5,152,870


Unnamed: 0,name,value,domain,path,expires,size,httpOnly,secure,session,sameSite,priority,sameParty,sourceScheme,sourcePort,request_url,site,page_url
0,ajs_user_id,%22e465a0f34b3bdaa686645b9f88599d5563d0e8562db...,.wework.com,/,1671123000.0,81,False,False,False,Lax,Medium,False,Secure,443,https://www-static.wework.com/apercu/apercu_mo...,wework.com,https://www.wework.com/l/coworking-space/manila
1,_ga,GA1.1.1271302899.1639586858,.wework.com,/,1702659000.0,30,False,False,False,,Medium,False,Secure,443,https://www.wework.com/vanilla-assets/javascri...,wework.com,https://www.wework.com/l/coworking-space/manila
2,ajs_user_id,%22e465a0f34b3bdaa686645b9f88599d5563d0e8562db...,.wework.com,/,1671123000.0,81,False,False,False,Lax,Medium,False,Secure,443,https://www.wework.com/vanilla-assets/fonts/ic...,wework.com,https://www.wework.com/l/coworking-space/manila


In [12]:
postrej_sent_cookies = sent_cookies.drop_duplicates()
print(f"Num raw captured cookies: {len(sent_cookies):,d}")
print(f"Num unique captured cookies: {len(postrej_sent_cookies):,d}")

Num raw captured cookies: 5,152,870
Num unique captured cookies: 5,128,820


# Combine the actual and declared durations

In [13]:
def find_pref(cookie, site_pref_dicts, site):
    def match(acookie, site_pref):
        # print(f"{acookie=}")
        # print(f"{site_pref=}")
        return cookie_pref_match(acookie, site_pref, site)

    # found = site_pref_df[ site_pref_df.apply(match, axis=1) ]
    found = [site_pref_dict for site_pref_dict in site_pref_dicts if match(cookie, site_pref_dict)]
    if len(found) > 1:
        # print(f"WARNING: multiple site pref found (contradictory pref) {found=}")
        found = found[0:1]
    return found

def combine_cookie_pref(cookies, site_pref_df: pd.DataFrame, site):
    """Combine cookie to decl. TODO: rename pref to decl"""
    new_rows = []
    # site_pref_dicts = tuple(hashabledict(d) for d in site_pref_df.to_dict('records'))
    site_pref_dicts = site_pref_df.to_dict('records')
    for _, cookie in cookies.iterrows():       
        cookie = cookie.to_dict()

        # Only needs these attributes; reduce to memoize
        # cookie_to_match = hashabledict({k: cookie[k] for k in ["name", "domain"]})
        cookie_to_match = cookie
        for pref in find_pref(cookie_to_match, site_pref_dicts, site):
            new_row = cookie.copy()
            for pref_k, pref_v in pref.items():
                new_row['decl_' + pref_k] = pref_v
            new_rows.append(new_row)

    return pd.DataFrame(new_rows)

def get_combine_cookie_decls():
    combine_dfs = []
    for site in tqdm(postrej_sent_cookies.site.unique()):
        cookies = postrej_sent_cookies[postrej_sent_cookies.site == site]
        site_decl_df = raw_cookie_decls[raw_cookie_decls.site == site]
        combine_dfs.append(combine_cookie_pref(cookies, site_decl_df, site))
        
    return pd.concat(combine_dfs, ignore_index=True)

# cookie_prefs_file = data_dir / 'cookie_prefs.parquet'
# if False: # cookie_prefs_file.exists():
#     cookie_prefs = pd.read_parquet(cookie_prefs_file)
# else:
# TODO: change pref to decl
cookie_decls = get_combine_cookie_decls()
#     cookie_prefs.to_parquet(cookie_prefs_file); print(f"Written to {cookie_prefs_file}")
cookie_decls # 26 mins   

100%|██████████| 1622/1622 [26:17<00:00,  1.03it/s] 


Unnamed: 0,name,value,domain,path,expires,size,httpOnly,secure,session,sameSite,...,page_url,decl_name,decl_domain,decl_duration,decl_category_id,decl_category,decl_consent_mode,decl_site,decl_lib_name,decl_pattern_name
0,ajs_user_id,%22e465a0f34b3bdaa686645b9f88599d5563d0e8562db...,.wework.com,/,1.671123e+09,81,False,False,False,Lax,...,https://www.wework.com/l/coworking-space/manila,ajs_user_id,wework.com,365 days,C0004,Targeting Cookies,always active,wework.com,onetrust,en.json
1,_ga,GA1.1.1271302899.1639586858,.wework.com,/,1.702659e+09,30,False,False,False,,...,https://www.wework.com/l/coworking-space/manila,_ga,wework.com,730 days,C0002,Performance Cookies,always active,wework.com,onetrust,en.json
2,ajs_user_id,%22e465a0f34b3bdaa686645b9f88599d5563d0e8562db...,.wework.com,/,1.671123e+09,81,False,False,False,Lax,...,https://www.wework.com/l/coworking-space/manila,ajs_user_id,wework.com,365 days,C0004,Targeting Cookies,always active,wework.com,onetrust,en.json
3,_kuid_,OiuKv-_v,.krxd.net,/,1.655139e+09,14,False,True,False,,...,https://www.wework.com/l/coworking-space/manila,_kuid_,krxd.net,180 days,C0004,Targeting Cookies,always active,wework.com,onetrust,en.json
4,kampyleSessionPageCounter,1,www.wework.com,/,1.671123e+09,26,False,True,False,,...,https://www.wework.com/l/coworking-space/manila,kampyleSessionPageCounter,www.wework.com,365 days,C0003,Functional Cookies,always active,wework.com,onetrust,en.json
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2868388,_gcl_au,1.1.1311567854.1639897653,.vespa.com,/,1.647674e+09,32,False,False,False,,...,https://www.vespa.com/us_EN/timeline/,_gcl_au,vespa.com,3 months,marketing,marketing,,vespa.com,cookiebot,cc.js
2868389,_ga,GA1.2.1432834110.1639897653,.vespa.com,/,1.702970e+09,30,False,False,False,,...,https://www.vespa.com/us_EN/timeline/,_ga,vespa.com,2 years,marketing,marketing,,vespa.com,cookiebot,cc.js
2868390,RT,"""z=1&dm=vespa.com&si=qmw6w14fpf&ss=kxcwogih&sl...",.vespa.com,/,1.640502e+09,63,False,False,False,,...,https://www.vespa.com/us_EN/timeline/,RT,vespa.com,6 days,necessary,necessary,,vespa.com,cookiebot,cc.js
2868391,_gcl_au,1.1.1311567854.1639897653,.vespa.com,/,1.647674e+09,32,False,False,False,,...,https://www.vespa.com/us_EN/timeline/,_gcl_au,vespa.com,3 months,marketing,marketing,,vespa.com,cookiebot,cc.js


In [14]:
def get_duration_sec(cookie):
    if cookie['expires'] == -1:
        return -1
    return cookie['expires'] - cookie['load_end_time']

cookie_decls['dur_sec'] = cookie_decls.apply(get_duration_sec, axis=1)
cookie_decls.head()

KeyError: 'load_end_time'

In [None]:
# cookie_prefs[(cookie_prefs.name == 'eyeblaster') & (cookie_prefs.site == 'scientificamerican.com') ]
# cookie_decls[(cookie_decls.site == 'scientificamerican.com') ] # & (cookie_decls.domain == 'm6r.eu') ]

In [None]:
warnings = []
def parse_cookie_dur(pref_dur: str):
    pref_dur = pref_dur.lower().strip()
    if pref_dur == 'session':
        return -1
    if pref_dur == '0 days':  # equivalent to 'a few seconds' in onetrust
        return 100
    if pref_dur == 'persistent':
        return float('inf')
    try:
        return Duration(pref_dur).to_seconds()
    except Exception as e:
        warnings.append(f"Warning: {e} {pref_dur}")
        pass
    # Solve some rare cases:
    parts = pref_dur.split()
    assert len(parts) == 2
    cardinal, unit = float(parts[0]), parts[1]
    if unit == 'years':
        return cardinal * Duration('1 year').to_seconds()
    #     raise ValueError(f"Do not support {pref_dur}")
    return None

def test_parse_pref_dur():
    for dur in cookie_decls.pref_duration.unique():
        print(f'{dur} -> {int(parse_cookie_dur(dur)):,d}')
# test_parse_pref_dur()

In [None]:
# TODO: change pref_ to decl_ prefix
cookie_decls['decl_dur_sec'] = cookie_decls['decl_duration'].map(parse_cookie_dur)
cookie_decls

Unnamed: 0,domain,expires,httpOnly,name,path,sameSite,secure,value,request_url,site,...,decl_domain,decl_duration,decl_category_id,decl_category,decl_consent_mode,decl_site,decl_lib_name,decl_pattern_name,dur_sec,decl_dur_sec
0,.apachefriends.org,1.629067e+09,False,_gid,/,,False,GA1.2.720720591.1628980892,https://www.apachefriends.org/images/flags/fr-...,apachefriends.org,...,apachefriends.org,1 days,C0002,Performance,active,apachefriends.org,onetrust,en.json,8.634152e+04,86400.0
1,.apachefriends.org,1.660517e+09,False,OptanonAlertBoxClosed,/,Lax,False,2021-08-14T22:42:17.927Z,https://www.apachefriends.org/javascripts/all-...,apachefriends.org,...,apachefriends.org,365 days,C0001,Strictly Necessary,always active,apachefriends.org,onetrust,en.json,3.153599e+07,31536000.0
2,.apachefriends.org,1.660517e+09,False,OptanonConsent,/,Lax,False,isIABGlobal=false&datestamp=Sat+Aug+14+2021+22...,https://www.apachefriends.org/images/flags/fr-...,apachefriends.org,...,apachefriends.org,365 days,C0001,Strictly Necessary,always active,apachefriends.org,onetrust,en.json,3.153599e+07,31536000.0
3,.apachefriends.org,1.629067e+09,False,_gid,/,,False,GA1.2.720720591.1628980892,https://www.apachefriends.org/images/flags/hu-...,apachefriends.org,...,apachefriends.org,1 days,C0002,Performance,active,apachefriends.org,onetrust,en.json,8.634152e+04,86400.0
4,.apachefriends.org,1.660517e+09,False,OptanonAlertBoxClosed,/,Lax,False,2021-08-14T22:42:17.927Z,https://www.apachefriends.org/index.html,apachefriends.org,...,apachefriends.org,365 days,C0001,Strictly Necessary,always active,apachefriends.org,onetrust,en.json,3.153599e+07,31536000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3006098,.midomi.com,1.628946e+09,False,_gid,/,,False,GA1.2.345739105.1628859927,https://www.midomi.com/img/png/SoundHound_boto...,midomi.com,...,midomi.com,1 days,C0001,Essential Website Cookies,always active,midomi.com,onetrust,en.json,8.633199e+04,86400.0
3006099,.midomi.com,1.628946e+09,False,_gid,/,,False,GA1.2.345739105.1628859927,https://static.midomi.com/s/ud/GIM/016/07-/T1-...,midomi.com,...,midomi.com,1 days,C0001,Essential Website Cookies,always active,midomi.com,onetrust,en.json,8.633199e+04,86400.0
3006100,.doubleclick.net,1.691932e+09,True,IDE,/,,True,AHWqTUn3owNXIyq812HUzj65-XvMunwserbtL9tjiIw2Sh...,https://googleads.g.doubleclick.net/xbbe/pixel...,midomi.com,...,doubleclick.net,390 days,C0004,Advertising (Targeting) Cookies,inactive landingpage,midomi.com,onetrust,en.json,6.307193e+07,33696000.0
3006101,.midomi.com,1.628946e+09,False,_gid,/,,False,GA1.2.345739105.1628859927,https://www.midomi.com/logo/app-store-download...,midomi.com,...,midomi.com,1 days,C0001,Essential Website Cookies,always active,midomi.com,onetrust,en.json,8.633199e+04,86400.0


In [None]:
cookie_decls = cookie_decls[~cookie_decls.decl_dur_sec.isna()]

In [None]:
len(cookie_decls)

3002554

In [None]:
# set(warnings) # these are spanish ...

In [None]:
declared_durations = cookie_decls.decl_duration.value_counts()

In [None]:
n_sites_with_dur = cookie_decls.site.nunique()
# print(f"Num sites with duration: {n_sites_with_dur/n_sites_with_sett*100:.2f} ({n_sites_with_dur}/{n_sites_with_sett})")
dur_vios = cookie_decls[cookie_decls.decl_dur_sec < cookie_decls.dur_sec]
n_violate_sites = dur_vios.site.nunique()
print(f"Num sites with duration violations: {n_violate_sites / n_sites_with_dur * 100:.2f}% ({n_violate_sites}/{n_sites_with_dur})")

Num sites with duration violations: 76.49% (1025/1340)


In [None]:
dur_vios_counts = dur_vios.decl_duration.value_counts().to_frame().reset_index()
dur_vios_counts['nspecified'] = dur_vios_counts['index'].map(declared_durations)
dur_vios_counts['percent_too_short'] = dur_vios_counts['decl_duration'] / dur_vios_counts['nspecified'] * 100
dur_vios_counts.head(5)

Unnamed: 0,index,decl_duration,nspecified,percent_too_short
0,0 days,461319,513081,89.911534
1,Session,49107,612401,8.018765
2,365 days,35128,602452,5.830838
3,16 days,27948,27948,100.0
4,395 days,17166,30622,56.057736


In [None]:
duration_to_percent_too_short = {row['index']: row['percent_too_short'] for _, row in dur_vios_counts.iterrows()}
pref_dur_counts['percent_too_short'] = pref_dur_counts.Duration.map(duration_to_percent_too_short)
pref_dur_counts_latex = pref_dur_counts.rename(columns={"Duration": "Cookie Duration", "freq": r"Frequency (%)", "percent_too_short": r"% of Declared-too-short Cookies"})
pref_dur_counts_latex.replace({'0 days': 'A few seconds'}, inplace=True)
print(pref_dur_counts_latex.to_latex(float_format="%.2f", index=False))

\begin{tabular}{lrr}
\toprule
Cookie Duration &  Frequency (\%) &  \% of Declared-too-short Cookies \\
\midrule
        Session &          23.27 &                             8.02 \\
  A few seconds &          22.58 &                            89.91 \\
       365 days &          11.75 &                             5.83 \\
       730 days &           4.55 &                             3.81 \\
        30 days &           3.97 &                            10.49 \\
\bottomrule
\end{tabular}



In [None]:
# Analyze actual duration of "a few seconds"
dur_vios[dur_vios.pref_duration == "a few seconds"].sort_values(by='dur_sec')

Unnamed: 0,domain,expires,httpOnly,name,path,sameSite,secure,value,request_url,sent_name,...,load_start_time,load_end_time,pref_duration,pref_host,pref_name,pref_group_id,pref_site,pref_consent,dur_sec,pref_dur_sec
24824,www.cell.com,1.619896e+09,False,_hjIncludedInSessionSample,/,Lax,False,0,https://www.cell.com/ssoSessionCheck?callback=...,_hjIncludedInSessionSample,...,1.619896e+09,1.619896e+09,a few seconds,www.cell.com,_hjIncludedInSessionSample,2,cell.com,True,1.092363e+02,100.0
24769,www.cell.com,1.619896e+09,False,_hjIncludedInSessionSample,/,Lax,False,0,https://www.cell.com/cdn-cgi/scripts/5c5dd728/...,_hjIncludedInSessionSample,...,1.619896e+09,1.619896e+09,a few seconds,www.cell.com,_hjIncludedInSessionSample,2,cell.com,True,1.092363e+02,100.0
24772,www.cell.com,1.619896e+09,False,_hjIncludedInSessionSample,/,Lax,False,0,https://www.cell.com/products/marlin/fonts/nex...,_hjIncludedInSessionSample,...,1.619896e+09,1.619896e+09,a few seconds,www.cell.com,_hjIncludedInSessionSample,2,cell.com,True,1.092363e+02,100.0
24775,www.cell.com,1.619896e+09,False,_hjIncludedInSessionSample,/,Lax,False,0,https://www.cell.com/pb-assets/adobe/conversio...,_hjIncludedInSessionSample,...,1.619896e+09,1.619896e+09,a few seconds,www.cell.com,_hjIncludedInSessionSample,2,cell.com,True,1.092363e+02,100.0
24794,www.cell.com,1.619896e+09,False,_hjIncludedInSessionSample,/,Lax,False,0,https://www.cell.com/products/marlin/fonts/els...,_hjIncludedInSessionSample,...,1.619896e+09,1.619896e+09,a few seconds,www.cell.com,_hjIncludedInSessionSample,2,cell.com,True,1.092363e+02,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93688,.www.glassdoor.com,2.534023e+11,False,G_ENABLED_IDPS,/,,False,google,https://www.glassdoor.com/app/static/img/locke...,G_ENABLED_IDPS,...,1.619896e+09,1.619896e+09,a few seconds,www.glassdoor.com,G_ENABLED_IDPS,C0001,glassdoor.com,True,2.517824e+11,100.0
93708,.www.glassdoor.com,2.534023e+11,False,G_ENABLED_IDPS,/,,False,google,https://www.glassdoor.com/app/static/js/gd-fj-...,G_ENABLED_IDPS,...,1.619896e+09,1.619896e+09,a few seconds,www.glassdoor.com,G_ENABLED_IDPS,C0001,glassdoor.com,True,2.517824e+11,100.0
93743,.www.glassdoor.com,2.534023e+11,False,G_ENABLED_IDPS,/,,False,google,https://www.glassdoor.com/app/static/js/dist/g...,G_ENABLED_IDPS,...,1.619896e+09,1.619896e+09,a few seconds,www.glassdoor.com,G_ENABLED_IDPS,C0001,glassdoor.com,True,2.517824e+11,100.0
93589,.www.glassdoor.com,2.534023e+11,False,G_ENABLED_IDPS,/,,False,google,https://www.glassdoor.com/app/static/js/dist/g...,G_ENABLED_IDPS,...,1.619896e+09,1.619896e+09,a few seconds,www.glassdoor.com,G_ENABLED_IDPS,C0001,glassdoor.com,True,2.517824e+11,100.0
