In [1]:
from collections import defaultdict, Counter
from multiprocessing import Pool
from pathlib import Path
from typing import Dict
import datetime
import json
import re

from durations_nlp import Duration
from durations_nlp.exceptions import InvalidTokenError
from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import urllib
import tldextract

from consent.consistency.cookie_pref_match import cookie_pref_match
from consent.consistency.util import FIG_DIR, get_scan_dirs, get_scan_root_dir
from consent.data.pref_menu_scan.cookie_decl_reader import read_cookie_decls_in_scans
from consent.data.pref_menu_scan.har_cookie_reader import read_postrej_sent_cookies_in_scans
# from consent.data.site_pref import read_site_prefs
# from consent.data.pref_menu_scan.cookie_pref_reader import read_cookie_prefs_in_scans
# from consent.data.pref_menu_scan.log_reader import read_logs_in_scans
# from consent.util.default_path import get_data_dir
# from ooutil.type_util import hashabledict
# from ooutil.url_util import get_suffixed_domain
# from ooutil.cookie_util import get_brower_cookies
# from ooutil.func_util import memoize

# data_dir = get_data_dir('2021-05-01/pref_menu_scan_accept')
location = 'de'
SCAN_DIRS = get_scan_dirs(location)
SCAN_ROOT_DIR = get_scan_root_dir(location)
print(f'SCAN DIRS: {[str(d) for d in SCAN_DIRS]}')
fig_dir = Path.home() / 'local_projects/consent/paper/sp22/figures'
assert fig_dir.exists() and fig_dir.is_dir()

SCAN DIRS: ['/mnt/sdd/ducbui/projects/data2/consent/2023-04-14/pref_menu_scan_0k_20k', '/mnt/sdd/ducbui/projects/data2/consent/2023-04-14/pref_menu_scan_20k_100k']


In [2]:
# scanned_sites = [site_dir.name for site_dir in data_dir.glob('*') if site_dir.is_dir()]
# site_dirs = [data_dir / site for site in scanned_sites]
# print("Number of scanned sites:", len(scanned_sites)) # , scanned_sites)

# Get cookie durations in declarations

In [3]:
# Get cookie prefs = cookie decls + prefs
# cookie_prefs = read_cookie_prefs_in_scans(SCAN_DIRS)
# cookie_prefs.head()
# TODO: rerun with cookie decls only, we do not need cookie prefs
raw_cookie_decls = read_cookie_decls_in_scans(SCAN_DIRS)
raw_cookie_decls

Unnamed: 0,name,domain,duration,category_id,category,consent_mode,site,lib_name,pattern_name
0,user,www.wework.com,Session,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
1,__we_request_id,www.wework.com,Session,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
2,ajs_anonymous_id,wework.com,365 days,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
3,_gclxxxx,wework.com,90 days,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
4,__we_bucket_id,www.wework.com,365 days,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
...,...,...,...,...,...,...,...,...,...
112,YSC,youtube.com,Session,C0004,Targeting Cookies,inactive,loggly.com,onetrust,en.json
113,_kuid_,krxd.net,179 days,C0004,Targeting Cookies,inactive,loggly.com,onetrust,en.json
114,A3,yahoo.com,365 days,C0004,Targeting Cookies,inactive,loggly.com,onetrust,en.json
115,browser_id,www.slideshare.net,1825 days,C0004,Targeting Cookies,inactive,loggly.com,onetrust,en.json


In [4]:
# def _osolete_analysis():
#     pref_menu_data_dir = get_data_dir('2021-05-01/pref_menu_scan')
#     pref_menu_sites = [site_dir.name for site_dir in pref_menu_data_dir.glob('*') if site_dir.is_dir()]
#     pref_menu_site_prefs = read_site_prefs([pref_menu_data_dir / site for site in pref_menu_sites])
#     print("Number of sites in pref-menu scan:", len(pref_menu_sites)) # , scanned_sites)
#     pref_menu_cookielist_sites = pref_menu_site_prefs.site.unique()
#     print("Number of sites with cookie list in pref-menu scan:", len(pref_menu_cookielist_sites)) # , scanned_sites)
#     missings = set(pref_menu_cookielist_sites) - set(site_prefs.site.unique())
#     n_sites_with_sett = site_prefs.site.nunique()
#     print("Missing sites:", missings)
#     redundants = set(site_prefs.site.unique()) - set(pref_menu_cookielist_sites)
#     print("Redudant sites:", redundants)
    # assert len(missings) == len(redundants) == 0, f"There is some mismatch."

In [5]:
# site_prefs = site_prefs[site_prefs.duration != '']
n_nan_durations = len(raw_cookie_decls[raw_cookie_decls.duration.isna()])
if n_nan_durations > 0:
    print(f"WARNING: Some duration is nan: {n_nan_durations}")
    raw_cookie_decls = raw_cookie_decls[~pd.isna(raw_cookie_decls.duration)]
assert len(raw_cookie_decls[raw_cookie_decls.duration == '']) ==0

In [6]:
print(f"Most common preference durations:")
pref_dur_counts = raw_cookie_decls.duration.value_counts().to_frame().reset_index()
pref_dur_counts['freq'] = pref_dur_counts['duration'] / len(raw_cookie_decls) * 100
pref_dur_counts.head(10)
pref_dur_counts = pref_dur_counts[['index', 'freq']].rename(columns={'index': 'Duration'}).head(5)  # , 'freq': 'Frequency (%)'
print(pref_dur_counts.to_latex(index=False, float_format="%.2f"))

Most common preference durations:
\begin{tabular}{lr}
\toprule
Duration &  freq \\
\midrule
  0 days & 27.95 \\
 Session & 18.66 \\
365 days & 13.67 \\
364 days &  6.03 \\
730 days &  3.91 \\
\bottomrule
\end{tabular}



  print(pref_dur_counts.to_latex(index=False, float_format="%.2f"))


In [7]:
print(f"Preference durations:")
print(raw_cookie_decls['duration'].unique())

Preference durations:
['Session' '365 days' '90 days' '14 days' '730 days' '0 days' '1 days'
 '729 days' '17 days' '7 days' '364 days' '392 days' '3649 days'
 '390 days' '2 days' '180 days' '179 days' '89 days' '726 days' '182 days'
 '29 days' '3650 days' '389 days' '6149 days' '30 days' '731 days'
 '394 days' '16 days' '395 days' '6168 days' '397 days' '1080 days'
 '184 days' '6 days' '366 days' '185 days' '60 days' '2914038 days'
 '1825 days' '181 days' '7304 days' '393 days' '360 days' '1092 days'
 '4537 days' '1824 days' '6170 days' '3652 days' '183 days' '3998 days'
 '6156 days' '1000 days' '725 days' '7299 days' '1091 days' '13 days'
 '59 days' '177 days' '722 days' '31 days' '1487 days' '1095 days'
 '732 days' '396 days' '270 days' '120 days' '356 days' '214 days'
 '540 days' '6181 days' '1005 days' '1889 days' '1459 days' '3 days'
 '7300 days' '1096 days' '19 days' '7014 days' '2914074 days' '6129 days'
 '710 days' '6015 days' '300 days' '299 days' '1460 days' '18249 days'
 '72

In [8]:
'Session' in raw_cookie_decls[raw_cookie_decls.lib_name == 'onetrust']['duration'].unique()

True

In [9]:
cookielist_sites = sorted(raw_cookie_decls.site.unique().tolist())
n_cookielist_sites = len(cookielist_sites)
assert raw_cookie_decls.site.nunique() == n_cookielist_sites
# print("Sites with cookie list:", n_cookielist_sites, cookielist_sites)

# Read in the post-rejection cookies to find the actual cookie durations.

In [10]:

# raw_postrej_sent_cookies_file = data_dir / 'raw_postrej_sent_cookies.parquet'
# postrej_sent_cookies_file = data_dir / 'postrej_sent_cookies.parquet'
# if raw_postrej_sent_cookies_file.exists():
#     raw_postrej_sent_cookies = pd.read_parquet(raw_postrej_sent_cookies_file)
#     postrej_sent_cookies = pd.read_parquet(postrej_sent_cookies_file)
# else:
#     # postrej_sent_cookies = read_postrej_sent_cookies(site_dirs[:4])
#     raw_postrej_sent_cookies = parallel_read_postrej_sent_cookies(data_dir, keep_sent_cookie=True)
#     postrej_sent_cookies = raw_postrej_sent_cookies.drop_duplicates()
#     raw_postrej_sent_cookies.to_parquet(raw_postrej_sent_cookies_file); print(f"Written to {raw_postrej_sent_cookies_file}")
#     postrej_sent_cookies.to_parquet(postrej_sent_cookies_file); print(f"Written to {postrej_sent_cookies_file}")

# raw_postrej_sent_cookies.head(3)
overwrite = False # True
cookies_cache_file = SCAN_ROOT_DIR / 'scan.parquet'  # 'raw_postrej_sent_cookies.parquet'

if not overwrite and cookies_cache_file and cookies_cache_file.exists():
    sent_cookies = pd.read_parquet(cookies_cache_file)
else:
    sent_cookies = read_postrej_sent_cookies_in_scans(SCAN_DIRS)
    if cookies_cache_file: sent_cookies.to_parquet(cookies_cache_file); print(f"Written to {cookies_cache_file}")

print(f"Number sent cookies read: {len(sent_cookies):,d}")
sent_cookies.head(3)

Unmatch url and consent cookie domain: page_url='https://www.ciriontechnologies.com/es/' consent_cookie.cookie['domain']='.lumen.com' consent_cookie.cookie['path']='/' savvis.net/postrej_5.har.xz
Unmatch url and consent cookie domain: page_url='https://www.hearstmagazines.co.uk/mens-health-magazine-subscription-website?utm_source=menshealth.com&utm_medium=referral&utm_campaign=us-websites' consent_cookie.cookie['domain']='.menshealth.com' consent_cookie.cookie['path']='/' menshealth.com/postrej_3.har.xz
Unmatch url and consent cookie domain: page_url='https://hrtechprivacy.com/brands/glassdoor#privacypolicy' consent_cookie.cookie['domain']='.glassdoor.com' consent_cookie.cookie['path']='/' glassdoor.com/postrej_3.har.xz
Error reading har file /mnt/sdd/ducbui/projects/data2/consent/2023-04-14/pref_menu_scan_0k_20k/allergan.com/postrej_3.har.xz: 'DataFrame' object has no attribute 'request'
Unmatch url and consent cookie domain: page_url='https://myaccount.suse.com/' consent_cookie.cooki

Unnamed: 0,name,value,domain,path,expires,size,httpOnly,secure,session,sameSite,priority,sameParty,sourceScheme,sourcePort,request_url,site,page_url
0,ajs_anonymous_id,8b883f8e-dd49-4ffa-bc01-56e536e0e19c,.wework.com,/,1713082000.0,52,False,False,False,Lax,Medium,False,Secure,443,https://www.wework.com/,wework.com,https://www.wework.com/l/commercial-real-estat...
1,ajs_anonymous_id,8b883f8e-dd49-4ffa-bc01-56e536e0e19c,.wework.com,/,1713082000.0,52,False,False,False,Lax,Medium,False,Secure,443,https://www-static.wework.com/apercu/apercu.css,wework.com,https://www.wework.com/l/commercial-real-estat...
2,we_referring_domain,,www.wework.com,/,1682755000.0,19,False,False,False,,Medium,False,Secure,443,https://www.wework.com/l/commercial-real-estat...,wework.com,https://www.wework.com/l/commercial-real-estat...


In [11]:
postrej_sent_cookies = sent_cookies.drop_duplicates()
print(f"Num raw captured cookies: {len(sent_cookies):,d}")
print(f"Num unique captured cookies: {len(postrej_sent_cookies):,d}")

Num raw captured cookies: 13,055,212
Num unique captured cookies: 12,982,601


# Combine the actual and declared durations

In [16]:
def find_pref(cookie, site_pref_dicts, site):
    def match(acookie, site_pref):
        # print(f"{acookie=}")
        # print(f"{site_pref=}")
        return cookie_pref_match(acookie, site_pref, site, fuzzy_name=True)  #  Germany: turn on fuzzy name due to a large number of errors.

    # found = site_pref_df[ site_pref_df.apply(match, axis=1) ]
    found = [site_pref_dict for site_pref_dict in site_pref_dicts if match(cookie, site_pref_dict)]
    if len(found) > 1:
        # print(f"WARNING: multiple site pref found (contradictory pref) {found=}")
        found = found[0:1]
    return found

def combine_cookie_pref(cookies, site_pref_df: pd.DataFrame, site):
    """Combine cookie to decl. TODO: rename pref to decl"""
    new_rows = []
    # site_pref_dicts = tuple(hashabledict(d) for d in site_pref_df.to_dict('records'))
    site_pref_dicts = site_pref_df.to_dict('records')
    for _, cookie in cookies.iterrows():       
        cookie = cookie.to_dict()

        # Only needs these attributes; reduce to memoize
        # cookie_to_match = hashabledict({k: cookie[k] for k in ["name", "domain"]})
        cookie_to_match = cookie
        for pref in find_pref(cookie_to_match, site_pref_dicts, site):
            new_row = cookie.copy()
            for pref_k, pref_v in pref.items():
                new_row['decl_' + pref_k] = pref_v
            new_rows.append(new_row)

    return pd.DataFrame(new_rows)

def get_combine_cookie_decls():
    combine_dfs = []
    for site in tqdm(postrej_sent_cookies.site.unique()[:10]):
        cookies = postrej_sent_cookies[postrej_sent_cookies.site == site]
        site_decl_df = raw_cookie_decls[raw_cookie_decls.site == site]
        if len(cookies) > 10000:
            print(f'WARNING: skip processing site due to too many cookies: {site=}, {len(cookies)=} {len(site_decl_df)=}')
            continue
        combine_dfs.append(combine_cookie_pref(cookies, site_decl_df, site))
        
    return pd.concat(combine_dfs, ignore_index=True)

# cookie_prefs_file = data_dir / 'cookie_prefs.parquet'
# if False: # cookie_prefs_file.exists():
#     cookie_prefs = pd.read_parquet(cookie_prefs_file)
# else:
# TODO: change pref to decl
cookie_decls = get_combine_cookie_decls()
#     cookie_prefs.to_parquet(cookie_prefs_file); print(f"Written to {cookie_prefs_file}")
cookie_decls # 26 mins   

100%|██████████| 10/10 [01:14<00:00,  7.46s/it]


Unnamed: 0,name,value,domain,path,expires,size,httpOnly,secure,session,sameSite,...,page_url,decl_name,decl_domain,decl_duration,decl_category_id,decl_category,decl_consent_mode,decl_site,decl_lib_name,decl_pattern_name
0,we_search_params,%7B%7D,www.wework.com,/,1.682755e+09,22,False,False,False,,...,https://www.wework.com/l/commercial-real-estat...,we_search_params,www.wework.com,14 days,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
1,ajs_anonymous_id,8b883f8e-dd49-4ffa-bc01-56e536e0e19c,.wework.com,/,1.713082e+09,52,False,False,False,Lax,...,https://www.wework.com/l/commercial-real-estat...,ajs_anonymous_id,wework.com,365 days,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
2,we_search_params,%7B%7D,www.wework.com,/,1.682755e+09,22,False,False,False,,...,https://www.wework.com/l/commercial-real-estat...,we_search_params,www.wework.com,14 days,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
3,__we_request_id,1d797cd9-0b08-4789-a586-459f0dcaff82,www.wework.com,/,-1.000000e+00,51,False,True,True,Lax,...,https://www.wework.com/l/commercial-real-estat...,__we_request_id,www.wework.com,Session,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
4,user,{%22formUUID%22:%2267d89ca5-bacb-41d7-9841-34b...,www.wework.com,/,-1.000000e+00,63,False,True,True,Lax,...,https://www.wework.com/l/commercial-real-estat...,user,www.wework.com,Session,C0001,Strictly Necessary Cookies,always active,wework.com,onetrust,en.json
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18432,_parsely_visitor,{%22id%22:%22pid=3dc5e053-5fe8-4f58-a6ef-72136...,.elle.com,/,1.715704e+09,135,False,False,False,,...,https://www.elle.com/culture/movies-tv/a435217...,_parsely_visitor,elle.com,395 days,C0001,Strictly Necessary & Analytics Cookies,always active,elle.com,onetrust,en.json
18433,location_data,"{""country_code"":""DE"",""postal_code"":""60323""}",www.elle.com,/,-1.000000e+00,56,False,False,True,,...,https://www.elle.com/culture/movies-tv/a435217...,location_data,www.elle.com,Session,C0001,Strictly Necessary & Analytics Cookies,always active,elle.com,onetrust,en.json
18434,OptanonConsent,isGpcEnabled=0&datestamp=Sat+Apr+15+2023+06%3A...,.elle.com,/,1.713077e+09,365,False,False,False,Lax,...,https://www.elle.com/culture/movies-tv/a435217...,OptanonConsent,elle.com,365 days,C0001,Strictly Necessary & Analytics Cookies,always active,elle.com,onetrust,en.json
18435,OptanonConsent,isGpcEnabled=0&datestamp=Sat+Apr+15+2023+06%3A...,.elle.com,/,1.713076e+09,365,False,False,False,Lax,...,https://www.elle.com/culture/movies-tv/a435217...,OptanonConsent,elle.com,365 days,C0001,Strictly Necessary & Analytics Cookies,always active,elle.com,onetrust,en.json


In [None]:
# No longer analyze cookie durations
import sys; sys.exit(1)

In [None]:
def get_duration_sec(cookie):
    if cookie['expires'] == -1:
        return -1
    return cookie['expires'] - cookie['load_end_time']

cookie_decls['dur_sec'] = cookie_decls.apply(get_duration_sec, axis=1)
cookie_decls.head()

KeyError: 'load_end_time'

In [None]:
# cookie_prefs[(cookie_prefs.name == 'eyeblaster') & (cookie_prefs.site == 'scientificamerican.com') ]
# cookie_decls[(cookie_decls.site == 'scientificamerican.com') ] # & (cookie_decls.domain == 'm6r.eu') ]

In [None]:
warnings = []
def parse_cookie_dur(pref_dur: str):
    pref_dur = pref_dur.lower().strip()
    if pref_dur == 'session':
        return -1
    if pref_dur == '0 days':  # equivalent to 'a few seconds' in onetrust
        return 100
    if pref_dur == 'persistent':
        return float('inf')
    try:
        return Duration(pref_dur).to_seconds()
    except Exception as e:
        warnings.append(f"Warning: {e} {pref_dur}")
        pass
    # Solve some rare cases:
    parts = pref_dur.split()
    assert len(parts) == 2
    cardinal, unit = float(parts[0]), parts[1]
    if unit == 'years':
        return cardinal * Duration('1 year').to_seconds()
    #     raise ValueError(f"Do not support {pref_dur}")
    return None

def test_parse_pref_dur():
    for dur in cookie_decls.pref_duration.unique():
        print(f'{dur} -> {int(parse_cookie_dur(dur)):,d}')
# test_parse_pref_dur()

In [None]:
# TODO: change pref_ to decl_ prefix
cookie_decls['decl_dur_sec'] = cookie_decls['decl_duration'].map(parse_cookie_dur)
cookie_decls

AssertionError: 

In [None]:
cookie_decls = cookie_decls[~cookie_decls.decl_dur_sec.isna()]

In [None]:
len(cookie_decls)

3002554

In [None]:
# set(warnings) # these are spanish ...

In [None]:
declared_durations = cookie_decls.decl_duration.value_counts()

In [None]:
n_sites_with_dur = cookie_decls.site.nunique()
# print(f"Num sites with duration: {n_sites_with_dur/n_sites_with_sett*100:.2f} ({n_sites_with_dur}/{n_sites_with_sett})")
dur_vios = cookie_decls[cookie_decls.decl_dur_sec < cookie_decls.dur_sec]
n_violate_sites = dur_vios.site.nunique()
print(f"Num sites with duration violations: {n_violate_sites / n_sites_with_dur * 100:.2f}% ({n_violate_sites}/{n_sites_with_dur})")

Num sites with duration violations: 76.49% (1025/1340)


In [None]:
dur_vios_counts = dur_vios.decl_duration.value_counts().to_frame().reset_index()
dur_vios_counts['nspecified'] = dur_vios_counts['index'].map(declared_durations)
dur_vios_counts['percent_too_short'] = dur_vios_counts['decl_duration'] / dur_vios_counts['nspecified'] * 100
dur_vios_counts.head(5)

Unnamed: 0,index,decl_duration,nspecified,percent_too_short
0,0 days,461319,513081,89.911534
1,Session,49107,612401,8.018765
2,365 days,35128,602452,5.830838
3,16 days,27948,27948,100.0
4,395 days,17166,30622,56.057736


In [None]:
duration_to_percent_too_short = {row['index']: row['percent_too_short'] for _, row in dur_vios_counts.iterrows()}
pref_dur_counts['percent_too_short'] = pref_dur_counts.Duration.map(duration_to_percent_too_short)
pref_dur_counts_latex = pref_dur_counts.rename(columns={"Duration": "Cookie Duration", "freq": r"Frequency (%)", "percent_too_short": r"% of Declared-too-short Cookies"})
pref_dur_counts_latex.replace({'0 days': 'A few seconds'}, inplace=True)
print(pref_dur_counts_latex.to_latex(float_format="%.2f", index=False))

\begin{tabular}{lrr}
\toprule
Cookie Duration &  Frequency (\%) &  \% of Declared-too-short Cookies \\
\midrule
        Session &          23.27 &                             8.02 \\
  A few seconds &          22.58 &                            89.91 \\
       365 days &          11.75 &                             5.83 \\
       730 days &           4.55 &                             3.81 \\
        30 days &           3.97 &                            10.49 \\
\bottomrule
\end{tabular}



In [None]:
# Analyze actual duration of "a few seconds"
dur_vios[dur_vios.pref_duration == "a few seconds"].sort_values(by='dur_sec')

Unnamed: 0,domain,expires,httpOnly,name,path,sameSite,secure,value,request_url,sent_name,...,load_start_time,load_end_time,pref_duration,pref_host,pref_name,pref_group_id,pref_site,pref_consent,dur_sec,pref_dur_sec
24824,www.cell.com,1.619896e+09,False,_hjIncludedInSessionSample,/,Lax,False,0,https://www.cell.com/ssoSessionCheck?callback=...,_hjIncludedInSessionSample,...,1.619896e+09,1.619896e+09,a few seconds,www.cell.com,_hjIncludedInSessionSample,2,cell.com,True,1.092363e+02,100.0
24769,www.cell.com,1.619896e+09,False,_hjIncludedInSessionSample,/,Lax,False,0,https://www.cell.com/cdn-cgi/scripts/5c5dd728/...,_hjIncludedInSessionSample,...,1.619896e+09,1.619896e+09,a few seconds,www.cell.com,_hjIncludedInSessionSample,2,cell.com,True,1.092363e+02,100.0
24772,www.cell.com,1.619896e+09,False,_hjIncludedInSessionSample,/,Lax,False,0,https://www.cell.com/products/marlin/fonts/nex...,_hjIncludedInSessionSample,...,1.619896e+09,1.619896e+09,a few seconds,www.cell.com,_hjIncludedInSessionSample,2,cell.com,True,1.092363e+02,100.0
24775,www.cell.com,1.619896e+09,False,_hjIncludedInSessionSample,/,Lax,False,0,https://www.cell.com/pb-assets/adobe/conversio...,_hjIncludedInSessionSample,...,1.619896e+09,1.619896e+09,a few seconds,www.cell.com,_hjIncludedInSessionSample,2,cell.com,True,1.092363e+02,100.0
24794,www.cell.com,1.619896e+09,False,_hjIncludedInSessionSample,/,Lax,False,0,https://www.cell.com/products/marlin/fonts/els...,_hjIncludedInSessionSample,...,1.619896e+09,1.619896e+09,a few seconds,www.cell.com,_hjIncludedInSessionSample,2,cell.com,True,1.092363e+02,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93688,.www.glassdoor.com,2.534023e+11,False,G_ENABLED_IDPS,/,,False,google,https://www.glassdoor.com/app/static/img/locke...,G_ENABLED_IDPS,...,1.619896e+09,1.619896e+09,a few seconds,www.glassdoor.com,G_ENABLED_IDPS,C0001,glassdoor.com,True,2.517824e+11,100.0
93708,.www.glassdoor.com,2.534023e+11,False,G_ENABLED_IDPS,/,,False,google,https://www.glassdoor.com/app/static/js/gd-fj-...,G_ENABLED_IDPS,...,1.619896e+09,1.619896e+09,a few seconds,www.glassdoor.com,G_ENABLED_IDPS,C0001,glassdoor.com,True,2.517824e+11,100.0
93743,.www.glassdoor.com,2.534023e+11,False,G_ENABLED_IDPS,/,,False,google,https://www.glassdoor.com/app/static/js/dist/g...,G_ENABLED_IDPS,...,1.619896e+09,1.619896e+09,a few seconds,www.glassdoor.com,G_ENABLED_IDPS,C0001,glassdoor.com,True,2.517824e+11,100.0
93589,.www.glassdoor.com,2.534023e+11,False,G_ENABLED_IDPS,/,,False,google,https://www.glassdoor.com/app/static/js/dist/g...,G_ENABLED_IDPS,...,1.619896e+09,1.619896e+09,a few seconds,www.glassdoor.com,G_ENABLED_IDPS,C0001,glassdoor.com,True,2.517824e+11,100.0
