In [1]:
from df_utils import load_df
from crawl_ids import CrawlRokuTop1KMITM
from nb_utils import make_latex_table, get_popular_domains_from_reqs, get_popular_domains_from_tcp_conns
from ott_leaks import DEVICE_ID_NAMES, print_leak_stats

In [3]:
# Load the HTTP requests captured in the Roku top-1K MITM crawl and slice out
# the rows flagged by the `adblocked` and `pihole_blocked` boolean columns.
roku_req = load_df(CrawlRokuTop1KMITM, "http_req")
req_adblocked = roku_req[roku_req["adblocked"]]
req_pihole_blocked = roku_req[roku_req["pihole_blocked"]]

In [4]:
# (total number of requests, number of requests flagged `pihole_blocked`)
tuple(map(len, (roku_req, req_pihole_blocked)))

(86220, 21760)

In [5]:
# (distinct hosts, distinct request domains) among requests flagged `adblocked`
tuple(req_adblocked[column].nunique() for column in ("host", "req_domain"))

(306, 140)

In [6]:
# Requests flagged `adblocked` that are NOT flagged `pihole_blocked`.
req_missed_by_pihole = roku_req[
    roku_req["adblocked"] & ~roku_req["pihole_blocked"]
]
# (distinct hosts, distinct request domains) among those missed requests
tuple(req_missed_by_pihole[column].nunique() for column in ("host", "req_domain"))

(192, 85)

In [7]:
from tld import get_fld

# Request domains that were flagged `adblocked` but not `pihole_blocked`.
pi_missed_domains = set(req_missed_by_pihole.req_domain.unique())

# First-level domains of every entry in the Pi-hole blocklist.
# The file is read inside a `with` block so the handle is closed
# deterministically instead of being left to the garbage collector.
domains = set()
with open("../../blocklistparser//blocklists/pi-hole.txt") as blocklist_file:
    for line in blocklist_file:
        # get_fld() needs a scheme; with fail_silently=True it returns None
        # for lines it cannot parse (e.g. blank lines). Those are skipped so
        # that None never ends up in the domain set.
        fld = get_fld("http://" + line.rstrip(), fail_silently=True)
        if fld is not None:
            domains.add(fld)


# (number of missed domains, how many of them share a first-level domain with
#  some blocklist entry -- i.e. Pi-hole lists a host under that domain but
#  not the exact host that was contacted)
len(pi_missed_domains), len(pi_missed_domains.intersection(domains))

(85, 46)

In [8]:
# (requests flagged `adblocked`, requests flagged `pihole_blocked`)
tuple(map(len, (req_adblocked, req_pihole_blocked)))

(30099, 21760)

In [10]:
# Same slicing as for the HTTP requests, but on the TCP connection records.
roku_tcp = load_df(CrawlRokuTop1KMITM, "tcp_conn")
adblocked = roku_tcp[roku_tcp["adblocked"]]
pihole_blocked = roku_tcp[roku_tcp["pihole_blocked"]]
missed_by_pihole = roku_tcp[
    roku_tcp["adblocked"] & ~roku_tcp["pihole_blocked"]
]
# Number of `adblocked` domains that never show up among the connections
# missed by Pi-hole.
len(
    set(adblocked["domain"].unique())
    - set(missed_by_pihole["domain"].unique())
)

75

In [11]:
# Domains of the TCP connections flagged `adblocked` but not `pihole_blocked`;
# the rendered table lists each domain with its "Num. of channels".
get_popular_domains_from_tcp_conns(missed_by_pihole)

Unnamed: 0,domain,Num. of channels
147,tremorhub.com,97
141,spotxchange.com,90
153,vimeo.com,86
20,bidswitch.net,70
154,vimeocdn.com,69
142,springserve.com,61
112,innovid.com,61
28,clrstm.com,60
10,adsrvr.org,57
1,2mdn.net,55


In [12]:
# A connection counts as `web_mobile_blocked` when at least one of the
# Disconnect, Ghostery, EasyList or EasyPrivacy flags is set.
# NOTE: this adds a column to `roku_tcp` in place.
roku_tcp["web_mobile_blocked"] = (
    roku_tcp["disconnect_blocked"]
    | roku_tcp["ghostery_blocked"]
    | roku_tcp["easylist_blocked"]
    | roku_tcp["easyprivacy_blocked"]
)

In [17]:
# Domains flagged `pihole_blocked` but by none of the web/mobile lists.
(
    roku_tcp[roku_tcp["pihole_blocked"] & ~roku_tcp["web_mobile_blocked"]]
    ["domain"]
    .unique()
)

array([u'aetn.com', u'nbcuni.com', u'cbsi.com', u'yahoo.com',
       u'alphonso.tv', u'nbcsports.com', u'foxnews.com', u'piksel.com',
       u'crackle.com', u'dowjoneson.com', u'fox.com', u'nba.com',
       u'adultswim.com', u'bet.com', u'samplicio.us', u'mlb.com',
       u'ip-api.com'], dtype=object)

In [18]:
# Hosts flagged by at least one web/mobile list but not `pihole_blocked`.
(
    roku_tcp[~roku_tcp["pihole_blocked"] & roku_tcp["web_mobile_blocked"]]
    ["host"]
    .unique()
)

array([u'vid.springserve.com', u'678pd-i3tvn.ads.tremorhub.com',
       u'7mqc9-qo1ow.ads.tremorhub.com', u'ioms.bfmio.com',
       u'd3jwlm43fjnwxe.cloudfront.net', u'player.vimeo.com',
       u'event.spotxchange.com', u'node-p2e-egh9ca.sitescout.com',
       u'node-p2e-o9tee3.sitescout.com', u'tkx2-prod.anvato.net',
       u'd1xka8tofigsut.cloudfront.net', u'secure.brightcove.com',
       u'brightcove01.brightcove.com', u'27ql0-2l89t.ads.tremorhub.com',
       u'r4---sn-ab5l6nzr.c.2mdn.net', u'r4---sn-vgqsrnez.c.2mdn.net',
       u'r3---sn-ab5l6n6l.c.2mdn.net', u'proxy.spotxchange.com',
       u'imasdk.googleapis.com', u'video-static-01.clipsyndicate.com',
       u'nqs-wdc1-c2.youboranqs01.com', u'data.ad-score.com',
       u'wcnc-download.edgesuite.net', u'geoip.maxmind.com',
       u'ag.innovid.com', u'r3---sn-ab5l6nsy.c.2mdn.net',
       u'amdlive-ch01.ctnd.com.edgesuite.net', u'api.vimeo.com',
       u'fpdl.vimeocdn.com', u'assets.springserve.com',
       u'vid-io-iad.springserve

### Does Pi-Hole block ID leaks?

In [20]:
# All detected leaks, and the subset that is not flagged `pihole_blocked`.
leaks_roku = load_df(CrawlRokuTop1KMITM, "leak")
missed_leaks_roku = leaks_roku[~leaks_roku["pihole_blocked"]]

# Restrict to leaks whose `id_type` is one of DEVICE_ID_NAMES, then keep the
# ones that are not flagged `pihole_blocked`.
id_leaks_roku = leaks_roku[leaks_roku["id_type"].isin(DEVICE_ID_NAMES)]
missed_id_leaks_roku = id_leaks_roku[~id_leaks_roku["pihole_blocked"]]

# Summary table of the device-ID leaks missed by Pi-hole.
id_leaks_table_missed = print_leak_stats(missed_id_leaks_roku)
id_leaks_table_missed

Unnamed: 0,ID,Num. of leaks,Num. of channels
0,AD ID,5747,165
1,Serial No,2938,73


In [21]:
# Leak statistics over every leak type (not only device IDs), restricted to
# leaks that are not flagged `pihole_blocked`.
leaks_table_missed = print_leak_stats(missed_leaks_roku)
leaks_table_missed


Unnamed: 0,ID,Num. of leaks,Num. of channels
0,Build Number,3145,123
1,AD ID,5747,165
2,Channel name,21960,131
3,Email,25,5
4,City,167,28
5,State,27,20
6,Zip,183,30
7,Serial No,2938,73


In [22]:
# Baseline: device-ID leak statistics over all leaks, regardless of the
# `pihole_blocked` flag, for comparison with `id_leaks_table_missed`.
id_leaks_table = print_leak_stats(id_leaks_roku)
id_leaks_table

Unnamed: 0,ID,Num. of leaks,Num. of channels
0,AD ID,23963,359
1,Serial No,6717,114


In [23]:
# Emit the missed device-ID leak table as LaTeX; print() keeps the newlines
# readable instead of showing the string repr.
print(make_latex_table(
    id_leaks_table_missed,
    label='missed_ids_pi_hole',
    caption='ID leakage missed by Pi-Hole'))



\begin{table}[H]
%\centering
%\resizebox{\columnwidth}{!}{%
\begin{tabular}{lrr}
\toprule
 Id        &   Num. of leaks &   Num. of channels \\
\midrule
 AD ID     &            5747 &                165 \\
 Serial No &            2938 &                 73 \\
\bottomrule
\end{tabular}
%}
\caption{ID leakage missed by Pi-Hole}
\label{tab:missed_ids_pi_hole}
\end{table}


In [24]:
# NOTE(review): this is the same call that produced `id_leaks_table` earlier
# in the notebook, and `df` is reassigned by the next cell -- this cell looks
# redundant; confirm before removing.
df = print_leak_stats(id_leaks_roku)
df

Unnamed: 0,ID,Num. of leaks,Num. of channels
0,AD ID,23963,359
1,Serial No,6717,114


In [25]:
# Request domains that receive device-ID leaks not flagged `pihole_blocked`;
# the rendered table lists each domain with its "Num. of channels".
# `df` is consumed by the LaTeX export cell that follows.
df = get_popular_domains_from_reqs(missed_id_leaks_roku)
df

# For raw row counts per domain instead, inspect
# missed_id_leaks_roku.req_domain.value_counts()

Unnamed: 0,req_domain,Num. of channels
50,tremorhub.com,73
11,bfmio.com,44
26,irchan.com,42
30,monarchads.com,40
12,bidswitch.net,26
16,digitru.st,26
43,sharethrough.com,19
3,adrise.tv,18
4,adsrvr.org,18
28,kargo.com,16


In [26]:
# Emit the table held in `df` (domains receiving device IDs despite Pi-hole)
# as LaTeX source.
print(
    make_latex_table(
        df,
        label="trackers_missed_by_pihole",
        caption="Domains that receive Ad ID and Serial number after filtering requests with Pi-hole"
    )
)


\begin{table}[H]
%\centering
%\resizebox{\columnwidth}{!}{%
\begin{tabular}{lr}
\toprule
 Req domain       &   Num. of channels \\
\midrule
 tremorhub.com    &                 73 \\
 bfmio.com        &                 44 \\
 irchan.com       &                 42 \\
 monarchads.com   &                 40 \\
 bidswitch.net    &                 26 \\
 digitru.st       &                 26 \\
 sharethrough.com &                 19 \\
 adrise.tv        &                 18 \\
 adsrvr.org       &                 18 \\
 kargo.com        &                 16 \\
\bottomrule
\end{tabular}
%}
\caption{Domains that receive Ad ID and Serial number after filtering requests with Pi-hole}
\label{tab:trackers_missed_by_pihole}
\end{table}
