# Libraries

In [1]:
# Third-party libraries used by the cells below, plus global matplotlib settings.
import numpy as np
import pandas as pd
import matplotlib
# Embed fonts as TrueType (Type 42) in saved PDFs; set here before pyplot is imported.
matplotlib.rcParams['pdf.fonttype'] = 42
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib.colors import LogNorm
# Default font size for all figures created in this notebook.
plt.rcParams.update({'font.size': 12})
import seaborn as sns

# Load Data

### Tranco

In [2]:
# Tranco list with one row per domain (columns: rank, domain).
# NOTE(review): read_pickle can execute arbitrary code while unpickling --
# only load pickle files that come from a trusted source.
tranco_pickle = "../data/pkl/df_tranco.pkl.zst"
df_tranco = pd.read_pickle(tranco_pickle)
df_tranco.head(5)

Unnamed: 0,rank,domain
0,1,google.com
1,2,akamaiedge.net
2,3,facebook.com
3,4,youtube.com
4,5,netflix.com


In [3]:
# Lookup table domain -> Tranco rank, used below to annotate the QUIC and TLS frames.
rank_by_domain = df_tranco.set_index("domain")["rank"]
dict_tranco_rank = rank_by_domain.to_dict()
# Sanity check against the table shown above.
dict_tranco_rank["netflix.com"]

5

### QUIC

In [4]:
# QUIC handshake measurements, annotated with each domain's Tranco rank.
# Domains that are not in dict_tranco_rank get NaN as their rank.
df_quic = pd.read_pickle("../data/pkl/df_quicreach_handshakes.pkl.zst")
df_quic = df_quic.assign(tranco_rank=df_quic["domain"].map(dict_tranco_rank))
df_quic.head()

Unnamed: 0,domain,rtt,sent_tls_payload,recv_tls_payload,handshake_tag,sent_udp_payload,recv_udp_payload,ampl_factor,initial_size,tranco_rank
0,0-1xbet.best,8.486,288,2656,Amplification,1220,5118,4.2,1200,
1,0-1xbet.best,16.799999,288,2654,Amplification,1220,5116,4.2,1212,
2,0-1xbet.best,23.701,288,2656,Amplification,1222,5118,4.2,1222,
3,0-1xbet.best,8.466,288,2655,Amplification,1232,5117,4.2,1232,
4,0-1xbet.best,10.727,288,2656,Amplification,1242,5118,4.1,1242,


### TLS

In [5]:
# TLS certificate data: keep only the last row per domain, then attach the
# domain's Tranco rank (NaN when the domain is not in dict_tranco_rank).
df_tls = (
    pd.read_pickle("../data/pkl/df_tls_certs.pkl.zst")
    .drop_duplicates(subset=["domain"], keep="last")
    .assign(tranco_rank=lambda certs: certs["domain"].map(dict_tranco_rank))
)
df_tls.head()

Unnamed: 0,domain,type,d,dt,certsize,tbssize,subjcn,subjhash,subjk,subjsize,...,sctcount,sctsize,extssize,sigalg,sigsize,validation,precert,ca,hash,tranco_rank
2,0-1.ir,LEAF,2,2,1315,1035,0-1.ir,54a78fa9554bdf23acf9f904cbf1f829a6e95693,259e6544d8a23a946330daab01dd4464028b85d7,19,...,2,246,484,SHA256-RSA,256,DV,False,False,7bec3730cd02e1f9be61e4db036f028c2d6d4a81d9480b...,146378.0
3,0-11-0.com,LEAF,0,0,1288,1008,www.stackssl.com,21b145e103c2e1465e6c7e68de0c2f9b59a3501b,c1de76d0eb2484971f8e1657e26f5ae3388d0921,29,...,0,0,440,SHA256-RSA,256,DV,False,False,f9cc586e274a96227aa09eca83052843b76fca404faa3b...,288724.0
5,0-1xbet.best,LEAF,1,1,1341,1250,sni.cloudflaressl.com,692dd356163caca0af452c8e0a0e19c97807ea59,7bef63bb95644bb03e9cd1507afdd32183917338,119,...,3,367,773,ECDSA-SHA256,72,OV,False,False,6a251e976d17386cd68ee077e643e8105324fc6d5785af...,
9,0-1xbet.club,LEAF,3,3,967,845,*.0-1xbet.club,92fc52ace9cdabd53a73ea154f68e4401abd58ef,271ed52a37ace035155cdebb7d64650c00e75d37,27,...,2,244,492,ECDSA-SHA384,103,DV,False,False,08d2ff60ee4993431d2d268d7083b1a04a8dd256586b02...,
11,0-1xbet.info,LEAF,1,1,1338,1248,sni.cloudflaressl.com,692dd356163caca0af452c8e0a0e19c97807ea59,cacad42a0484ce4f649cea637f0ba68512f6ec35,119,...,3,365,771,ECDSA-SHA256,71,OV,False,False,1fbe02e4535544b8f1e58af56d863511993a453edbbb39...,


# Visualize Reachability Per Rank

In [6]:
# Count unique HTTPS domains per Tranco rank bucket.
#
# Interval notation: "[a, b)" is half-open, i.e. a <= rank < b.
# With right=False every bucket includes its lower edge and excludes its
# upper edge, giving [1, 100001), [100001, 200001), ..., [900001, 1000001).
RANK_BIN_SIZE = 100_000
rank_bin_edges = [RANK_BIN_SIZE * i + 1 for i in range(0, 11)]
myBins = pd.cut(df_tls["tranco_rank"], bins=rank_bin_edges, right=False)

# observed=False is passed explicitly: it keeps every bucket in the result
# (even an empty one) and does not depend on pandas' default for categorical
# groupers, which newer releases warn about and plan to change.
tmp = (
    df_tls.groupby(myBins, observed=False)["domain"]
    .nunique()
    .rename("HTTPS Services")
    .to_frame()
)
tmp

Unnamed: 0_level_0,HTTPS Services
tranco_rank,Unnamed: 1_level_1
"[1, 100001)",79533
"[100001, 200001)",78117
"[200001, 300001)",78700
"[300001, 400001)",79391
"[400001, 500001)",79436
"[500001, 600001)",79007
"[600001, 700001)",79564
"[700001, 800001)",81262
"[800001, 900001)",78896
"[900001, 1000001)",79722


In [None]:
# Share of HTTPS / QUIC / HTTPS-only services per Tranco rank bucket, in percent.
#
# The table is rebuilt from df_tls and df_quic on every run instead of
# rescaling a frame left over from an earlier cell, so re-running this cell
# cannot apply the percent conversion twice.
RANK_BIN_SIZE = 100_000
rank_bin_edges = [RANK_BIN_SIZE * i + 1 for i in range(0, 11)]

# Half-open buckets [a, b): lower edge included, upper edge excluded.
https_bins = pd.cut(df_tls["tranco_rank"], bins=rank_bin_edges, right=False)
# NOTE: myBins (the QUIC buckets) is read again by the handshake-type cell below.
myBins = pd.cut(df_quic["tranco_rank"], bins=rank_bin_edges, right=False)

# observed=False keeps all ten buckets regardless of the pandas default.
tmp = (
    df_tls.groupby(https_bins, observed=False)["domain"]
    .nunique()
    .rename("HTTPS Services")
    .to_frame()
)
tmp["QUIC Services"] = df_quic.groupby(myBins, observed=False)["domain"].nunique()

# Assumes every QUIC service is also counted as an HTTPS service -- TODO confirm.
tmp["HTTPS only Services"] = tmp["HTTPS Services"] - tmp["QUIC Services"]
tmp = tmp[sorted(tmp.columns)]
# Convert unique-domain counts into percent of the RANK_BIN_SIZE ranks per bucket.
tmp = tmp / RANK_BIN_SIZE * 100
tmp

### Horizontal Bars (relative)

In [None]:
# Stacked horizontal bars: QUIC vs. HTTPS-only share per Tranco rank bucket.
fsize_wide_barh = (6*0.7, 3*0.7)  # alternative size: (8*0.7, 3*0.7)
bar_columns = ["QUIC Services", "HTTPS only Services"]
ax = tmp[bar_columns].plot.barh(
    stacked=True,
    cmap="GnBu_r",
    figsize=fsize_wide_barh,
    xlim=(0, 100),
    edgecolor="black",
)

ax.set(xlabel="Share of Services [%]", ylabel="Tranco Groups [Rank]")

# Two-column legend anchored above the axes.
legend_style = dict(
    bbox_to_anchor=(1.025, 1.3),
    ncol=2,
    fancybox=False,
    shadow=False,
    handletextpad=0.6,
    columnspacing=1.5,
    framealpha=0.7,
)
ax.legend(**legend_style)
ax.xaxis.set_minor_locator(MultipleLocator(10))

# Save a vector (PDF) and a raster (PNG) copy, then show and release the figure.
fig = ax.get_figure()
fig.savefig("./plots/barh_reachable_quic_sites.pdf", bbox_inches='tight')
fig.savefig("./plots/barh_reachable_quic_sites.png", bbox_inches='tight', dpi=200)
plt.show()
plt.close(fig)

In [None]:
# Mean, median and standard deviation of every column across the rank buckets.
summary_stats = ["mean", "median", "std"]
tmp.agg(summary_stats)

# Visualize Handshake Type per Rank

In [None]:
# Share of QUIC handshake types per Tranco rank bucket, in percent.
#
# The rank buckets are computed here from df_quic rather than taken from a
# global left behind by an earlier cell, so this cell gives the same result
# regardless of which binning cell was executed last.
RANK_BIN_SIZE = 100_000
rank_bin_edges = [RANK_BIN_SIZE * i + 1 for i in range(0, 11)]
# Half-open buckets [a, b): lower edge included, upper edge excluded.
quic_rank_bins = pd.cut(df_quic["tranco_rank"], bins=rank_bin_edges, right=False)

# Unique domains per (bucket, handshake type); observed=False keeps all buckets
# independent of the pandas default for categorical groupers.
tmp = df_quic.groupby([quic_rank_bins, "handshake_tag"], observed=False)["domain"].nunique()
# One row per bucket, one column per handshake type.
tmp = tmp.unstack()
# Fix the column order used by the heatmap below.
tmp = tmp[["Amplification", "Multi-RTT", "RETRY", "1-RTT"]]
# Normalise each row so the four handshake types sum to 100 %.
tmp = tmp.div(tmp.sum(axis=1), axis=0) * 100
tmp

In [None]:
# Heatmap of handshake-type shares per rank bucket (log-scaled colours).
handshake_cmap = "crest"  # other palettes tried: "vlag", "flare", "Spectral_r", "cividis"
colorbar_style = {'label': 'QUIC Services [%]'}

ax = sns.heatmap(
    tmp.iloc[::-1],  # rows in reversed bucket order
    annot=True,
    cmap=handshake_cmap,
    norm=LogNorm(vmax=100),
    fmt='.2f',
    annot_kws={'rotation': 0},
    linewidths=0.1,
    cbar_kws=colorbar_style,
)

fig = ax.get_figure()
fig.set_size_inches(7.4*0.7, 3.3*0.7)
ax.set(xlabel="QUIC Handshake Type", ylabel="Tranco Groups [Rank]")
ax.tick_params(axis='x', rotation=15)

# Save a vector (PDF) and a raster (PNG) copy, then show and release the figure.
fig.savefig("./plots/heatmap_quic_handshake_types_per_rank_rel2.pdf", bbox_inches='tight')
fig.savefig("./plots/heatmap_quic_handshake_types_per_rank_rel2.png", bbox_inches='tight', dpi=200)
plt.show()
plt.close(fig)

In [None]:
# Mean, median and standard deviation of every column across the rank buckets.
summary_stats = ["mean", "median", "std"]
tmp.agg(summary_stats)