# Libs

In [1]:
import numpy as np
import pandas as pd
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib.colors import LogNorm
plt.rcParams.update({'font.size': 12})
import matplotlib.colors as mpc
import seaborn as sns

# Load Data

### QUIC

In [2]:
# QUIC handshake measurements. pandas infers the zstd compression from the
# ".zst" suffix of the path.
# NOTE: unpickling can execute arbitrary code -- only load files that come
# from a trusted source.
df_quic = pd.read_pickle("../data/pkl/df_quicreach_handshakes.pkl.zst")

# Preview the first rows.
df_quic.head()

Unnamed: 0,domain,rtt,sent_tls_payload,recv_tls_payload,handshake_tag,sent_udp_payload,recv_udp_payload,ampl_factor,initial_size
0,0-1xbet.best,8.486,288,2656,Amplification,1220,5118,4.2,1200
1,0-1xbet.best,16.799999,288,2654,Amplification,1220,5116,4.2,1212
2,0-1xbet.best,23.701,288,2656,Amplification,1222,5118,4.2,1222
3,0-1xbet.best,8.466,288,2655,Amplification,1232,5117,4.2,1232
4,0-1xbet.best,10.727,288,2656,Amplification,1242,5118,4.1,1242


### TLS

In [None]:
# TLS certificate table (compression inferred from the ".zst" suffix).
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
df_tls = pd.read_pickle("../data/pkl/df_tls_certs.pkl.zst")

# Sum of "certsize" over all rows of a domain, broadcast back onto every row
# of that domain.
certsize_per_domain = df_tls.groupby("domain")["certsize"]
df_tls["certsize_total"] = certsize_per_domain.transform("sum")

# Preview the first rows.
df_tls.head()

# Show Number of Services Sharing the Same Certificate

In [None]:
# Distribution of the "sancount" value over all LEAF rows of the TLS table:
# x = sancount, y = number of LEAF rows with that value (log-scaled).
leaf_rows = df_tls.loc[df_tls["type"] == "LEAF"]
sancount_histogram = leaf_rows["sancount"].value_counts().sort_index()
ax = sancount_histogram.plot(logy=True, marker=".", figsize=(8*0.7, 3*0.7))

# Axis labels and fixed view limits, plus a minor x tick every 100.
ax.set(
    xlabel="Number of Services [#]",
    ylabel="QUIC Services [#]",
    xlim=(-30, 1030),
    ylim=(10**-0.5, 10**6),
)
ax.xaxis.set_minor_locator(MultipleLocator(100))

# Render, then release the figure.
fig = ax.get_figure()
plt.show()
plt.close(fig)

In [None]:
# Keep only TLS rows whose domain also occurs in the QUIC measurements.
quic_domains = set(df_quic["domain"])
df_tls_quicservices = df_tls[df_tls["domain"].isin(quic_domains)]

# Distribution of "sancount" over the LEAF rows of those QUIC domains.
# NOTE: rows are not de-duplicated by "hash" here (a
# `.drop_duplicates(subset="hash")` variant was considered and left disabled).
quic_leaf_rows = df_tls_quicservices.loc[df_tls_quicservices["type"] == "LEAF"]
quic_sancount_histogram = quic_leaf_rows["sancount"].value_counts().sort_index()
ax = quic_sancount_histogram.plot(logy=True, marker=".", figsize=(8*0.7, 3*0.7))

# Axis labels and fixed view limits, plus a minor x tick every 100.
ax.set(
    xlabel="Number of QUIC Services [#]",
    ylabel="QUIC Services [#]",
    xlim=(-30, 1030),
    ylim=(10**-0.5, 10**6),
)
ax.xaxis.set_minor_locator(MultipleLocator(100))

# Render, then release the figure.
fig = ax.get_figure()
plt.show()
plt.close(fig)

# Detect Cruise-Liner Certs

In [None]:
# Leaf certificates of the QUIC domains, reduced to the columns needed for the
# cruise-liner analysis.
#
# TODO: discuss whether rows should be de-duplicated by certificate hash, i.e.
#   .drop_duplicates(subset=["hash"])
# They are currently NOT de-duplicated, so a hash that occurs in several rows
# is counted once per row. (A de-duplicated frame used to be built here and was
# immediately overwritten; that dead computation has been dropped and the
# resulting `df_cruisers` is unchanged.)
cruiser_columns = ["hash", "isscn", "sansize", "sancount", "certsize_total", "certsize"]
df_cruisers = df_tls_quicservices.loc[
    df_tls_quicservices["type"] == "LEAF", cruiser_columns
].copy()  # copy so the new column below is not written into a slice of df_tls

# Share of each certificate's bytes taken up by its SANs, in percent.
df_cruisers["sansize_share"] = df_cruisers["sansize"] / df_cruisers["certsize"] * 100

In [None]:
# Thresholds that split the plot into four regions.
cruiser_threshold = 28.9  # 16.2 ||| ### 6.5 for 1%  ### 32.98 for 0.1%
# NOTE(review): presumably 3x a 1357-byte client Initial (QUIC
# anti-amplification budget) -- confirm where 1357 comes from.
ampl_threshold = 3 * 1357

# Hexagonal 2D histogram of certificate size vs. SAN byte share. No `C` column
# is given, so each hexagon is coloured by the number of rows falling into it,
# on a logarithmic colour scale from 1 to 10^5.
ax = df_cruisers.plot.hexbin(
    x="certsize",
    y="sansize_share",
    sharex=False,
    norm=mpc.LogNorm(vmin=1, vmax=10**5),
    cmap="viridis",
    reduce_C_function=np.size,
    alpha=1,
    linewidths=0.2,
    extent=(0, 16000, 0, 100),
)
fig = ax.get_figure()

# Dash-dotted guide lines marking the two thresholds.
guide_style = dict(linestyle="-.", linewidth=1., color="grey", alpha=0.6)
ax.axhline(cruiser_threshold, **guide_style)
ax.axvline(ampl_threshold, **guide_style)

# Rows whose SAN share exceeds the cruiser threshold, and the TLS rows of the
# QUIC domains that carry one of those certificate hashes.
df_cruiser_found = df_cruisers[df_cruisers["sansize_share"] > cruiser_threshold]
affected_rows = df_tls_quicservices[
    df_tls_quicservices["hash"].isin(df_cruiser_found["hash"])
]

# Summary statistics.
print("Total Leaf Certs:", len(df_cruisers))
print("Max:", df_cruisers[["certsize", "sansize_share"]].max().to_dict())

print(f"Cruiser Certs ({cruiser_threshold} % SANs):", len(df_cruiser_found))
print("Percentile:", len(df_cruiser_found)/len(df_cruisers)*100)
print("Affected QUIC Domains from our list: ",
      affected_rows["domain"].nunique(), "/", df_quic["domain"].nunique())

# labels
ax.set_xlabel("Leaf Certificate Size [B]")
ax.set_ylabel("Byte Share of Subj. Alt Names [%]")
# The second axes of the figure is the colour bar added by the hexbin plot.
fig.get_axes()[1].set_ylabel("QUIC Services [#] ")
ax.xaxis.set_minor_locator(MultipleLocator(1000))
ax.yaxis.set_minor_locator(MultipleLocator(10))

# Hard-coded share of rows per region (x, y in data coordinates).
region_labels = [
    (-600, 21, "99%"),
    (-600, 32, "0.9%"),
    (14500, 32, "0.1%"),
    (15200, 21, "0%"),
]
for label_x, label_y, label_text in region_labels:
    ax.text(label_x, label_y, label_text, color="grey", alpha=0.8)

# Export as PDF and PNG, then render and release the figure.
fig.savefig("./plots/hexbin_quic_certs_cruiser.pdf", bbox_inches='tight')
fig.savefig("./plots/hexbin_quic_certs_cruiser.png", bbox_inches='tight', dpi=200)
plt.show()
plt.close(fig)

In [None]:
# Percentage of rows in each of the four regions of the hexbin plot. The group
# key is the pair (SAN share above cruiser threshold, certificate size above
# amplification threshold).
above_cruiser = df_cruisers["sansize_share"] > cruiser_threshold
above_ampl = df_cruisers["certsize"] > ampl_threshold
region_key = list(zip(above_cruiser, above_ampl))

region_share = df_cruisers.groupby([region_key]).size() / len(df_cruisers) * 100
region_share.round(1)

### Who is issuing cruise-liner certs?

In [None]:
# Relative frequency (in percent) of each "isscn" value among the rows whose
# SAN share exceeds the cruiser threshold.
cruiser_rows = df_cruisers.loc[df_cruisers["sansize_share"] > cruiser_threshold]
issuer_fraction = cruiser_rows["isscn"].value_counts(normalize=True)
issuer_fraction.to_frame() * 100