# Plotting notebook

This notebook generates the plots for the paper submission. Before running it, make sure to run the `temporal_trends` and `vulnerabilities` notebooks, which write the necessary CSV files into the `RESULTS_DIR` folder.

In [2]:
from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats
import seaborn as sns

from sec_certs.dataset import CCDataset

# Input CSVs are read from RESULTS_DIR; all figures are written to FIGURE_DIR.
RESULTS_DIR = Path("./results")
FIGURE_DIR = RESULTS_DIR / "figures/"

# Create the figure folder together with any missing parent (RESULTS_DIR) and
# do nothing if it is already there, so the cell is safe to re-run.
FIGURE_DIR.mkdir(parents=True, exist_ok=True)

# matplotlib.use("pgf")
sns.set_theme(style="white")

# Overrides applied on top of the seaborn theme set above.
plt.rcParams.update(
    {
        # Axes frame, titles and labels
        "axes.linewidth": 0.5,
        "axes.titlesize": 7,
        "axes.labelsize": 7,
        # Legend
        "legend.fontsize": 6.5,
        "legend.handletextpad": 0.3,
        # Ticks on the left and bottom spines
        "xtick.labelsize": 8,
        "ytick.labelsize": 8,
        "ytick.left": True,
        "ytick.major.size": 5,
        "ytick.major.width": 0.5,
        "ytick.major.pad": 0,
        "xtick.bottom": True,
        "xtick.major.size": 5,
        "xtick.major.width": 0.5,
        "xtick.major.pad": 0,
        # LaTeX / PGF text rendering
        "pgf.texsystem": "pdflatex",
        "font.family": "serif",
        "text.usetex": True,
        "pgf.rcfonts": False,
        # Line markers
        "lines.markersize": 4,
    }
)
sns.set_palette("deep")

## Histogram of CVE disclosure dates vs. date of certification

In [2]:
# Load the CVE table; the unnamed first CSV column is renamed to `dgst` and used as the index.
# A dedicated name is used (instead of the generic `df`) because a later cell binds `df`
# to the full dataset frame and relies on it; re-running this cell must not clobber that.
df_cves = (
    pd.read_csv(RESULTS_DIR / "exploded_cves.csv")
    .rename(columns={"Unnamed: 0": "dgst"})
    .set_index("dgst")
)

# Draw on an explicitly created figure so the plot never lands on a leftover current figure.
fig, ax = plt.subplots(figsize=(3.35, 2))
sns.histplot(df_cves.n_days_after_certification, kde=False, ax=ax)
ax.set(
    xlim=(-2200, 4600),
    ylim=(0, 1800),
    xlabel="Number of days after date of certification",
    ylabel="Frequency of CVEs",
)
# Vertical marker at day 0, i.e. the certification date itself.
ax.axvline(0, color="red", linewidth=1, label="Certification date")
ax.legend(loc="upper right")

fig.savefig(FIGURE_DIR / "cve_hist.pgf", bbox_inches="tight")
fig.savefig(FIGURE_DIR / "cve_hist.pdf", bbox_inches="tight")
plt.close(fig)

# QQ plot of vulnerability disclosure vs. certification date (compared against a normal distribution)
# stats.probplot(df_cves.n_days_after_certification, dist="norm", plot=plt)
# plt.show()

## Validity boxplot

In [3]:
# One box per certification year (`year_from`), showing the spread of `validity_period`.
df_validity = pd.read_csv(RESULTS_DIR / "df_validity.csv")

fig, ax = plt.subplots(figsize=(3.5, 2.5))
sns.boxplot(
    data=df_validity,
    x="year_from",
    y="validity_period",
    linewidth=0.75,
    flierprops={"marker": "x"},
    ax=ax,
)
ax.set(
    xlabel="Year of certification",
    ylabel="Lifetime of certificates (in years)",
    title="Boxplot of certificate validity periods",
)
# Rotate the year labels so they do not overlap at this figure width.
ax.tick_params(axis="x", rotation=75)

fig.savefig(FIGURE_DIR / "boxplot_validity.pgf", bbox_inches="tight")
fig.savefig(FIGURE_DIR / "boxplot_validity.pdf", bbox_inches="tight")
# Close the figure like every other plotting cell does; otherwise it can remain the
# current figure and the next seaborn call (which draws on the current axes) would
# paint on top of this boxplot. The saved files are the output of this cell.
plt.close(fig)

# Cells for 3-subplot figure

Contains: Average EAL levels, Interesting schemes evolution, Stackplot of categories

In [3]:
# Size in inches (width, height) applied to each of the panels saved by the cells below.
figure_width, figure_height = 2.3, 1.8

# Fetch the CC dataset from the web (a sizeable download, see the progress bar in this
# cell's output) and convert it to a DataFrame; a later cell reads its `eal` categories.
dset = CCDataset.from_web()
df = dset.to_pandas()

Downloading CC Dataset: 100%|██████████| 164M/164M [02:07<00:00, 1.35MB/s] 


In [9]:
avg_levels = pd.read_csv(RESULTS_DIR / "avg_eal.csv")

# Map every EAL label to its position among the categories of the `eal` column.
eal_categories = df["eal"].cat.categories
eal_to_num_mapping = dict(zip(eal_categories, range(len(eal_categories))))


def _smartcard_or_other(category):
    """Keep the smartcard category as is and pool every other category under one label."""
    if category == "ICs, Smartcards":
        return category
    return "Other 14 categories"


avg_levels["smartcard_category"] = avg_levels.category.map(_smartcard_or_other)

# Hue and style use the same column, so each group gets its own colour and its own marker.
ax = sns.lineplot(
    data=avg_levels,
    x="year_from",
    y="eal_number",
    hue="smartcard_category",
    style="smartcard_category",
    markers=True,
    errorbar=None,
)
ax.set(xlabel=None, ylabel=None, title=None, xlim=(1999.6, 2023.4))

# Label the y ticks ymin..ymax with EAL names; names without a "+" get an invisible
# one appended so that all labels end up with the same width and line up.
ymin, ymax = 1, 9
ylabels = []
for eal_name in list(eal_to_num_mapping)[ymin : ymax + 1]:
    if "+" in eal_name:
        ylabels.append(eal_name)
    else:
        ylabels.append(eal_name + r"\phantom{+}")
ax.set_yticks(range(ymin, ymax + 1), ylabels)

# NOTE(review): the 1998 tick lies left of the xlim set above; matplotlib may widen the
# view to include it - confirm which left limit is intended.
ax.set_xticks([1998, 2003, 2008, 2013, 2018, 2023])
ax.legend(title=None, labels=avg_levels.smartcard_category.unique())

fig = ax.figure
fig.set_size_inches(figure_width, figure_height)
fig.tight_layout(pad=0.1)
for suffix in ("pgf", "pdf"):
    fig.savefig(FIGURE_DIR / f"temporal_trends_categories.{suffix}")
plt.close()

In [12]:
interesting_schemes = pd.read_csv(RESULTS_DIR / "interesting_schemes.csv")

# One line per scheme: `size` over `year_from`, with a distinct colour, marker and
# dash pattern for each scheme.
ax = sns.lineplot(
    data=interesting_schemes,
    x="year_from",
    y="size",
    hue="scheme",
    style="scheme",
    dashes=True,
    markers=True,
)
ax.set(xlabel=None, ylabel=None, title=None, xlim=(1999.6, 2023.4), ylim=(0, 90))
# NOTE(review): the 1998 tick lies left of the xlim set above; matplotlib may widen the
# view to include it - confirm which left limit is intended.
ax.set_xticks([1998, 2003, 2008, 2013, 2018, 2023])
ax.legend(title=None)

fig = ax.figure
fig.set_size_inches(figure_width, figure_height)
fig.tight_layout(pad=0.1)
for suffix in ("pgf", "pdf"):
    fig.savefig(FIGURE_DIR / f"temporal_trends_schemes.{suffix}")
plt.close()

In [16]:
n_certs = pd.read_csv(RESULTS_DIR / "popular_categories.csv").astype({"year_from": "category"})

# Shorter legend names for the long category labels; labels not listed stay unchanged.
short_names = {
    "ICs, Smart Cards and Smart Card-Related Devices and Systems": "ICs and Smart Cards",
    "Network and Network-Related Devices and Systems": "Network-Related Devices",
    "Other Devices and Systems": "Other Devices",
    "One of 11 other categories": "11 Other Categories",
}
n_certs["popular_categories"] = n_certs["popular_categories"].map(
    lambda name: short_names.get(name, name)
)

cats = n_certs["popular_categories"].unique()
# The last year and the last entry of every category series are left out of the plot.
# NOTE(review): this assumes each category has exactly one row per year, in year order,
# so that all series have the same length and line up with `years` - verify against the CSV.
years = n_certs["year_from"].cat.categories[:-1]
data = []
for cat_name in cats:
    sizes = n_certs.loc[n_certs["popular_categories"] == cat_name, "size"]
    data.append(sizes.tolist()[:-1])

# palette = sns.color_palette("Spectral", 5).as_hex()
# colors = ",".join(palette)

plt.stackplot(years, data, labels=cats)
plt.legend(loc="upper center", bbox_to_anchor=(0.38, 1.02))
plt.xticks([1998, 2003, 2008, 2013, 2018, 2023])
# plt.title("(c) Popularity of categories")
plt.xlim(1997, 2023)

fig = plt.gcf()
fig.set_size_inches(figure_width, figure_height)
fig.tight_layout(pad=0.1)
for suffix in ("pdf", "pgf"):
    fig.savefig(FIGURE_DIR / f"temporal_trends_stackplot.{suffix}")
plt.close()