# Common Criteria example

This notebook illustrates the basic functionality of the `CCDataset` class, which holds the Common Criteria dataset, and of its sample class, `CommonCriteriaCert`.

In [6]:
from sec_certs.dataset.common_criteria import CCDataset
from sec_certs.dataset.common_criteria import CommonCriteriaCert
import pandas as pd

## Get fresh dataset snapshot from mirror

In [None]:
# Obtain the most recent dataset snapshot through `CCDataset.from_web_latest()`.
# NOTE(review): presumably this downloads from the project mirror and so needs network access - confirm.
dset = CCDataset.from_web_latest()
print(len(dset)) # Print number of certificates in the dataset

## Do some basic dataset serialization

In [None]:
# Round-trip the dataset through JSON: write it to disk, read it back into a
# second object, and check that both objects compare equal.
dset_json_path = "./cc_dset.json"
dset.to_json(dset_json_path)
new_dset: CCDataset = CCDataset.from_json(dset_json_path)
assert dset == new_dset

In [8]:
# Turn dataset into Pandas DataFrame
# (later cells filter `df` on its `manufacturer`, `related_cves` and `year_from` columns)
df = dset.to_pandas()

## Simple dataset manipulation

In [40]:
# Iterate over certificates in dataset
for cert in dset:
    pass

# Get certificates produced by Infineon manufacturer.
# List version: case-sensitive substring match. The truthiness guard skips
# certificates without a manufacturer instead of raising TypeError on `in None`.
infineon_certs = [x for x in dset if x.manufacturer and "Infineon" in x.manufacturer]
# DataFrame version: case-insensitive match. `na=False` turns missing manufacturers
# into False so the mask is purely boolean; `.loc` rejects masks that contain NaN.
df_infineon = df.loc[df.manufacturer.str.contains("Infineon", case=False, na=False)]

# Get certificates with some CVE
vulnerable_certs = [x for x in dset if x.heuristics.related_cves]
df_vulnerable = df.loc[df.related_cves.notna()]

# Show CVE ids of some vulnerable certificate
# (indexes the first match, so this assumes at least one vulnerable certificate exists)
print(f"{vulnerable_certs[0].heuristics.related_cves=}")

# Get certificates from 2015 and newer
df_2015_and_newer = df.loc[df.year_from > 2014]

In [None]:
# Plot distribution of years of certification:
# count certificates per `year_from` value, order the counts chronologically and draw a line.
ax = df.year_from.value_counts().sort_index().plot.line()
# Label the figure so it can be read on its own; the trailing `;` suppresses the repr output.
ax.set(
    title="Certificates per year of certification",
    xlabel="Year of certification",
    ylabel="Number of certificates",
);

## Dissect single certificate

In [None]:
# Select a certificate and print some attributes
# NOTE(review): the lookup key looks like a certificate digest present in the current
# snapshot - confirm it still exists when the dataset is refreshed.
cert: CommonCriteriaCert = dset["bad93fb821395db2"]
# Each f-string below uses the `=` specifier, so the expression text is printed with its value.
print(f"{cert.name=}")
print(f"{cert.heuristics.cpe_matches=}")
print(f"{cert.heuristics.report_references.directly_referencing=}")

In [11]:
# Collect certificates whose heuristics list at least one related CVE.
# NOTE(review): this repeats the comprehension from the "Simple dataset manipulation"
# cell and the result is not used afterwards in the visible cells - looks redundant.
vulnerable_certs = [x for x in dset if x.heuristics.related_cves]

## Serialize single certificate

In [9]:
# Dump the certificate into json and load it back.
# `from_json` is called on the class (as done for `CCDataset.from_json`), which makes it
# explicit that a new object is built rather than `cert` being modified.
cert.to_json("./cert.json")
new_cert = CommonCriteriaCert.from_json("./cert.json")
assert cert == new_cert

# Serialize as Pandas series
ser = pd.Series(cert.pandas_tuple, index=cert.pandas_columns)

## Assign dataset with CPE records and compute vulnerabilities

*Note*: This data is already computed on a dataset obtained with `from_web_latest()`; this section is just for illustration.

*Note*: This will likely not run in Binder, as the corresponding `CVEDataset` and `CPEDataset` instances take a lot of memory.

In [None]:
# Automatically match CPEs and CVEs
# The result of `compute_cpe_heuristics()` is unpacked as a 3-tuple; only the middle
# element is kept (presumably the CPE dataset used for matching - TODO confirm).
_, cpe_dset, _ = dset.compute_cpe_heuristics()
# NOTE(review): presumably relies on the CPE matches computed above - confirm the ordering requirement.
dset.compute_related_cves()

## Create new dataset and fully process it

*Warning*: It's not a good idea to run this from a notebook. It may take several hours to finish. We recommend using `from_web_latest()` or turning this into a Python script.

In [None]:
# Build a dataset from scratch instead of loading a ready-made snapshot.
# NOTE(review): this rebinds `dset`, so the dataset loaded earlier in the notebook is
# no longer reachable under that name once this cell has run.
dset = CCDataset()
# NOTE(review): the steps below presumably have to run in this order
# (fetch certificate list -> protection profiles -> download PDFs -> convert -> analyze) - confirm.
dset.get_certs_from_web()
dset.process_protection_profiles()
dset.download_all_pdfs()
dset.convert_all_pdfs()
dset.analyze_certificates()