# Miscellaneous 
In this notebook, we perform some misc. tasks regarding the TPM dataset

- Creation of a clean dataset of result files used for analysis
- Making table of TPM firmwares we have in our dataset 

First, we load the metadata for the dataset we already have, which has not been cleaned yet.

In [None]:
import json

def load_metadata(metadata_path):
    """
    Load the dataset metadata JSON and group its entries by vendor.

    The file's top-level object must contain an "entries" mapping; only the
    values of that mapping are used. Entries without a "vendor" key are
    reported and skipped.

    Returns a dict mapping each vendor to the list of its entries, or an
    empty dict when the file cannot be read or parsed, has no "entries"
    mapping, or the mapping is empty.
    """
    try:
        with open(metadata_path, "r") as f:
            metadata = json.load(f)

        entries = list(metadata["entries"].values())
        if not entries:
            raise ValueError("metadata contains no entries")
    # Only the failures loading can legitimately produce are handled here, so
    # KeyboardInterrupt and genuine programming errors still surface.
    # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
    except (OSError, ValueError, KeyError, TypeError, AttributeError) as err:
        print(f"report_create: retrieving metadata was unsuccessful ({err!r})")
        return {}

    # We now group entries by vendor
    grouped = {}
    for entry in entries:
        vendor = entry.get("vendor")
        if vendor is None:
            print(
                f"report_create: entry {entry} does not contain vendor")
            continue

        grouped.setdefault(vendor, []).append(entry)
    return grouped

# Load the uncleaned dataset metadata, grouped by vendor ({} if loading fails).
metadata = load_metadata('../metadata.json')

In [None]:
# Show which vendors are present in the loaded metadata.
metadata.keys()

To create a clean dataset of results, we want to do the following:

- Try to parse each file using the tools from `algtestprocess`, take note of each missing result, and note the paths of results which have to be checked and possibly won't be included in the resulting dataset. This has to be done manually; by default we won't remove any result which was successful in at least one category: support, performance, or cryptographic properties.

In [None]:
import logging, sys
# Suppress all logging calls of severity sys.maxsize and below, i.e. effectively
# every log record, so that only this notebook's own print output is shown.
logging.disable(sys.maxsize)

In [None]:
from algtestprocess.modules.data.tpm.manager import TPMProfileManager
from algtestprocess.modules.data.tpm.enums import CryptoPropResultCategory as cat

cryptoprops = []
for vendor, vendor_entries in metadata.items():
    for entry in vendor_entries:
        tpm_name = entry['TPM name']
        for measurement_path in entry['measurement paths']:
            man = TPMProfileManager(measurement_path)
            performance = man.performance_profile
            support = man.support_profile
            cpps = man.cryptoprops
            assert performance or support or cpps
            # Results are never dropped automatically: one measurement phase may
            # have failed or been skipped on purpose while the others succeeded,
            # and that data should not be lost. The report below only points at
            # paths worth a manual look.
            # Each row: label in the message, variable name echoed in the
            # message, the profile itself, and the minimum plausible entry count.
            checks = (
                ("performance", "performance", performance, 10),
                ("support", "support", support, 10),
                ("cryptoprops", "cpps", cpps, 5),
            )
            for label, variable, profile, minimum in checks:
                if profile is None:
                    print(f"MISSING - {label} profile at {measurement_path}")
                    continue
                count = len(profile.results)
                if count < minimum:
                    print(f"Suspiciously low number of {label} profile entries len({variable}.results)={count} at {measurement_path}")
            

In [None]:
# Paths of bad results; to be filled in by hand after reviewing the report above.
blacklist = []

It is now assumed that you have put all the bad result paths into `blacklist`. We may now create the clean dataset by copying all results and sorting them into folders by manufacturer and firmware version.

In [None]:
import os
import subprocess

output_folder = os.path.join('./tpm-dataset/')
# exist_ok keeps the cell re-runnable after a previous or interrupted run.
os.makedirs(output_folder, exist_ok=True)

# Folder names already used anywhere in the output; a repeated name gets a
# "-<firmware>-<counter>" suffix so that no two results share a folder name.
used_names = set()
counter = 0
for vendor, vendor_entries in metadata.items():
    vendor_folder = os.path.join(output_folder, vendor)
    os.makedirs(vendor_folder, exist_ok=True)

    for entry in vendor_entries:
        for measurement_path in entry['measurement paths']:
            # Results flagged during the manual review are left out of the clean dataset.
            if measurement_path in blacklist:
                print(f"Skipping blacklisted {measurement_path}")
                continue

            man = TPMProfileManager(measurement_path)
            support = man.support_profile
            try:
                fw = support.firmware_version
            except Exception:
                # Missing support profile (None) or one that cannot report its
                # firmware version; stays best-effort without swallowing
                # KeyboardInterrupt the way a bare except would.
                fw = None
            if not (support and fw):
                print(f"At {measurement_path}, unexpectedly encountered no support profile")
                continue

            firmware_folder = os.path.join(vendor_folder, fw)
            os.makedirs(firmware_folder, exist_ok=True)

            # normpath drops a trailing slash, which would otherwise give an empty name.
            folder_name = os.path.basename(os.path.normpath(measurement_path))
            if folder_name in used_names:
                # We have folder named same in the measurements so we must rename it
                destination = os.path.join(firmware_folder, f"{folder_name}-{fw}-{counter}")
                counter += 1
            else:
                destination = os.path.join(firmware_folder, folder_name)

            used_names.add(folder_name)

            # `cp -r` into an existing directory would nest a second copy inside it,
            # so results copied by an earlier run are left alone.
            if os.path.exists(destination):
                print(f"Skipping {folder_name}, {destination} already exists")
                continue

            print(f"Copying {folder_name} to {destination}")
            result = subprocess.run(['cp', '-r', measurement_path, destination])
            if result.returncode != 0:
                print(f"FAILED - copying {measurement_path} (cp exited with {result.returncode})")
            

# TPM firmware table
- Now we create a firmware table out of the results we have cleaned.

In [None]:
#!python ../process.py tpm metadata-update ./tpm-dataset

In [None]:
# Reload the metadata, this time from ./metadata.json
# (presumably the file produced by the metadata-update command above; confirm).
metadata = load_metadata('./metadata.json')

## Utilities

In [None]:
import re
from math import inf

def _table(l, cols, header):
    # header repeat col times
    out = ""
    out += "| " + (" | ".join(header) + " | ") * cols + "\n"
    out += "| " + ("|".join(["---"] * (cols * len(header))) + " | ") + "\n"

    i = 0
    while i < len(l):
        out += "| "
        for _ in range(cols):
            if i < len(l):
                entries = l[i]
            else:
                entries = [" " for _ in range(len(header))]

            assert len(entries) == len(header)

            out += " | ".join(map(str, entries)) + " | "
            i += 1
        out += "\n"
    return out

import re
from math import inf

def tpm_sorted(profiles, device_name):
    """
    Sorts the profiles according to manufacturer id alphabetically, then
    firmware version numerically, then by the optional bracketed index.

    `device_name` is a callable returning the name of a profile. Names are
    expected in the form "<manufacturer> <a.b.c...>" with an optional
    trailing " [<index>]"; names not matching RGX are reported on stdout
    before sorting.

    Versions are compared component by component, so a version that is a
    prefix of another sorts first ("1.0" before "1.0.5"). Names without an
    index sort after indexed names of the same manufacturer and version.

    Returns a new sorted list. Raises ValueError when a name cannot be split
    into manufacturer and numeric version.
    """
    # Linear-time form of the original pattern for single-line names; the
    # nested quantifiers of `(\s*.+)+` could backtrack exponentially on a
    # name that does not match.
    RGX = r".+\s+\d+(\.\d+)*(\s\[\d+\])?"

    # Materialise once so that an iterator is not exhausted by the format check.
    profiles = list(profiles)

    mismatched = [
        name for p in profiles if not re.match(RGX, (name := device_name(p)))
    ]
    if mismatched:
        print("These device names do not match format")
        print(mismatched)

    def key_f(profile):
        idx = inf
        l, r = device_name(profile).rsplit(maxsplit=1)

        if re.match(r"[\[]\d+[\]]", r):
            idx = int(r.replace("[", "").replace("]", ""))
            manufacturer, firmware = l.rsplit(maxsplit=1)
        else:
            manufacturer, firmware = l, r

        numbers = tuple(int(x) for x in filter(None, firmware.split(".")))

        # Nested tuple keeps the three parts apart: in a flat list the index
        # would be compared against version components of longer versions.
        return (manufacturer, numbers, idx)

    return sorted(profiles, key=key_f)

In [None]:
from IPython.display import display, Markdown
# Substrings identifying the ECC signature algorithms among cryptoprops results.
ECC_SIGNATURE_ALGS = ('ecdsa', 'ecdaa', 'ecschnorr')
# An ECC result needs more than this many rows to count as collected ECC data.
MIN_ECC_ROWS = 100

# NOTE(review): `errors` maps a mis-reported INTC firmware string to its
# replacement, but no cell of this notebook defines it. Reuse it when an
# earlier session or cell provided it; otherwise fall back to an empty mapping
# so that a fresh Restart & Run All does not fail with NameError.
errors = globals().get('errors', {})


def convert_to_int(x):
    """Parse a hexadecimal string into an int; return any other value unchanged."""
    return int(x, 16) if isinstance(x, str) else x


# vendor -> set of firmware versions seen for that vendor
firmwares = {}
# "<vendor> <firmware>" -> details collected from the support and cryptoprops profiles
fwinfo = {}

for vendor, vendor_entries in metadata.items():
    for entry in vendor_entries:
        for measurement_path in entry['measurement paths']:
            man = TPMProfileManager(measurement_path)
            support = man.support_profile
            cpps = man.cryptoprops
            if support is None:
                # Everything below is read from the support profile.
                print(f"At {measurement_path}, unexpectedly encountered no support profile")
                continue

            fw = support.firmware_version
            firmwares.setdefault(vendor, set()).add(fw)

            fwn = f'{vendor} {fw}'
            if vendor == "INTC" and fw in errors:
                fwn = fwn.replace(fw, errors[fw])

            info = fwinfo.setdefault(fwn, {})

            pt_year = support.results.get('TPM2_PT_YEAR').value
            pt_revision = support.results.get('TPM2_PT_REVISION').value
            pt_day_of_year = support.results.get('TPM2_PT_DAY_OF_YEAR').value
            # Get image tag
            image_tag = support.test_info['Image tag'].strip('" ')

            # Check if we have any ecc cryptoprops results
            ecc = False
            if cpps is not None:
                for alg in cpps.results.keys():
                    if not any(name in alg.value for name in ECC_SIGNATURE_ALGS):
                        continue
                    res = cpps.results.get(alg)
                    if (
                        res is not None
                        and res.data is not None
                        and len(res.data.index) > MIN_ECC_ROWS
                    ):
                        ecc = True
                        break

            info['Year'] = convert_to_int(pt_year)
            info['Day'] = convert_to_int(pt_day_of_year)
            info['TPM Revision'] = pt_revision

            image_tags = info.setdefault('Image tag', [])
            if image_tag not in image_tags:
                image_tags.append(image_tag)

            # A later measurement may set ECC to True but never resets it to False
            if not info.get('ECC'):
                info['ECC'] = ecc


fwinfo

In [None]:
def sorted_fws(fws):
    """
    Return the firmware version strings as a new list, ordered numerically
    component by component (so "1.9" comes before "1.10").

    Every dot-separated component must be an integer.
    """
    def numeric_parts(version):
        return [int(part) for part in version.split('.')]

    ordered = list(fws)
    ordered.sort(key=numeric_parts)
    return ordered

def delimit_fws(fws, period):
    """
    Join the firmware strings with ", ", additionally inserting an HTML
    line break before every `period`-th item (the first item excepted).
    """
    pieces = []
    for position, fw in enumerate(fws):
        if position:
            pieces.append(', ')
            if position % period == 0:
                pieces.append('<br>')
        pieces.append(fw)
    return ''.join(pieces)
    

In [None]:
# Remove the VMW vendor; pop() with a default keeps the cell safe to re-run
# and does not fail when the vendor is absent from the dataset.
firmwares.pop('VMW', None)

In [None]:
# Print the markdown source of a two-column table: one row per vendor, with its
# firmware versions sorted and a <br> inserted after every 5 versions.
print(_table([[vendor, delimit_fws(sorted_fws(fws), 5)] for vendor, fws in firmwares.items()], 1, ['Vendor', 'Firmware versions'] ))

In [None]:
# One column per vendor, listing that vendor's sorted firmware versions downwards;
# columns shorter than the longest one are padded with blank cells.
header = list(firmwares.keys())
rows = []
for column, versions in enumerate(firmwares.values()):
    for row_index, fw in enumerate(sorted_fws(versions)):
        if row_index >= len(rows):
            # First vendor to reach this depth: open a new all-blank row
            rows.append([' '] * len(firmwares.keys()))
        rows[row_index][column] = fw

display(Markdown(_table(rows, 1, header)))

## Table (vendor, tpm version, tpmalgtest/tpm_pcr used for collection)

In [None]:
# "<vendor> <firmware>" names of every firmware present in the tpm2-algtest results
tpm_algtest_firmwares = {
    f'{vendor} {firmware}'
    for vendor, fws in firmwares.items()
    for firmware in fws
}

In [None]:
with open('tpm-pcr-metadata.json') as f:
    tpm_pcr_metadata = json.load(f)

tpm_pcr_firmwares = set(tpm_pcr_metadata.keys())

# Iterate the dict, not the set: set order of strings is hash-randomised per
# process, which made the order of newly added fwinfo entries (and therefore
# of the JSON written from fwinfo) differ from run to run.
for fwn, pcr_info in tpm_pcr_metadata.items():
    info = fwinfo.setdefault(fwn, {})
    info['TPM Version'] = pcr_info['TPM Version']
    info['TPM Revision'] = pcr_info['TPM Revision']


In [None]:
# Every firmware name seen by either collection tool, as a list
all_firmwares = list(tpm_pcr_firmwares | tpm_algtest_firmwares)

In [None]:
# Every row below has seven cells, so the column spec needs seven entries;
# with the previous four-column spec LaTeX rejects the table.
print("\\begin{tabular}{ c c c c c c c }")
print("Vendor & Firmware version & Year & TPM Version & TPM Revision & tpm2-algtest & TPM\\_PCR \\\\ \\hline")

for fwn in tpm_sorted(all_firmwares, lambda x: x):
    # Split on the first space only, so a trailing "[index]" stays with the version.
    vendor, _, fwv = fwn.partition(' ')

    # NOTE(review): fwinfo keys of INTC firmwares may have been rewritten through
    # `errors`, while the names iterated here are built from the raw versions;
    # setdefault avoids a KeyError for such names.
    info = fwinfo.setdefault(fwn, {})
    in_algtest = fwn in tpm_algtest_firmwares
    in_pcr = fwn in tpm_pcr_firmwares

    if in_algtest:
        tpm_version = '2.0'
    elif info.get('TPM Version') is not None:
        tpm_version = info['TPM Version']
    else:
        tpm_version = '-'

    # Recorded in fwinfo as well, so the flags end up in the exported JSON.
    info['tpm2-algtest'] = in_algtest
    info['TPM_PCR'] = in_pcr

    print(
        f"{vendor} &{fwv} &"
        f"{info['Year'] if 'Year' in info else '-'} &"
        f"{tpm_version} &"
        f"{info.get('TPM Revision', '-')} &"
        f"{'Y' if in_algtest else '-'} & "
        f"{'Y' if in_pcr else '-'} \\\\ \\hline ")

print("\\end{tabular}")

### TPM timeline diagram 

In [None]:
# Inspect the per-firmware information collected so far.
fwinfo

In [None]:
# Write the per-firmware information to firmware-info.json in the working directory.
with open('firmware-info.json', 'w') as f:
    json.dump(fwinfo, f)