# Miscellaneous 
In this notebook, we perform some miscellaneous tasks regarding the TPM dataset:

- Creation of a clean dataset of result files used for analysis
- Making table of TPM firmwares we have in our dataset 

First, we load the metadata for the dataset we already have, which has not been cleaned yet.

In [None]:
import json

def load_metadata(metadata_path):
    """Load dataset metadata from a JSON file and group its entries by vendor.

    The file is expected to hold a JSON object with an ``"entries"`` mapping
    whose values are dictionaries describing individual results.

    Args:
        metadata_path: Path to the metadata JSON file.

    Returns:
        A dict mapping each vendor to the list of entries carrying that
        vendor, in file order. Entries without a ``"vendor"`` key are
        reported and skipped. An empty dict is returned when the file cannot
        be read or parsed, has no ``"entries"`` mapping, or the mapping is
        empty.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(metadata_path, "r", encoding="utf-8") as f:
            metadata = json.load(f)
        entries = list(metadata["entries"].values())
    except (OSError, ValueError, KeyError, TypeError, AttributeError) as err:
        # Only failures that mean "unreadable or malformed metadata" are
        # handled here; KeyboardInterrupt and SystemExit keep propagating.
        print(f"report_create: retrieving metadata was unsuccessful ({err!r})")
        return {}

    if not entries:
        print("report_create: retrieving metadata was unsuccessful")
        return {}

    # We now group entries by vendor
    grouped = {}
    for entry in entries:
        vendor = entry.get("vendor")
        if vendor is None:
            print(
                f"report_create: entry {entry} does not contain vendor")
            continue

        grouped.setdefault(vendor, []).append(entry)
    return grouped

# Metadata of the existing, not yet cleaned dataset, grouped by vendor.
# load_metadata returns an empty dict when loading fails.
metadata = load_metadata('../metadata.json')

In [None]:
# Vendors present in the metadata (load_metadata groups entries by vendor).
metadata.keys()

Now, what we want to do in order to create a clean dataset of results:

- Try to parse each file using the tools from `algtestprocess`, take note of each missing result, and note the paths of results which have to be checked and possibly won't be included in the resulting dataset. This has to be done manually; by default we won't remove any results which had some successful results from at least one category: support, performance, or cryptographic properties.

In [None]:
import logging, sys
# Silence all logging output from here on: logging.disable() suppresses every
# record at or below the given level, and sys.maxsize lies above the standard
# levels (CRITICAL is 50). Presumably done to keep parser log messages out of
# the cell output below -- TODO confirm.
logging.disable(sys.maxsize)

In [None]:
from algtestprocess.modules.data.tpm.manager import TPMProfileManager
from algtestprocess.modules.data.tpm.enums import CryptoPropResultCategory as cat

cryptoprops = []
for vendor, vendor_entries in metadata.items():
    for entry in vendor_entries:
        tpm_name = entry['TPM name']
        for measurement_path in entry['measurement paths']:
            man = TPMProfileManager(measurement_path)
            performance = man.performance_profile
            support = man.support_profile
            cpps = man.cryptoprops
            # At least one of the three categories has to be present.
            assert performance or support or cpps

            # Results are only reported here, never dropped automatically:
            # even when one measurement phase failed or was intentionally
            # skipped, the other phases might have been successful, and we
            # do not want to lose that data.
            if performance is not None:
                if len(performance.results) < 10:
                    print(f"Suspiciously low number of performance profile entries {len(performance.results)=} at {measurement_path}")
            else:
                print(f"MISSING - performance profile at {measurement_path}")

            if support is not None:
                if len(support.results) < 10:
                    print(f"Suspiciously low number of support profile entries {len(support.results)=} at {measurement_path}")
            else:
                print(f"MISSING - support profile at {measurement_path}")

            if cpps is not None:
                if len(cpps.results) < 5:
                    print(f"Suspiciously low number of cryptoprops profile entries {len(cpps.results)=} at {measurement_path}")
            else:
                print(f"MISSING - cryptoprops profile at {measurement_path}")
            

In [None]:
# Measurement paths of results judged unusable after manually reviewing the
# report printed above; fill this list in by hand.
blacklist = []

Now it is assumed that you have put all the bad result paths into `blacklist`. We may now create the clean dataset by copying all results and sorting them into folders by manufacturer and firmware version.

In [None]:
import os
import subprocess

# Root of the clean dataset; results end up in
# <output_folder>/<vendor>/<firmware version>/<result folder>.
output_folder = os.path.join('./tpm-dataset/')
# Deliberately fails when the folder already exists: `cp -r` into an existing
# destination would nest a second copy inside the first one.
os.mkdir(output_folder)

# Normalised so that a trailing slash or a redundant "./" in either the
# blacklist or the metadata does not prevent a match.
blacklisted = {os.path.normpath(path) for path in blacklist}

used_names = set()
counter = 0
for vendor in metadata.keys():
    vendor_folder = os.path.join(output_folder, vendor)
    os.makedirs(vendor_folder, exist_ok=True)

    for entry in metadata.get(vendor):
        for measurement_path in entry['measurement paths']:
            if os.path.normpath(measurement_path) in blacklisted:
                print(f"Skipping blacklisted {measurement_path}")
                continue

            man = TPMProfileManager(measurement_path)
            support = man.support_profile
            # The firmware version is read from the support profile, so the
            # profile has to be checked before it is dereferenced.
            assert support, f"no support profile at {measurement_path}"
            fw = support.firmware_version
            assert fw, f"no firmware version at {measurement_path}"

            firmware_folder = os.path.join(vendor_folder, fw)
            os.makedirs(firmware_folder, exist_ok=True)

            # normpath drops a trailing slash, which would otherwise yield an
            # empty folder name.
            folder_name = os.path.basename(os.path.normpath(measurement_path))
            if folder_name in used_names:
                # We have folder named same in the measurements so we must rename it
                firmware_folder = os.path.join(firmware_folder, f"{folder_name}-{fw}-{counter}")
                counter += 1
            else:
                firmware_folder = os.path.join(firmware_folder, folder_name)

            used_names.add(folder_name)

            print(f"Copying {folder_name} to {firmware_folder}")
            # check=True turns a failed copy into an exception instead of
            # silently leaving a hole in the dataset.
            subprocess.run(['cp', '-r', measurement_path, firmware_folder], check=True)
            

# TPM firmware table
- now we create a firmware table out of the results we have cleaned 

In [None]:
#!python ../process.py tpm metadata-update ./tpm-dataset

In [None]:
# Rebinds `metadata` to the metadata read from ./metadata.json -- presumably
# the file produced for the clean dataset by the metadata-update command in
# the previous cell; TODO confirm the path.
metadata = load_metadata('./metadata.json')

## Utilities

In [None]:
import re
from math import inf

def _table(l, cols, header):
    # header repeat col times
    out = ""
    out += "| " + (" | ".join(header) + " | ") * cols + "\n"
    out += "| " + ("|".join(["---"] * (cols * len(header))) + " | ") + "\n"

    i = 0
    while i < len(l):
        out += "| "
        for _ in range(cols):
            if i < len(l):
                entries = l[i]
            else:
                entries = [" " for _ in range(len(header))]

            assert len(entries) == len(header)

            out += " | ".join(map(str, entries)) + " | "
            i += 1
        out += "\n"
    return out

In [None]:
from IPython.display import display, Markdown
# Collect, per vendor, the set of firmware versions reported by the support
# profiles of all measurements.
firmwares = {}
for vendor, vendor_entries in metadata.items():
    for entry in vendor_entries:
        for measurement_path in entry['measurement paths']:
            man = TPMProfileManager(measurement_path)
            support = man.support_profile
            fw = support.firmware_version
            firmwares.setdefault(vendor, set()).add(fw)


firmwares

In [None]:
def sorted_fws(fws):
    """Return firmware version strings sorted component by component.

    Versions are split on ``'.'`` and numeric components are compared as
    integers, so ``'1.10'`` sorts after ``'1.9'``. A component that is not
    an integer (e.g. ``'2a'``) is compared as text and sorts after every
    numeric component in the same position, instead of raising ValueError.

    Args:
        fws: Iterable of firmware version strings.

    Returns:
        A new list with the versions in ascending order.
    """
    def version_key(fw):
        key = []
        for part in fw.split('.'):
            # Uniform (kind, number, text) triples keep keys comparable even
            # when numeric and textual components meet in the same position.
            try:
                key.append((0, int(part), ''))
            except ValueError:
                key.append((1, 0, part))
        return key

    return sorted(fws, key=version_key)

def delimit_fws(fws, period):
    """Join firmware versions with ', ', adding '<br>' before every ``period``-th item.

    Args:
        fws: Sequence of firmware version strings.
        period: Number of versions per line; a '<br>' follows the separator
            in front of each item whose index is a non-zero multiple of it.

    Returns:
        The joined string (empty for an empty sequence).
    """
    pieces = []
    for position, firmware in enumerate(fws):
        if position:
            pieces.append(', ')
            if position % period == 0:
                pieces.append('<br>')
        pieces.append(firmware)
    return ''.join(pieces)
    

In [None]:
# Remove the VMW vendor from the firmware table. The membership check keeps
# the cell re-runnable and tolerant of datasets without any VMW entry.
if 'VMW' in firmwares:
    del firmwares['VMW']

In [None]:
# One table row per vendor: its firmware versions sorted and joined, with a
# line break after every five versions.
vendor_rows = []
for vendor, fws in firmwares.items():
    vendor_rows.append([vendor, delimit_fws(sorted_fws(fws), 5)])
print(_table(vendor_rows, 1, ['Vendor', 'Firmware versions']))

In [None]:
# Build a table with one column per vendor; each column lists that vendor's
# sorted firmware versions from the top, shorter columns are left blank.
header = []
rows = []
column_count = len(firmwares.keys())
for i, (vendor, fws) in enumerate(firmwares.items()):
    header.append(vendor)
    for j, fw in enumerate(sorted_fws(fws)):
        if len(rows) <= j:
            # First vendor with this many versions: start a new blank row.
            rows.append([' '] * column_count)
        rows[j][i] = fw

display(Markdown(_table(rows, 1, header)))