# RSA fingerprinting

In [None]:
#!python -m venv venv
#!source /home/tjaros/Development/python/310/git/algtest-pyprocess/pyprocess-venv/bin/activate.fish
#!python -m pip install -r ./requirements.txt
#!pip install pandas

In [None]:
#!git clone git@github.com:crocs-muni/RSABias.git
#!cp ./RSABias/roca.py .
#!cp ./RSABias/rsa_fingerprint.py .

In [None]:
from algtestprocess.modules.utilities.rsa_fingerprint import *

### Creation of the metadata.json

If you already have the `metadata.json` file you want to use, you can skip this step.

In [None]:
#!python ../process.py tpm metadata-update /home/tjaros/storage/research/tpm/

### Load metadata

In [None]:
import json

def load_metadata(metadata_path):
    """
    Loads the metadata JSON file and groups its entries by vendor.

    The file is expected to hold an object with an "entries" mapping; each
    entry is a mapping that should contain a "vendor" key. Entries without
    a vendor are reported and skipped.

    Returns a dict mapping vendor -> list of entries. An empty dict is
    returned (after printing a message) when the file cannot be read or
    parsed, lacks a usable "entries" mapping, or has no entries.
    """
    try:
        with open(metadata_path, "r") as f:
            metadata = json.load(f)

        entries = metadata["entries"].values()
        # Explicit check instead of `assert`, which is stripped under -O
        if not entries:
            raise ValueError("metadata contains no entries")
    except (OSError, ValueError, KeyError, TypeError, AttributeError):
        # OSError: unreadable file; ValueError: invalid JSON or no entries;
        # KeyError/TypeError/AttributeError: unexpected JSON structure.
        # Anything else (e.g. KeyboardInterrupt) is left to propagate.
        print("report_create: retrieving metadata was unsuccessful")
        return {}

    # We now group entries by vendor
    grouped = {}
    for entry in entries:
        vendor = entry.get("vendor")
        if vendor is None:
            print(
                f"report_create: entry {entry} does not contain vendor")
            continue

        grouped.setdefault(vendor, []).append(entry)
    return grouped

metadata = load_metadata('./metadata.json')

### Load profiles from vendor
Choose the vendor you want to look at below


In [None]:
# Silence logging for the rest of the notebook: logging.disable() suppresses
# every call at or below the given level, and sys.maxsize is above all of the
# standard logging levels.
import logging, sys
logging.disable(sys.maxsize)

In [None]:
from algtestprocess.modules.data.tpm.manager import TPMProfileManager
from algtestprocess.modules.data.tpm.enums import CryptoPropResultCategory as cat

# Collect the cryptoprops profile of every measurement listed in the metadata;
# measurements whose manager yields no cryptoprops (None) are left out.
cryptoprops = []
for vendor_entries in metadata.values():
    for entry in vendor_entries:
        for measurement_path in entry['measurement paths']:
            cpps = TPMProfileManager(measurement_path).cryptoprops
            if cpps is not None:
                cryptoprops.append(cpps)

### Merging profiles that share the same firmware

In [None]:
def firmware_name(profile):
    """
    Builds the "<manufacturer> <firmware version>" label of a profile.
    """
    return "{} {}".format(profile.manufacturer, profile.firmware_version)

def group_by_vendor(cpps):
    """
    Groups profiles by their manufacturer.

    Returns a dict mapping manufacturer -> list of its profiles; both the
    manufacturers and the profiles keep the order in which they were seen.
    """
    by_vendor = {}
    for profile in cpps:
        by_vendor.setdefault(profile.manufacturer, []).append(profile)
    return by_vendor

def aggregate_by_firmware(cpps):
    """
    Merges profiles which share the same firmware name into one profile.

    Profiles with an equal `firmware_name` are combined using `+`; the
    result has one profile per firmware name, ordered by first appearance.
    None items are reported and skipped.
    """
    aggregated = {}
    # Iterate the argument, not the notebook-level `cryptoprops` list
    for cpp in cpps:
        if cpp is None:
            # A None item has no attributes, so it cannot be named here
            print("Cannot merge a profile, as it is None")
            continue

        fwn = firmware_name(cpp)
        if fwn in aggregated:
            aggregated[fwn] = aggregated[fwn] + cpp
        else:
            aggregated[fwn] = cpp
    return list(aggregated.values())
    
# All tpms in one list
tpms = cryptoprops
# All tpms grouped by vendor
tpms_grouped_by_vendor = group_by_vendor(tpms)
# All tpms aggregated by the tpm name
tpms_aggregated = aggregate_by_firmware(cryptoprops)
# All tpms aggregated by the tpm name and grouped by vendor
tpms_aggregated_grouped_by_vendor = group_by_vendor(tpms_aggregated)

### Utility functions

In [None]:
import re
from math import inf

def tpm_sorted(profiles, device_name):
    """
    Sorts the profiles according to manufacturer id alphabetically, then
    firmware version numerically, then by the optional "[index]" suffix
    (a name without an index sorts after the indexed ones of that version).

    `device_name` is a callable returning the name of a profile. Names are
    assumed to be in the form of RGX, e.g. "INTC 2.0.4" or "INTC 2.0.4 [1]";
    names which do not match are printed, sorting is attempted regardless.
    """
    # NOTE(review): `(\s*.+)+` nests quantifiers, so a long name that does not
    # match may take very long to be rejected; kept as is to preserve the
    # accepted format.
    RGX = r"(\s*.+)+\s\s*\d+(\.\d+)*(\s[\[]\d+[\]])?"

    # Materialise once, so one-shot iterables survive the validation pass
    profiles = list(profiles)

    # Explicit check instead of assert/except, which is skipped under -O
    mismatched = [
        name for p in profiles if not re.match(RGX, (name := device_name(p)))
    ]
    if mismatched:
        print("These device names do not match format")
        print(mismatched)

    def key_f(profile):
        # Without an explicit index the profile sorts last within its version
        idx = inf
        l, r = device_name(profile).rsplit(maxsplit=1)

        if re.match(r"[\[]\d+[\]]", r):
            # Trailing "[n]" is the index, the firmware is the token before it
            idx = int(r.replace("[", "").replace("]", ""))
            manufacturer, firmware = l.rsplit(maxsplit=1)
        else:
            manufacturer, firmware = l, r

        numbers = [int(x) for x in filter(None, firmware.split("."))]

        return [manufacturer] + numbers + [idx]

    return sorted(profiles, key=key_f)

def _table(l, cols, header):
    # header repeat col times
    out = ""
    out += "| " + (" | ".join(header) + " | ") * cols + "\n"
    out += "| " + ("|".join(["---"] * (cols * len(header))) + " | ") + "\n"

    i = 0
    while i < len(l):
        out += "| "
        for _ in range(cols):
            if i < len(l):
                entries = l[i]
            else:
                entries = [" " for _ in range(len(header))]

            assert len(entries) == len(header)

            out += " | ".join(map(str, entries)) + " | "
            i += 1
        out += "\n"
    return out


### Visualising the RSA primes

In [None]:
import matplotlib.pyplot as plt
from algtestprocess.modules.visualization.heatmap import Heatmap
import matplotlib.lines as mlines
import pandas as pd
import matplotlib.gridspec as gridspec

# Global matplotlib style for every figure created below: black ticks, labels
# and axes edges, serif font. "text.usetex" hands text rendering to LaTeX,
# which per the matplotlib documentation needs a working LaTeX installation.
plt.rcParams.update({
    "text.usetex": True,
    "ytick.color" : "black",
    "xtick.color" : "black",
    "axes.labelcolor" : "black",
    "axes.edgecolor" : "black",
    "font.family" : "serif",
    "font.serif" : ["Computer Modern Serif"]
})

def firmwarelist2id(firmware_versions):
    """
    Condenses a list of dotted firmware versions into a short label.

    Versions are sorted numerically and grouped by major version. A major
    version with one firmware is written out in full, otherwise it becomes
    "<major>.<minor>.X" or "<major>.<first minor>-<last minor>.X". A line
    break is inserted before a group once the first line of the label is
    longer than 12 characters.
    """
    def numeric_key(version):
        return [int(part) for part in version.split('.')]

    # Group the versions by their major number, in ascending numeric order
    by_major = {}
    for version in sorted(firmware_versions, key=numeric_key):
        by_major.setdefault(version.split('.')[0], []).append(version)

    label = ""
    for major, members in by_major.items():
        first_line = label.split('\n')[0]
        if len(first_line) > 12:
            label += "\n"

        if label:
            label += " "

        if len(members) == 1:
            label += members[0]
            continue

        low = members[0].split('.')[1]
        high = members[-1].split('.')[1]
        if low == high:
            label += f"{major}.{low}.X"
        else:
            label += f"{major}.{low}-{high}.X"

    # STM specific edit which should not be here
    stm_range = '1.258-769.X'
    if stm_range in label:
        label = label.replace(stm_range, '') + ' ' + stm_range

    return label
    
# Labelling rules for Pluton TPMs, consumed by cpps2fig. Each tuple is
# (manufacturer, firmware version, put a newline before the version,
#  put a newline after the version, prefix the version with the manufacturer);
# a matching version in the plot title also gets " (Pluton)" appended.
plutons = [
    ('AMD', '6.24.0.6', True, False, True),
    ('MSFT', '6.3.1.603', False, True, False)
]    



def cpps2fig(cpps, fig, alg, add_fingerprint, cpus=None):
    """
    Draws the RSA heatmap of one profile into the given (sub)figure.

    cpps            -- cryptoprops profile to plot, may be None
    fig             -- figure or subfigure handed over to Heatmap
    alg             -- result category to plot; None merges the RSA 1024
                       and RSA 2048 data of the profile
    add_fingerprint -- when true, the RSA fingerprint of the (p, q, n) rows
                       is computed and printed
    cpus            -- currently unused, kept for existing callers

    Returns the result of Heatmap(...).build(), or None when nothing can be
    plotted: no profile, no result for the requested category, no RSA data
    at all, or fewer than 50 rows having both p and n.
    """
    if cpps is None:
        return None

    result = None
    if alg is not None:
        result = cpps.results.get(alg)
        if result is None:
            return None

    tpm_name = f"{cpps.manufacturer}"
    if isinstance(cpps.firmware_version, list):
        tpm_name += f" {firmwarelist2id(cpps.firmware_version)}"
    else:
        tpm_name += f" {cpps.firmware_version}"

    # Temporary solution to label plutons
    for man, ver, first, last, add_man in plutons:
        if man in tpm_name and ver in tpm_name:
            tpm_name = tpm_name.replace(ver, ('\n' if first else '') +(f"{man} " if add_man else '')+ ver + ('\n' if last else '')+ " (Pluton)")

    if alg is None:
        # Special case when alg is None means we should merge the rsa dataframes
        dfs = []
        for rsa_alg in (cat.RSA_1024, cat.RSA_2048):
            rsa_result = cpps.results.get(rsa_alg)
            if rsa_result is not None:
                dfs.append(rsa_result.data)
        if not dfs:
            # Nothing to merge; report it like every other "cannot plot" case
            # instead of failing an assert (or pd.concat under -O)
            return None
        df = pd.concat(dfs)
    else:
        df = result.data

    # add the fingerprint analysis results
    if add_fingerprint:
        # We drop the nan rows
        pqn = df[['p', 'q', 'n']].dropna(subset=["p", "q", "n"]).values.tolist()

        fp = RSAFingerprintSet(pqn, recompute_q=True)
        fp.compute_fingerprint()
        print(str(fp))

    # Too few usable keys to draw a meaningful heatmap
    if len(df.dropna(subset=["p", "n"])) < 50:
        return None

    return Heatmap(df, tpm_name, fig=fig, ticks=False, legend=False, parts=['title', 'heatmap', 'text'], part_height_ratios=[1, 0.1], text_font_size = 9, title_font_size=5, label_values=False).build()

def create_multiplot(tpms, nrows, ncols, alg, figsize=(8.3, 11.7), add_fingerprint=False, add_legend=True, cpus=None):
    """
    Creates one figure with a grid of heatmaps, one per profile.

    tpms            -- iterable of profiles; items may be None
    nrows, ncols    -- shape of the grid; profiles beyond nrows * ncols
                       successfully drawn plots are ignored
    alg             -- result category passed on to cpps2fig
    figsize         -- figure size in inches
    add_fingerprint -- passed on to cpps2fig
    add_legend      -- when true, a legend of the heatmap lines is added
    cpus            -- passed on to cpps2fig

    Cells are filled row by row. A profile which cannot be plotted is
    reported and its cell is reused for the next profile.
    Returns the created figure.
    """
    # Creating the figure with a constrained layout to avoid axes overlapping
    fig = plt.figure(layout='constrained', figsize=figsize, dpi=800)
    grid = gridspec.GridSpec(ncols=ncols, nrows=nrows, figure=fig)
    subfig_count = ncols * nrows
    count = 0
    for cpps in tpms:
        if count >= subfig_count:
            break

        # The n-th successful plot goes to cell (n // ncols, n % ncols)
        row, col = divmod(count, ncols)
        subfig = fig.add_subfigure(grid[row, col])
        subfig.set_facecolor("none")

        if cpps2fig(cpps, subfig, alg, add_fingerprint, cpus=cpus) is not None:
            count += 1
        elif cpps is None:
            # A None profile has no device_name to report
            print("Plot for an empty (None) profile failed")
        else:
            print(f"Plot for {cpps.device_name} failed")

    if add_legend:
        # defining legend style and data
        legend_entries = [
            ('green', '$P_{min}$'),
            ('blue', '$P_{max}$'),
            ('orange', '$Q_{min}$'),
            ('purple', '$Q_{max}$'),
            ('black', '$P=Q$'),
        ]
        handles = [
            mlines.Line2D([], [], color=color, label=label, linestyle='dashed')
            for color, label in legend_entries
        ]

        fig.legend(handles=handles, loc='lower right', bbox_to_anchor=(1,0), bbox_transform=fig.transFigure, fontsize='x-large')
    return fig

## Create the multiplots

In [None]:
def aggregate_by_implementation(cpps, groups):
    """
    Merges profiles into one profile per group of firmware name patterns.

    `groups` is a list of lists of regular expressions. A profile belongs
    to a group when any pattern of the group matches the start of its
    firmware name; it is then merged into that group using `+=`.

    Returns a list aligned with `groups`, holding the merged profile of
    each group, or None for a group nothing matched.
    """
    matchers = [[re.compile(pattern) for pattern in group] for group in groups]

    merged = [None] * len(groups)
    for profile in cpps:
        name = firmware_name(profile)
        for slot, regexes in enumerate(matchers):
            if not any(regex.match(name) is not None for regex in regexes):
                continue
            if merged[slot] is None:
                merged[slot] = profile
            else:
                merged[slot] += profile
    return merged

In [None]:
#create_multiplot(tpms_aggregated, 8, 8, cat.RSA_2048, figsize=(22.14, 16.6), add_fingerprint=False)

### INTC

In [None]:
cpus = {
    "9.5.65.3000": (
        "4th Gen.",
        "Mobile Haswell",
        "ThinkPad X240 20AMS22U0A",
        "Intel Core i7-4600U",
        "laptop",
    ),
    "2.0.2.2092": (
        None,
        "Mobile Braswell",
        "Lenovo ideapad 100S-14IBR 80R9",
        "Dual-Core Intel Celeron N3050",
        "desktop",
    ),
    "2.0.4.3098": (
        None,
        "Mobile Braswell",
        "ASUStec X541SC",
        "Intel® Quad-Core Celeron® N3160 Processor 2016",
        "desktop",
    ),
    "3.0.12.1138": (
        "7th gen",
        "Mobile Apollo Lake",
        "Lenovo IdeaPad 120s-11iap",
        "Intel Celeron N3350",
        "laptop",
    ),
    "10.0.36.1030": (
        "5th Gen.",
        "Mobile Broadwell",
        "Lenovo B50-50 80S2",
        "Intel i3-5005U",
        "laptop",
    ),
    "10.0.60.3000": (
        "5th Gen.",
        "Mobile Broadwell",
        "ThinkPad T450 20BUS1CJ1T",
        "Intel Core i5-5300U",
        "laptop",
    ),
    "11.0.0.1160": (
        "6th Gen.",
        "Mobile Skylake",
        "Lenovo 80RU",
        "Intel® Core™ i7-6700HQ",
        "laptop",
    ),
    "11.0.2.1003": (None, None, None, "Lenovo Product unknown", "desktop"),
    "11.8.50.3399": (
        "5th Gen.",
        "Mobile Broadwell",
        "Lenovo B50-50 80S2",
        "Intel i3-5005U",
        "laptop",
    ),
   # "11.8.50.3425": (
   #     "7th gen",
   #     "due to similarity with Gigabyte Z270X-Gaming 5) 7 or 8th Gen. Mobile Kaby Lake / Kaby Lake R",
   #     None,
   #     "likely 7th Gen due to similarity with Gigabyte Z270X-Gaming 5",
   #     "laptop",
   # ),
   # "302.12.0.0": (
   #     "6th Gen.",
   #     "Skylake Mobile Kaby Lake i7-6700HQ",
   #     "ThinkPad P50 20EQS64N13",
   #     "Intel Core i7-6820HQ",
   #     "laptop",
   # ),
    "303.12.0.0": (
        "6th Gen.",
        "Skylake",
        "ThinkPad P50 20EQS64N00",
        "Intel Core i7-6820HQ",
        "desktop",
    ),
    "401.1.0.0": ("8th gen", "Mobile Coffee Lake", "P65 Creator 8RF", "i7-8750H", "laptop"),
    "402.1.0.0": (
        "8th Gen.",
        "Mobile Whiskey lake",
        None,
        "WhiskeyLake Client Platform",
        "laptop",
    ),
    "403.1.0.0": (
        "8th Gen.",
        "Mobile Whiskey Lake",
        "Lenovo IdeaPad S340-15IWL",
        "Intel Core i5-8265U",
        "laptop",
    ),
    "500.5.0.0": (
        "10th Gen.",
        "Comet Lake",
        "Yoga Slim 7 15IIL05",
        "Intel Core i5-10300H",
        "desktop",
    ),
    "500.10.0.0": (
        None,
        None,
        "Acer Spin SP114-31N",
        "Intel® Pentium® Silver N6000",
        "desktop",
    ),
    "500.14.0.0": (
        "10th Gen.",
        "Comet Lake",
        "ASUS TUF Gaming F17 FX706LI_FX766LI i7-10750H",
        "2020",
        "desktop",
    ),
    "600.7.0.0": ("11th gen", "Tiger Lake", "IdeaPad 3 15ITL6", "i5-1135G7", "desktop"),
    "600.7.41.2142": (
        "11th Gen.",
        "Tiger Lake",
        None,
        "Tiger Lake Client Platform",
        "desktop",
    ),
    "600.7.42.2235": (
        "11th Gen.",
        "Tiger Lake",
        "HP Pavilion Gaming Laptop 15-dk2xxx",
        "Intel® Core™ i5-11300H",
        "desktop",
    ),
    "600.18.0.0": ("12th gen", "Alder Lake", "IdeaPad 5 Pro 14IAP7", "i7-1260P", "desktop"),
    "600.18.25.1917": (
        "13th Gen.",
        "Raptor Lake",
        None,
        "Raptor Lake Client Platform",
        "desktop",
    ),
    "600.18.25.2020": (
        "13th Gen.",
        "Raptor Lake",
        None,
        "Raptor Lake Client Platform",
        "desktop",
    ),
    "600.18.25.2027": (
        "12th Gen.",
        "Alder Lake",
        None,
        "Alder Lake Client Platform",
        "desktop",
    ),
}

In [None]:
groups = [
    [
        r'INTC 2\.0\..*',           # INTC 2.0.X
        r'INTC 10\.0\..*',          # INTC 10.0.X
        r'INTC 11\.[0-8]\..*',      # INTC 11.0-8.X
        r'INTC 30[2-3]\.12\.0\.0',  # INTC 302-303.12.0.0
    ],
    [
        r'INTC 40[1-2]\.1\..*',     # INTC 40[1-2].1.0.0
    ],
    [
        r'INTC 403\.1\.0\.0',       # INTC 403.1.0.0
    ],
    [
        r'INTC 500\..*',    # INTC 500.5-14.X
        r'INTC 600\..*'     # INTC 600.7-18.X
    ]
]
intc_aggre = aggregate_by_implementation(tpms_aggregated, groups)

In [None]:
f = create_multiplot(intc_aggre, 1, 4, None, figsize=(8.3, 2), add_legend=False, cpus=cpus)

In [None]:
f.savefig("intc.pdf", dpi=300, bbox_inches="tight")

### IFX

In [None]:
groups = [
    [
        r'IFX 5\.61\..*',
        r'IFX 7\.40\..*',
        r'IFX 7\.61\..*'
    ],
    [
        r'IFX 7\.6[2-3]\..*',
        r'IFX 5\.63\.13\.6400'

    ],
    [
        r'IFX 7\.8[3-5]\..*' 
    ],
]
ifx_aggre = aggregate_by_implementation(tpms_aggregated, groups)

In [None]:
f=create_multiplot(ifx_aggre, 1, 4,None, figsize=(8.3, 2), add_legend=False)

In [None]:
f.savefig("ifx.pdf", dpi=300, bbox_inches="tight")

### NTC

In [None]:
groups = [
    [
        # Dots are escaped so that they match literal dots only, as in the
        # patterns of the other vendors
        r'NTC 7\.2\.3\.1'
    ]
]
ntc_aggre = aggregate_by_implementation(tpms_aggregated, groups)

In [None]:
f = create_multiplot(ntc_aggre, 1, 1, None, figsize=(2, 2), add_legend=False)

In [None]:
f.savefig("ntc.pdf", dpi=300, bbox_inches="tight")

### STM

In [None]:
groups = [
    [
        # 'STM *' is a regex for "STM" followed by optional spaces, which
        # matches any name merely starting with "STM"; require the
        # separating space like the other vendor patterns do
        r'STM .*'
    ]
]
stm_aggre = aggregate_by_implementation(tpms_aggregated, groups)

In [None]:
create_multiplot(stm_aggre, 1, 1, None, figsize=(2, 2), add_legend=False)

In [None]:
groups = [
    [
        r'AMD .*'
    ],
    [
        r'NTC .*'
    ],
    [
        r'STM .*'
    ],
    [
        r'MSFT .*'
    ]
]
nsa_aggre = aggregate_by_implementation(tpms_aggregated, groups)

In [None]:
f=create_multiplot(nsa_aggre, 1, 4,None, figsize=(8.3, 2), add_legend=False)

In [None]:
f.savefig("amd-ntc-stm-msft.pdf", dpi=300, bbox_inches="tight")

In [None]:
nsa_aggre[3].results[cat.RSA_2048].paths