# Metrics extraction

In [None]:
import pandas as pd
import numpy as np

In [None]:
class DilithiumStats:
    """Load the measurement CSVs of one design and merge them by security level.

    File names are built from the template
    ``{op_type}_{design_id}_lvl{sec_level}_tv{initial_tv}_{last_tv}.csv``.
    Each ``get_*_data`` method reads the level 2, 3 and 5 files of one
    operation, drops their ``success`` column and inner-joins the three
    tables on ``test_num``. Every other column is renamed to
    ``<column>_lvl<N>`` so that its security level is always explicit.

    NOTE(review): rows are not filtered on ``success`` before that column is
    dropped -- confirm that failed runs are meant to be part of the statistics.
    """

    # Security levels whose files are read, in the order they are merged.
    _SEC_LEVELS = ("2", "3", "5")
    # Column present in every file; used as the join key and never suffixed.
    _KEY_COLUMN = "test_num"

    def __init__(self, design_id: str, initial_tv: int, last_tv: int):
        """Store the values that identify the result files of one design.

        Args:
            design_id: Design identifier embedded in the file names.
            initial_tv: First test-vector index embedded in the file names.
            last_tv: Last test-vector index embedded in the file names.
        """
        self.base_template_vars = {
            "design_id": design_id,
            "initial_tv": initial_tv,
            "last_tv": last_tv
        }
        self.file_template = "{op_type}_{design_id}_lvl{sec_level}_tv{initial_tv}_{last_tv}.csv"

    def get_filename(self, op_type, sec_level):
        """Return the CSV file name for ``op_type`` at ``sec_level``."""
        return self.file_template.format(op_type=op_type, sec_level=sec_level, **self.base_template_vars)

    def _read_level(self, op_type, sec_level):
        """Read the CSV of one operation/level and discard its ``success`` column."""
        csv_path = self.get_filename(op_type=op_type, sec_level=sec_level)
        return pd.read_csv(csv_path).drop(columns="success")

    def _merge_levels(self, frames_by_level):
        """Suffix all non-key columns with ``_lvl<N>`` and inner-join on the key.

        Args:
            frames_by_level: Mapping of security level to its data frame, in
                the order the frames should appear in the merged result.
        """
        merged = None
        for sec_level, frame in frames_by_level.items():
            # Renaming up front (instead of relying on merge suffixes, which
            # only touch overlapping names) labels every column consistently.
            suffixed = frame.rename(columns={
                col: f"{col}_lvl{sec_level}"
                for col in frame.columns
                if col != self._KEY_COLUMN
            })
            if merged is None:
                merged = suffixed
            else:
                merged = pd.merge(merged, suffixed, on=self._KEY_COLUMN)
        return merged

    def _load_operation(self, op_type):
        """Read and merge the files of ``op_type`` for every security level."""
        return self._merge_levels({
            sec_level: self._read_level(op_type, sec_level)
            for sec_level in self._SEC_LEVELS
        })

    def get_keygen_data(self):
        """Return the key-generation measurements of all levels, one row per ``test_num``."""
        return self._load_operation("keygen")

    def get_sign_data(self):
        """Return the signing measurements of all levels, one row per ``test_num``.

        The ``rejects_count`` column of each file is replaced by
        ``rejects_tries_count`` (``rejects_count + 1``) before merging.
        """
        frames_by_level = {}
        for sec_level in self._SEC_LEVELS:
            frame = self._read_level("sign", sec_level)
            # A run without rejections gets a tries count of 1; presumably the
            # extra unit accounts for the final accepted attempt.
            frame['rejects_tries_count'] = frame['rejects_count'] + 1
            frames_by_level[sec_level] = frame.drop(columns="rejects_count")
        return self._merge_levels(frames_by_level)

    def get_verify_data(self):
        """Return the verification measurements of all levels, one row per ``test_num``."""
        return self._load_operation("verify")

In [None]:
# `hp_stats` reads the "perf1" result files and `lr_stats` the "perf0" ones;
# both cover the same test-vector range (0 through 9).
hp_stats, lr_stats = (
    DilithiumStats(design_id=design_id, initial_tv=0, last_tv=9)
    for design_id in ("perf1", "perf0")
)

## Key generation

In [None]:
# Load the merged key-generation measurements (security levels 2, 3 and 5)
# of both designs; each call reads the corresponding CSV files.
hp_keygen_df = hp_stats.get_keygen_data()
lr_keygen_df = lr_stats.get_keygen_data()

In [None]:
# Summary statistics of the "perf1" design's key-generation measurements.
hp_keygen_df.describe()

In [None]:
# Summary statistics of the "perf0" design's key-generation measurements.
lr_keygen_df.describe()

## Verify

In [None]:
# Load the merged verification measurements (security levels 2, 3 and 5)
# of both designs; each call reads the corresponding CSV files.
hp_verify_df = hp_stats.get_verify_data()
lr_verify_df = lr_stats.get_verify_data()

In [None]:
# Summary statistics of the "perf1" design's verification measurements.
hp_verify_df.describe()

In [None]:
# Summary statistics of the "perf0" design's verification measurements.
lr_verify_df.describe()

## Signing

In [None]:
# Load the merged signing measurements (security levels 2, 3 and 5) of both
# designs, including the per-level `rejects_tries_count_lvl<N>` columns.
hp_sign_df = hp_stats.get_sign_data()
lr_sign_df = lr_stats.get_sign_data()

In [None]:
# Summary statistics of the "perf1" design's signing measurements.
hp_sign_df.describe()

### Best and mean time

In [None]:
def calculate_best_and_mean_time(dataframe, cols_names_list, sec_levels=(2, 3, 5)):
    """Compute first-try ("best") and overall mean values per security level.

    For every level ``N`` in ``sec_levels`` and every name in
    ``cols_names_list``, the column ``<name>_lvl<N>`` is averaged twice: once
    over the rows whose ``rejects_tries_count_lvl<N>`` equals 1 (``'best'``)
    and once over all rows (``'mean'``).

    Args:
        dataframe: Frame holding the ``<name>_lvl<N>`` and
            ``rejects_tries_count_lvl<N>`` columns.
        cols_names_list: Column base names, without the ``_lvl<N>`` suffix.
        sec_levels: Security levels to report; defaults to 2, 3 and 5.

    Returns:
        ``{level: {name: {'best': ..., 'mean': ...}}}``. ``'best'`` is NaN
        when no row of that level has a tries count of 1.
    """
    result_dict = {}

    for sec_lvl in sec_levels:
        # The first-try filter depends only on the level, so it is built once
        # and shared by all requested columns.
        first_try_rows = dataframe[dataframe[f'rejects_tries_count_lvl{sec_lvl}'] == 1]
        result_dict[sec_lvl] = {
            col_name: {
                'best': first_try_rows[f'{col_name}_lvl{sec_lvl}'].mean(),
                'mean': dataframe[f'{col_name}_lvl{sec_lvl}'].mean(),
            }
            for col_name in cols_names_list
        }

    return result_dict

In [None]:
# First-try and overall mean of the total cycle count for the "perf1" design.
calculate_best_and_mean_time(hp_sign_df, ['total_cycles'])

In [None]:
# First-try and overall means for the "perf0" design, broken down by column.
# Assumes the "perf0" sign CSVs provide all of the columns listed below.
calculate_best_and_mean_time(
    lr_sign_df,
    ['load_sk_cycles', 'load_msg_cycles', 'exec_cycles', 'unload_cycles', 'total_cycles']
)

### Reject loop time estimation (unused)

In [None]:
def estimate_reject_loop_time(dataframe, cycles_col_name: str, sec_levels=(2, 3, 5)):
    """Estimate the cycles added by one extra signing try, per security level.

    For each level ``N`` the rows are grouped by
    ``rejects_tries_count_lvl<N>`` and the mean of
    ``<cycles_col_name>_lvl<N>`` is taken per group. Each pair of consecutive
    observed tries counts yields one estimate: the difference of the two
    group means divided by the difference of the tries counts. The level's
    result is the mean of those estimates.

    Args:
        dataframe: Frame holding the tries and cycles columns of every level.
        cycles_col_name: Cycles column base name, without the ``_lvl<N>`` suffix.
        sec_levels: Security levels to evaluate; defaults to 2, 3 and 5.

    Returns:
        A tuple with one estimate per entry of ``sec_levels``, in order. The
        entry is NaN when a level has fewer than two distinct tries counts.
    """
    result_list = []

    for sec_lvl in sec_levels:
        tries_col = dataframe[f'rejects_tries_count_lvl{sec_lvl}']
        cycles_col = dataframe[f'{cycles_col_name}_lvl{sec_lvl}']

        tries_values = sorted(tries_col.unique())
        # Compute each group's mean once; every interior tries count is used
        # by two consecutive pairs.
        mean_cycles = {
            tries: cycles_col[tries_col == tries].mean()
            for tries in tries_values
        }

        estimates = [
            (mean_cycles[current] - mean_cycles[previous]) / (current - previous)
            for previous, current in zip(tries_values, tries_values[1:])
        ]

        # np.mean([]) emits a RuntimeWarning; return NaN explicitly when there
        # is no pair of tries counts to compare.
        result_list.append(np.mean(estimates) if estimates else np.float64('nan'))

    return tuple(result_list)

In [None]:
# Per-level estimate of the total cycles added by one extra try ("perf1" design).
estimate_reject_loop_time(dataframe=hp_sign_df, cycles_col_name="total_cycles")