# Stats - Signature - Only 

In [1]:
import sys
if '../../../software' not in sys.path:
    sys.path.append('../../../software')
if 'python-dilithium' not in sys.path:
    sys.path.append('python-dilithium')
sys.path

['/home/alexis/chipwhisperer.dilithium/jupyter/courses/fault101',
 '/usr/lib/python38.zip',
 '/usr/lib/python3.8',
 '/usr/lib/python3.8/lib-dynload',
 '',
 '/home/alexis/.venvs/chipwhisperer.dilithium/lib/python3.8/site-packages',
 '../../../software',
 'python-dilithium']

In [2]:
import json
import chipwhisperer as cw
import ipywidgets as widgets
from collections import defaultdict
import pickle
import math
import linear
import numpy as np
from dilithium import Dilithium
import random
import chipwhisperer.common.results.glitch as glitch
import pandas as pd
from operator import itemgetter
import itertools
from frozendict import frozendict
import logging
import plotly.express as px
import matplotlib.pyplot as plt
import enum
from sklearn.cluster import DBSCAN

In [3]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

__LOGGER = logging.getLogger(__name__)

d = Dilithium(2)

In [4]:
# FILE_NAME = 'gc.results.pickled.signature-off1.5wid0.3-2023-03-14_05-21-19.pickle'
FILE_NAME = 'gc.results.pickled.signature-ext50plusoff1.5wid0.3-2023-03-14_13-38-16.pickle'

In [5]:
with open(FILE_NAME, "rb") as f:
    gr = pickle.load(f)

In [6]:
with open(FILE_NAME, "rb") as f:
    gr = pickle.load(f)

# get rid of constant, that shit is annoying ...
constant = gr.result_dict['constant'][0]['metadata']
del gr.result_dict['constant']
gr.groups = [group for group in gr.groups if group != 'constant']

# recreate the GlitchController object ...
gc = glitch.GlitchController(gr.groups, gr.parameters)

# add the results
for group, results in gr.result_dict.items():
    for result in results:
        parameters = result["parameters"]
        strdesc = result["strdesc"]
        metadata = result["metadata"]
        
        try:
            gc.add(group, parameters, strdesc, metadata)
        except TypeError:  # will be raised if we do not "gc.display_stats"; but not a problem: still collects all data
            pass

# fix that params are a tuple and not a list
for group, results in gc.results.result_dict.items():
    for result in results:
        result['parameters'] = tuple(result['parameters'])
        
# set ranges ...          
result_list_full = [result for group, result_list in gc.results.result_dict.items() if group != 'constant' for result in result_list]
if len(result_list_full) == 0:
    raise RuntimeError("GlitchResults object contains no results!")
for parameter_index, parameter in enumerate(gc.parameters):
    all_distinct_parameters = set(map(lambda result: result['parameters'][parameter_index], result_list_full))
    parameter_min = min(all_distinct_parameters)
    parameter_max = max(all_distinct_parameters)
    gc.set_range(parameter, parameter_min, parameter_max)

# We successfully recreate the GlitchController object!
    
def display_stats() -> None:
    """
    Custom version of display_stats because we do not know the 'current' set parameters.
    Thus we do display the current set parameters but the min and max of each parameter.
    """
    gc.widget_list_groups = [widgets.IntText(
        value=len(gc.results.__dict__['result_dict'][group]),
        description=f"{group} count:",
        disabled=True,
        layout=widgets.Layout(width='100%')
    ) for group in gc.groups]
    gc.widget_list_parameter = [widgets.FloatRangeSlider(
        value=[gc.parameter_min[i], gc.parameter_max[i]],
        min=gc.parameter_min[i],
        max=gc.parameter_max[i],
        description=f'{parameter} setting',
        disabled=True,
        readout_format='f',
        layout=widgets.Layout(width='100%')
    ) for i, parameter in enumerate(gc.parameters)]

    display(*(gc.widget_list_groups + gc.widget_list_parameter))

display_stats()

IntText(value=367, description='zeros count:', disabled=True, layout=Layout(width='100%'))

IntText(value=0, description='other count:', disabled=True, layout=Layout(width='100%'))

IntText(value=214390, description='to count count:', disabled=True, layout=Layout(width='100%'))

IntText(value=1297, description='to time count:', disabled=True, layout=Layout(width='100%'))

IntText(value=0, description='exc sign count:', disabled=True, layout=Layout(width='100%'))

IntText(value=0, description='exc get_sig count:', disabled=True, layout=Layout(width='100%'))

FloatRangeSlider(value=(50.0, 1835.0), description='ext_offset setting', disabled=True, layout=Layout(width='1…

FloatRangeSlider(value=(0.390625, 0.390625), description='offset setting', disabled=True, layout=Layout(width=…

FloatRangeSlider(value=(1.5625, 1.5625), description='width setting', disabled=True, layout=Layout(width='100%…

FloatRangeSlider(value=(1.0, 1.0), description='repeat setting', disabled=True, layout=Layout(width='100%'), m…

FloatRangeSlider(value=(0.0, 10.0), description='message setting', disabled=True, layout=Layout(width='100%'),…

FloatRangeSlider(value=(0.0, 10.0), description='redo setting', disabled=True, layout=Layout(width='100%'), ma…

In [7]:
pd.set_option('display.max_columns', None)

In [8]:
def count_le_beta(packed) -> np.uint:
    if type(packed) is not bytes:
        return None
    _, z, _ = d._unpack_sig(packed)
    return np.count_nonzero(np.abs(z) <= d.beta)
    

In [9]:
df = pd.json_normalize([{**result, 'group': group}
                        for group, result_list in gc.results.result_dict.items() if group != 'constant'
                        for result in result_list
                       ])

# rename column names
df.columns = [column.split('.')[-1] for column in df.columns]

# prepair group counts
for group in filter(lambda group: group != 'constant', gc.groups):
    df[group] = df['group'] == group
del df['group']

# set proper index
for i, parameter in enumerate(gc.parameters):
    df.insert(i, parameter, df['parameters'].apply(itemgetter(i)))
del df['parameters']
df = df.set_index('global_counter')
df = df.sort_index()

trig_count_nofault = df['trig_count'].value_counts().idxmax()  # is it really this value? Next time better save it in "constants"
assert trig_count_nofault % 4 == 0
trig_count_nofault_singleloop = trig_count_nofault // 4

df['le_beta'] = df['packed'].apply(count_le_beta)
df['trig_count_firstloop'] = df['trig_count'].apply(lambda x: x - trig_count_nofault_singleloop * (d.l - 1))
df['trig_count_ratio'] = df['trig_count_firstloop'].apply(lambda x: x / trig_count_nofault_singleloop)

del df['strdesc']

df

Unnamed: 0_level_0,ext_offset,offset,width,repeat,message,redo,reset_duration_ns,packed,message_int,message_bytes,trig_count,timestamp,zeros,other,to count,to time,exc sign,exc get_sig,le_beta,trig_count_firstloop,trig_count_ratio
global_counter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,50,0.390625,1.5625,1,0,0,,,0,b'\x00\x00\x00\x00',15904,1.678797e+09,False,False,True,False,False,False,,3976,1.000000
1,50,0.390625,1.5625,1,0,1,,,0,b'\x00\x00\x00\x00',15904,1.678797e+09,False,False,True,False,False,False,,3976,1.000000
2,50,0.390625,1.5625,1,0,2,,,0,b'\x00\x00\x00\x00',15904,1.678797e+09,False,False,True,False,False,False,,3976,1.000000
3,50,0.390625,1.5625,1,0,3,,,0,b'\x00\x00\x00\x00',15904,1.678797e+09,False,False,True,False,False,False,,3976,1.000000
4,50,0.390625,1.5625,1,0,4,,,0,b'\x00\x00\x00\x00',15904,1.678797e+09,False,False,True,False,False,False,,3976,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216049,1835,0.390625,1.5625,1,5,9,,,5,b'\x00\x00\x00\x05',15905,1.678915e+09,False,False,True,False,False,False,,3977,1.000252
216050,1835,0.390625,1.5625,1,5,10,,,5,b'\x00\x00\x00\x05',15904,1.678915e+09,False,False,True,False,False,False,,3976,1.000000
216051,1835,0.390625,1.5625,1,6,0,,,6,b'\x00\x00\x00\x06',15905,1.678915e+09,False,False,True,False,False,False,,3977,1.000252
216052,1835,0.390625,1.5625,1,6,1,,,6,b'\x00\x00\x00\x06',19266,1.678915e+09,False,False,True,False,False,False,,7338,1.845573


In [10]:
df_success_rates = df.copy()

df_success_rates = df_success_rates.reset_index().groupby(list(filter(lambda p: p != 'redo', gc.parameters))).agg({
    **{group: 'sum' for group in gc.groups if group != 'constant'}
})

df_success_rates['total good'] = df_success_rates[[group for group in gc.groups if constant['group_to_rating'][group] == 'good']].sum(axis=1)
df_success_rates['total bad'] = df_success_rates[[group for group in gc.groups if lambda group: constant['group_to_rating'][group] == 'bad']].sum(axis=1)

df_success_rates['success rate'] = df_success_rates['total good'] / (df_success_rates['total good'] + df_success_rates['total bad']).astype(np.float64)

df_success_rates

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,zeros,other,to count,to time,exc sign,exc get_sig,total good,total bad,success rate
ext_offset,offset,width,repeat,message,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
50,0.390625,1.5625,1,0,0,0,11,0,0,0,0,11,0.0
50,0.390625,1.5625,1,1,0,0,11,0,0,0,0,11,0.0
50,0.390625,1.5625,1,2,0,0,11,0,0,0,0,11,0.0
50,0.390625,1.5625,1,3,0,0,11,0,0,0,0,11,0.0
50,0.390625,1.5625,1,4,0,0,11,0,0,0,0,11,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835,0.390625,1.5625,1,2,0,0,11,0,0,0,0,11,0.0
1835,0.390625,1.5625,1,3,0,0,11,0,0,0,0,11,0.0
1835,0.390625,1.5625,1,4,0,0,11,0,0,0,0,11,0.0
1835,0.390625,1.5625,1,5,0,0,11,0,0,0,0,11,0.0


In [11]:
df_dsuccess_rates = df_success_rates.copy()

# only choose messages which have at least one successful try;
# we estimate that other messages did not fail becuase of bad parameters but because the resulting faulted signature is rejected

df_dsuccess_rates = df_dsuccess_rates[df_dsuccess_rates['zeros'] > 0]

df_dsuccess_rates = df_dsuccess_rates[gc.groups].copy()  # d is short for double grouping
df_dsuccess_rates = df_dsuccess_rates.reset_index().groupby([parameter for parameter in gc.parameters if parameter not in ['message', 'redo']]).agg('sum')
del df_dsuccess_rates['message']
df_dsuccess_rates

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,zeros,other,to count,to time,exc sign,exc get_sig
ext_offset,offset,width,repeat,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
51,0.390625,1.5625,1,2,0,20,0,0,0
54,0.390625,1.5625,1,4,0,29,0,0,0
55,0.390625,1.5625,1,2,0,9,0,0,0
113,0.390625,1.5625,1,1,0,10,0,0,0
116,0.390625,1.5625,1,8,0,36,0,0,0
...,...,...,...,...,...,...,...,...,...
1728,0.390625,1.5625,1,2,0,9,0,0,0
1729,0.390625,1.5625,1,6,0,27,0,0,0
1789,0.390625,1.5625,1,1,0,10,0,0,0
1790,0.390625,1.5625,1,4,0,29,0,0,0


In [12]:
df_zeros = df[df['zeros']]
for group in [g for g in gc.groups if g != 'constant']:
    del df_zeros[group]
df_zeros

Unnamed: 0_level_0,ext_offset,offset,width,repeat,message,redo,reset_duration_ns,packed,message_int,message_bytes,trig_count,timestamp,le_beta,trig_count_firstloop,trig_count_ratio
global_counter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
197,51,0.390625,1.5625,1,6,10,190313078.0,b'\xb0\x88\xa0p\x96\xa5?\xd8\xa1\xc9\t\xd4\xb1...,6,b'\x00\x00\x00\x06',11998,1.678798e+09,252.0,70,0.017606
204,51,0.390625,1.5625,1,7,6,190151466.0,"b'\xe3""a\xf93\xb3-\xa8\xe7""zI>\x19\xe5\xfb\xa4...",7,b'\x00\x00\x00\x07',11997,1.678798e+09,251.0,69,0.017354
511,54,0.390625,1.5625,1,2,5,188929849.0,b'\xf0\xfc\xa6\x05\x90}c\xd2\xb6\xf5v\x03\x86\...,2,b'\x00\x00\x00\x02',11997,1.678798e+09,251.0,69,0.017354
540,54,0.390625,1.5625,1,5,1,190388016.0,"b'\x82\xd8\x80\xfb\x99\xbamW\x9d6\xdf\x8a@,\x9...",5,b'\x00\x00\x00\x05',11997,1.678798e+09,252.0,69,0.017354
546,54,0.390625,1.5625,1,5,7,189875760.0,"b'\x82\xd8\x80\xfb\x99\xbamW\x9d6\xdf\x8a@,\x9...",5,b'\x00\x00\x00\x05',11997,1.678798e+09,252.0,69,0.017354
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210750,1791,0.390625,1.5625,1,8,1,189450404.0,b'\xa3\xa2\xe2P\x86\xb1vxk\xb5\x1f\xcdRw~\r\xf...,8,b'\x00\x00\x00\x08',13733,1.678912e+09,141.0,1805,0.453974
210752,1791,0.390625,1.5625,1,8,3,189317907.0,b'\xa3\xa2\xe2P\x86\xb1vxk\xb5\x1f\xcdRw~\r\xf...,8,b'\x00\x00\x00\x08',13733,1.678912e+09,141.0,1805,0.453974
210754,1791,0.390625,1.5625,1,8,5,196330105.0,b'\xa3\xa2\xe2P\x86\xb1vxk\xb5\x1f\xcdRw~\r\xf...,8,b'\x00\x00\x00\x08',13733,1.678912e+09,141.0,1805,0.453974
210757,1791,0.390625,1.5625,1,8,8,189283788.0,b'\xa3\xa2\xe2P\x86\xb1vxk\xb5\x1f\xcdRw~\r\xf...,8,b'\x00\x00\x00\x08',13733,1.678912e+09,141.0,1805,0.453974


In [13]:
df_zeros_grouped = df_zeros.copy()

df_zeros_grouped.reset_index()

df_zeros_grouped = df_zeros.groupby(list(filter(lambda p: p != 'redo', gc.parameters))).agg({
    **{column: lambda series: tuple(sorted(set(series.dropna().astype(np.uint)))) for column in ['le_beta', 'trig_count_firstloop', 'trig_count_ratio']},
    **{'trig_count_ratio': lambda series: tuple(sorted(set(series.dropna())))}
})

rows_before_merge = df_zeros_grouped.shape[0]
df_zeros_grouped = df_zeros_grouped.merge(df_success_rates, left_index=True, right_index=True)
assert rows_before_merge == df_zeros_grouped.shape[0], "loosing data due to an inner merge"

df_zeros_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,le_beta,trig_count_firstloop,trig_count_ratio,zeros,other,to count,to time,exc sign,exc get_sig,total good,total bad,success rate
ext_offset,offset,width,repeat,message,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
51,0.390625,1.5625,1,6,"(252,)","(70,)","(0.017605633802816902,)",1,0,10,0,0,0,1,11,0.083333
51,0.390625,1.5625,1,7,"(251,)","(69,)","(0.017354124748490945,)",1,0,10,0,0,0,1,11,0.083333
54,0.390625,1.5625,1,2,"(251,)","(69,)","(0.017354124748490945,)",1,0,10,0,0,0,1,11,0.083333
54,0.390625,1.5625,1,5,"(252,)","(69,)","(0.017354124748490945,)",2,0,9,0,0,0,2,11,0.153846
54,0.390625,1.5625,1,6,"(251,)","(69,)","(0.017354124748490945,)",1,0,10,0,0,0,1,11,0.083333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1790,0.390625,1.5625,1,2,"(139,)","(1805,)","(0.4539738430583501,)",2,0,9,0,0,0,2,11,0.153846
1790,0.390625,1.5625,1,5,"(141,)","(1805,)","(0.4539738430583501,)",1,0,10,0,0,0,1,11,0.083333
1791,0.390625,1.5625,1,0,"(142,)","(1805,)","(0.4539738430583501,)",3,0,8,0,0,0,3,11,0.214286
1791,0.390625,1.5625,1,7,"(141,)","(1805,)","(0.4539738430583501,)",4,0,7,0,0,0,4,11,0.266667


In [14]:
df_zeros_dgrouped = df_zeros_grouped.copy()

df_zeros_dgrouped = df_zeros_dgrouped.reset_index()

# delete all success rate related stuff; we will merge this later from a previous df ...
del df_zeros_dgrouped['success rate']
del df_zeros_dgrouped['total bad']
del df_zeros_dgrouped['total good']
for group in gc.groups:
    del df_zeros_dgrouped[group]

groupby_columns = list(filter(lambda p: p not in ['message', 'redo'], gc.parameters))
set_columns = [c for c in df_zeros_dgrouped.columns if c not in groupby_columns + ['message']]

df_zeros_dgrouped = df_zeros_dgrouped.groupby(groupby_columns).agg({
    **{column: lambda series: tuple(sorted(set().union(*series.apply(set)))) for column in set_columns}
})

# df_zeros_dgrouped['poly_index'] = df_zeros_dgrouped['le_beta'].apply(lambda x: tuple(sorted(set(map(lambda y: (d.n - y) // 4, x)))))

rows_before_merge = df_zeros_dgrouped.shape[0]
df_zeros_dgrouped = df_zeros_dgrouped.merge(df_dsuccess_rates, left_index=True, right_index=True)
assert rows_before_merge == df_zeros_dgrouped.shape[0]  # make sure we lost no data while performing an inner merge

df_zeros_dgrouped = df_zeros_dgrouped.sort_index()  # important that ext_offset is properly sorted for clustering
to_cluster_data = np.sort(df_zeros_dgrouped.reset_index()[['ext_offset']].values)
dbscan = DBSCAN(eps=10, min_samples=1)
dbscan.fit(to_cluster_data)
df_zeros_dgrouped['poly_index'] = dbscan.labels_
display(f'We scanned {len(set(dbscan.labels_))} iterations so far ...')

df_zeros_dgrouped['total good'] = df_zeros_dgrouped[[group for group in gc.groups if constant['group_to_rating'][group] == 'good']].sum(axis=1)
df_zeros_dgrouped['total bad'] = df_zeros_dgrouped[[group for group in gc.groups if lambda group: constant['group_to_rating'][group] == 'bad']].sum(axis=1)

df_zeros_dgrouped['success rate'] = df_zeros_dgrouped['total good'] / (df_zeros_dgrouped['total good'] + df_zeros_dgrouped['total bad']).astype(np.float64)


df_zeros_dgrouped['straight_lines'] = [{'l': tuple()} for _ in range(df_zeros_dgrouped.shape[0])]
points = list(map(lambda x: tuple(x) + (-1,), df_zeros_dgrouped.reset_index()[['ext_offset', 'poly_index']].values))
straight_lines = linear.get(points)
for i, straight_line in enumerate(straight_lines):
    straight_line['l'] = bytes(['A'.encode()[0] + i]).decode()

offset = set(df_zeros_dgrouped.reset_index()['offset']).pop()
width = set(df_zeros_dgrouped.reset_index()['width']).pop()
repeat = set(df_zeros_dgrouped.reset_index()['repeat']).pop()
for straight_line in straight_lines:
    straight_line['total_good'] = 0
    straight_line['total_bad'] = 0
    ext_offsets = [straight_line['y_intercept'] + poly_index * straight_line['slope'] for poly_index in range(d._polyz_unpack_num_iters + 1)]
    for ext_offset in ext_offsets:
        try:
            df_zeros_dgrouped.loc[(ext_offset, offset, width, repeat),'straight_lines']['l'] += (straight_line,)
            straight_line['total_good'] += df_zeros_dgrouped.loc[(ext_offset, offset, width, repeat),'total good']
            straight_line['total_bad'] += df_zeros_dgrouped.loc[(ext_offset, offset, width, repeat),'total bad']
        except KeyError:
            pass  # well, that glitch was not detected ...
    straight_line['success_rate'] = straight_line['total_good'] / (straight_line['total_good'] + straight_line['total_bad'])
straight_lines = list(reversed(sorted(straight_lines, key=lambda s: (s['num_points'], s['success_rate']))))

display(straight_lines)
with pd.option_context('display.max_rows', None, 'display.max_colwidth', None):
    display(df_zeros_dgrouped)

'We scanned 29 iterations so far ...'

[{'slope': 62,
  'y_intercept': 55,
  'num_points': 28,
  'l': 'B',
  'total_good': 186,
  'total_bad': 781,
  'success_rate': 0.1923474663908997},
 {'slope': 62,
  'y_intercept': 54,
  'num_points': 28,
  'l': 'A',
  'total_good': 105,
  'total_bad': 682,
  'success_rate': 0.13341804320203304},
 {'slope': 62,
  'y_intercept': 51,
  'num_points': 17,
  'l': 'C',
  'total_good': 45,
  'total_bad': 297,
  'success_rate': 0.13157894736842105},
 {'slope': 62,
  'y_intercept': 53,
  'num_points': 17,
  'l': 'D',
  'total_good': 31,
  'total_bad': 253,
  'success_rate': 0.10915492957746478},
 {'slope': 61,
  'y_intercept': 72,
  'num_points': 4,
  'l': 'L',
  'total_good': 29,
  'total_bad': 88,
  'success_rate': 0.24786324786324787},
 {'slope': 63,
  'y_intercept': 38,
  'num_points': 4,
  'l': 'P',
  'total_good': 28,
  'total_bad': 99,
  'success_rate': 0.2204724409448819},
 {'slope': 61,
  'y_intercept': 56,
  'num_points': 4,
  'l': 'E',
  'total_good': 24,
  'total_bad': 99,
  'success

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,le_beta,trig_count_firstloop,trig_count_ratio,zeros,other,to count,to time,exc sign,exc get_sig,poly_index,total good,total bad,success rate,straight_lines
ext_offset,offset,width,repeat,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
51,0.390625,1.5625,1,"(251, 252)","(69, 70)","(0.017354124748490945, 0.017605633802816902)",2,0,20,0,0,0,0,2,22,0.083333,"{'l': ({'slope': 62, 'y_intercept': 51, 'num_points': 17, 'l': 'C', 'total_good': 45, 'total_bad': 297, 'success_rate': 0.13157894736842105},)}"
54,0.390625,1.5625,1,"(251, 252)","(69,)","(0.017354124748490945,)",4,0,29,0,0,0,0,4,33,0.108108,"{'l': ({'slope': 62, 'y_intercept': 54, 'num_points': 28, 'l': 'A', 'total_good': 105, 'total_bad': 682, 'success_rate': 0.13341804320203304},)}"
55,0.390625,1.5625,1,"(253,)","(69,)","(0.017354124748490945,)",2,0,9,0,0,0,0,2,11,0.153846,"{'l': ({'slope': 62, 'y_intercept': 55, 'num_points': 28, 'l': 'B', 'total_good': 186, 'total_bad': 781, 'success_rate': 0.1923474663908997},)}"
113,0.390625,1.5625,1,"(248,)","(131,)","(0.0329476861167002,)",1,0,10,0,0,0,1,1,11,0.083333,"{'l': ({'slope': 62, 'y_intercept': 51, 'num_points': 17, 'l': 'C', 'total_good': 45, 'total_bad': 297, 'success_rate': 0.13157894736842105}, {'slope': 63, 'y_intercept': 50, 'num_points': 4, 'l': 'S', 'total_good': 12, 'total_bad': 55, 'success_rate': 0.1791044776119403})}"
116,0.390625,1.5625,1,"(247, 248, 249)","(131,)","(0.0329476861167002,)",8,0,36,0,0,0,1,8,44,0.153846,"{'l': ({'slope': 62, 'y_intercept': 54, 'num_points': 28, 'l': 'A', 'total_good': 105, 'total_bad': 682, 'success_rate': 0.13341804320203304},)}"
117,0.390625,1.5625,1,"(248, 249, 250)","(131,)","(0.0329476861167002,)",18,0,37,0,0,0,1,18,55,0.246575,"{'l': ({'slope': 62, 'y_intercept': 55, 'num_points': 28, 'l': 'B', 'total_good': 186, 'total_bad': 781, 'success_rate': 0.1923474663908997}, {'slope': 61, 'y_intercept': 56, 'num_points': 4, 'l': 'E', 'total_good': 24, 'total_bad': 99, 'success_rate': 0.1951219512195122})}"
178,0.390625,1.5625,1,"(243, 244, 245)","(193,)","(0.048541247484909456,)",4,0,18,0,0,0,2,4,22,0.153846,"{'l': ({'slope': 62, 'y_intercept': 54, 'num_points': 28, 'l': 'A', 'total_good': 105, 'total_bad': 682, 'success_rate': 0.13341804320203304}, {'slope': 61, 'y_intercept': 56, 'num_points': 4, 'l': 'E', 'total_good': 24, 'total_bad': 99, 'success_rate': 0.1951219512195122})}"
179,0.390625,1.5625,1,"(245,)","(193,)","(0.048541247484909456,)",6,0,16,0,0,0,2,6,22,0.214286,"{'l': ({'slope': 62, 'y_intercept': 55, 'num_points': 28, 'l': 'B', 'total_good': 186, 'total_bad': 781, 'success_rate': 0.1923474663908997}, {'slope': 61, 'y_intercept': 57, 'num_points': 4, 'l': 'F', 'total_good': 12, 'total_bad': 66, 'success_rate': 0.15384615384615385})}"
239,0.390625,1.5625,1,"(240,)","(255,)","(0.06413480885311872,)",1,0,10,0,0,0,3,1,11,0.083333,"{'l': ({'slope': 62, 'y_intercept': 53, 'num_points': 17, 'l': 'D', 'total_good': 31, 'total_bad': 253, 'success_rate': 0.10915492957746478}, {'slope': 61, 'y_intercept': 56, 'num_points': 4, 'l': 'E', 'total_good': 24, 'total_bad': 99, 'success_rate': 0.1951219512195122}, {'slope': 63, 'y_intercept': 50, 'num_points': 4, 'l': 'S', 'total_good': 12, 'total_bad': 55, 'success_rate': 0.1791044776119403})}"
240,0.390625,1.5625,1,"(239,)","(255,)","(0.06413480885311872,)",3,0,19,0,0,0,3,3,22,0.12,"{'l': ({'slope': 62, 'y_intercept': 54, 'num_points': 28, 'l': 'A', 'total_good': 105, 'total_bad': 682, 'success_rate': 0.13341804320203304}, {'slope': 61, 'y_intercept': 57, 'num_points': 4, 'l': 'F', 'total_good': 12, 'total_bad': 66, 'success_rate': 0.15384615384615385})}"


In [15]:
df_poly_idx = df_zeros_dgrouped.copy()
df_poly_idx = df_poly_idx.reset_index()

df_poly_idx = df_poly_idx.groupby('poly_index').apply(lambda group: group.loc[group['success rate'].idxmax()])

df_poly_idx['straight_lines_label'] = df_poly_idx['straight_lines'].apply(lambda straight_lines: tuple(map(lambda straight_line: straight_line['l'], straight_lines['l'])))

with pd.option_context('display.max_rows', None, 'display.max_colwidth', None):
    display(df_poly_idx)
display(f'Following shows what straight_line occurs how often in this dataframe showing only the best parameters for every poly_index')
df_poly_idx['straight_lines_label'].explode().value_counts()

Unnamed: 0_level_0,ext_offset,offset,width,repeat,le_beta,trig_count_firstloop,trig_count_ratio,zeros,other,to count,to time,exc sign,exc get_sig,poly_index,total good,total bad,success rate,straight_lines,straight_lines_label
poly_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,55,0.390625,1.5625,1,"(253,)","(69,)","(0.017354124748490945,)",2,0,9,0,0,0,0,2,11,0.153846,"{'l': ({'slope': 62, 'y_intercept': 55, 'num_points': 28, 'l': 'B', 'total_good': 186, 'total_bad': 781, 'success_rate': 0.1923474663908997},)}","(B,)"
1,117,0.390625,1.5625,1,"(248, 249, 250)","(131,)","(0.0329476861167002,)",18,0,37,0,0,0,1,18,55,0.246575,"{'l': ({'slope': 62, 'y_intercept': 55, 'num_points': 28, 'l': 'B', 'total_good': 186, 'total_bad': 781, 'success_rate': 0.1923474663908997}, {'slope': 61, 'y_intercept': 56, 'num_points': 4, 'l': 'E', 'total_good': 24, 'total_bad': 99, 'success_rate': 0.1951219512195122})}","(B, E)"
2,179,0.390625,1.5625,1,"(245,)","(193,)","(0.048541247484909456,)",6,0,16,0,0,0,2,6,22,0.214286,"{'l': ({'slope': 62, 'y_intercept': 55, 'num_points': 28, 'l': 'B', 'total_good': 186, 'total_bad': 781, 'success_rate': 0.1923474663908997}, {'slope': 61, 'y_intercept': 57, 'num_points': 4, 'l': 'F', 'total_good': 12, 'total_bad': 66, 'success_rate': 0.15384615384615385})}","(B, F)"
3,241,0.390625,1.5625,1,"(241, 242)","(255,)","(0.06413480885311872,)",13,0,20,0,0,0,3,13,33,0.282609,"{'l': ({'slope': 62, 'y_intercept': 55, 'num_points': 28, 'l': 'B', 'total_good': 186, 'total_bad': 781, 'success_rate': 0.1923474663908997}, {'slope': 61, 'y_intercept': 58, 'num_points': 4, 'l': 'G', 'total_good': 21, 'total_bad': 88, 'success_rate': 0.1926605504587156})}","(B, G)"
4,302,0.390625,1.5625,1,"(235,)","(317,)","(0.07972837022132796,)",3,0,8,0,0,0,4,3,11,0.214286,"{'l': ({'slope': 62, 'y_intercept': 54, 'num_points': 28, 'l': 'A', 'total_good': 105, 'total_bad': 682, 'success_rate': 0.13341804320203304}, {'slope': 61, 'y_intercept': 58, 'num_points': 4, 'l': 'G', 'total_good': 21, 'total_bad': 88, 'success_rate': 0.1926605504587156}, {'slope': 63, 'y_intercept': 50, 'num_points': 4, 'l': 'S', 'total_good': 12, 'total_bad': 55, 'success_rate': 0.1791044776119403})}","(A, G, S)"
5,365,0.390625,1.5625,1,"(233,)","(379,)","(0.09532193158953722,)",7,0,15,0,0,0,5,7,22,0.241379,"{'l': ({'slope': 62, 'y_intercept': 55, 'num_points': 28, 'l': 'B', 'total_good': 186, 'total_bad': 781, 'success_rate': 0.1923474663908997}, {'slope': 63, 'y_intercept': 50, 'num_points': 4, 'l': 'S', 'total_good': 12, 'total_bad': 55, 'success_rate': 0.1791044776119403})}","(B, S)"
6,427,0.390625,1.5625,1,"(228, 229)","(441,)","(0.11091549295774648,)",6,0,16,0,0,0,6,6,22,0.214286,"{'l': ({'slope': 62, 'y_intercept': 55, 'num_points': 28, 'l': 'B', 'total_good': 186, 'total_bad': 781, 'success_rate': 0.1923474663908997},)}","(B,)"
7,488,0.390625,1.5625,1,"(223, 224)","(503,)","(0.12650905432595574,)",6,0,16,0,0,0,7,6,22,0.214286,"{'l': ({'slope': 62, 'y_intercept': 54, 'num_points': 28, 'l': 'A', 'total_good': 105, 'total_bad': 682, 'success_rate': 0.13341804320203304}, {'slope': 63, 'y_intercept': 47, 'num_points': 4, 'l': 'R', 'total_good': 14, 'total_bad': 77, 'success_rate': 0.15384615384615385})}","(A, R)"
8,551,0.390625,1.5625,1,"(222,)","(565,)","(0.142102615694165,)",3,0,8,0,0,0,8,3,11,0.214286,"{'l': ({'slope': 62, 'y_intercept': 55, 'num_points': 28, 'l': 'B', 'total_good': 186, 'total_bad': 781, 'success_rate': 0.1923474663908997}, {'slope': 63, 'y_intercept': 47, 'num_points': 4, 'l': 'R', 'total_good': 14, 'total_bad': 77, 'success_rate': 0.15384615384615385})}","(B, R)"
9,613,0.390625,1.5625,1,"(217, 218)","(627,)","(0.15769617706237424,)",6,0,16,0,0,0,9,6,22,0.214286,"{'l': ({'slope': 62, 'y_intercept': 55, 'num_points': 28, 'l': 'B', 'total_good': 186, 'total_bad': 781, 'success_rate': 0.1923474663908997}, {'slope': 61, 'y_intercept': 64, 'num_points': 4, 'l': 'H', 'total_good': 16, 'total_bad': 88, 'success_rate': 0.15384615384615385})}","(B, H)"


'Following shows what straight_line occurs how often in this dataframe showing only the best parameters for every poly_index'

B    16
A     6
C     4
D     3
L     3
P     3
G     2
S     2
R     2
H     2
O     2
J     1
M     1
Q     1
I     1
E     1
F     1
N     1
Name: straight_lines_label, dtype: int64

In [16]:
# if we have the data we use df_poly_idx; else we rely on the best slope with the a) most matching points and b) best success rate
final_ext_offsets = [df_poly_idx.loc[poly_index, 'ext_offset'] if poly_index in df_poly_idx.index else straight_lines[0]['y_intercept'] + poly_index * straight_lines[0]['slope'] for poly_index in range(d._polyz_unpack_num_iters - 1)]
display(final_ext_offsets, len(final_ext_offsets))
display(str(final_ext_offsets))

[55,
 117,
 179,
 241,
 302,
 365,
 427,
 488,
 551,
 613,
 673,
 736,
 798,
 857,
 919,
 983,
 1045,
 1109,
 1170,
 1233,
 1295,
 1353,
 1419,
 1481,
 1543,
 1605,
 1663,
 1728,
 1791,
 1853,
 1915,
 1977,
 2039,
 2101,
 2163,
 2225,
 2287,
 2349,
 2411,
 2473,
 2535,
 2597,
 2659,
 2721,
 2783,
 2845,
 2907,
 2969,
 3031,
 3093,
 3155,
 3217,
 3279,
 3341,
 3403,
 3465,
 3527,
 3589,
 3651,
 3713,
 3775,
 3837,
 3899]

63

'[55, 117, 179, 241, 302, 365, 427, 488, 551, 613, 673, 736, 798, 857, 919, 983, 1045, 1109, 1170, 1233, 1295, 1353, 1419, 1481, 1543, 1605, 1663, 1728, 1791, 1853, 1915, 1977, 2039, 2101, 2163, 2225, 2287, 2349, 2411, 2473, 2535, 2597, 2659, 2721, 2783, 2845, 2907, 2969, 3031, 3093, 3155, 3217, 3279, 3341, 3403, 3465, 3527, 3589, 3651, 3713, 3775, 3837, 3899]'

In [17]:
delimiterhere

NameError: name 'delimiterhere' is not defined

In [None]:
result_list_full

In [None]:
def mode_warning_tie(series: pd.Series) -> np.uint:
    mode_series = series.dropna().astype(np.uint).mode()
    if len(mode_series) == 0:
        return np.nan
    winner = mode_series.sample(n=1, random_state=mode_series.sum()).iloc[0]  # if tie deterministic random winner
    if len(mode_series) != 1:
        __LOGGER.warning(f"For a certin ext_offset, offset and width we have a tie on to what poly_index it should be mapped: possible candidates: {list(mode_series)}. Arbitrary winner: {winner}.")
    return winner

do_merge_to_set = [column for column in df.columns if column not in gc.groups and column not in gc.parameters and column not in df.index.names + ['poly_index'] and column != 'packed_if_good' and column != 'poly_index_if_good']
df_grouped = df.copy()
df_grouped['poly_index_if_good_mv'] = df_grouped['poly_index_if_good']
df_grouped = df_grouped.groupby(list(gc.parameters)).agg({
    **{group: 'sum' for group in gc.groups},
    **{column: lambda x: tuple(sorted(set(x.dropna().astype(np.uint)))) for column in do_merge_to_set},
    **{'packed_if_good': lambda series: set(series.dropna())},
#    **{'poly_index_if_good': lambda series: series.dropna().astype(np.uint).value_counts().to_dict()},
    **{'poly_index_if_good_mv': mode_warning_tie}
})

# WTF?
for parameter in gc.parameters:
    try:
        del df_grouped[parameter]
    except KeyError:
        pass

# df_grouped

In [None]:
df_grouped_good = df_grouped[df_grouped[list(groups_good)].sum(axis=1) > 0]
df_grouped_good = df_grouped_good.astype({'poly_index_if_good_mv': np.uint})
df_grouped_good = df_grouped_good.reset_index().set_index((df_grouped_good.index.names[1:] + df_grouped_good.index.names[:1]))
df_grouped_good = df_grouped_good.sort_index()
df_grouped_good['exact'] = df_grouped_good['packed_if_good'].apply(lambda set_of_packed_sigs: len(set_of_packed_sigs) == 1)

df_grouped_good['success_rate'] = df_grouped_good[list(groups_good)].sum(axis=1) / (df_grouped_good[list(groups_good)].sum(axis=1) + df_grouped_good[list(groups_bad)].sum(axis=1))
try:
    del df_grouped_good['packed_if_good']
    del df_grouped_good['poly_index_if_good']
except KeyError:
    pass

display_all_rows(df_grouped_good)

In [None]:
df_n = df_grouped_good.copy()  # n for narrow
df_n['success_rates'] = df_n[list(groups_good)].sum(axis=1) / (df_n[list(groups_good)].sum(axis=1) + df_n[list(groups_bad)].sum(axis=1))
df_n = df_n.reset_index().groupby(list(gc.parameters[1:])).agg({
    **{group: 'sum' for group in gc.groups},
    **{'ext_offset': list, 'poly_index_if_good_mv': list, 'success_rates': list}
})

df_n = df_n.rename({
    'ext_offset': 'ext_offsets',
    'poly_index_if_good_mv': 'poly_indices',
})

df_n['success_rate'] = df_n[list(groups_good)].sum(axis=1) / (df_n[list(groups_good)].sum(axis=1) + df_n[list(groups_bad)].sum(axis=1))


df_n['points'] = [list(zip(ext_offsets, poly_indices, success_rates))
          for ext_offsets, poly_indices, success_rates in zip(df_n['ext_offset'], df_n['poly_index_if_good_mv'], df_n['success_rates'])]
df_n['straight_lines'] = df_n['points'].apply(lambda series: linear.get(series))

def add_success_rate(offset: float, width: float, straight_line: dict) -> dict:
    if 'success_rate' in straight_line:
        __LOGGER.warning(f'Straight line already has a success rate. I will overwrite it!')
    
    relevant_df = df_grouped

    indices = [(straight_line['slope'] * x + straight_line['y_intercept'], offset, width) for x in range(d._polyz_unpack_num_iters + 10)]  # +10 just be sure; can't hurt
    # print(indices)
    relevant_rows = relevant_df[relevant_df.index.isin(indices)]
    # display(relevant_rows)
    
    total_good = relevant_rows[list(groups_good)].sum(axis=1).sum()
    total_bad = relevant_rows[list(groups_bad)].sum(axis=1).sum()
    success_rate = total_good / (total_good + total_bad)
    
    straight_line['success_rate'] = success_rate
    return straight_line

for (offset, width), straight_lines in df_n['straight_lines'].items():
    for straight_line in straight_lines:
        add_success_rate(offset, width, straight_line)
    

del df_n['ext_offset'], df_n['poly_index_if_good_mv'], df_n['success_rates'],  df_n['points']

with pd.option_context('display.max_colwidth', None):
    display(df_n)

In [None]:
df_plot = df_n.copy()

df_plot['success_rate'] = df_plot['success_rate'] * 100
df_plot = df_plot.rename(columns={'success_rate': 'success_rate (%)'}, errors="raise")
df_plot['straight_lines'] = df_plot['straight_lines'].apply(str)

fig = px.scatter(df_plot.reset_index(), x="offset", y="width", color="success_rate (%)", color_continuous_scale="RdYlGn",
                 hover_data={"straight_lines": True, "success_rate (%)": ":.2f"}, template='plotly_white')
fig.update_traces(marker_size=15)
fig

In [None]:
lines = df_n[['straight_lines']].copy()
lines = lines[lines['straight_lines'].apply(len) > 0]
lines['slope'] = lines['straight_lines'].apply(lambda straight_lines: list(map(lambda straight_line: straight_line['slope'], straight_lines)))
lines['y_intercept'] = lines['straight_lines'].apply(lambda straight_lines: list(map(lambda straight_line: straight_line['y_intercept'], straight_lines)))
lines['success_rate'] = lines['straight_lines'].apply(lambda straight_lines: list(map(lambda straight_line: straight_line['success_rate'], straight_lines)))

lines = lines.explode(['slope', 'y_intercept', 'success_rate']).reset_index().set_index(lines.index.names + ['slope', 'y_intercept'])

del lines['straight_lines']

lines = lines.sort_values(by=['success_rate'], ascending=False)
lines['ext_offset'] = lines.apply(lambda row: [row.name[lines.index.names.index('slope')] * x + row.name[lines.index.names.index('y_intercept')] for x in range(d._polyz_unpack_num_iters + 10)], axis=1)  # +10 just be sure; can't hurt)

lines

In [None]:
for index, row in lines.iterrows():
    offset, width, slope, y_intercept = index
    print(index)
    indices = [(ext_offset,) + index[:2] for ext_offset in row['ext_offset']]
    df_line = df_grouped.loc[[index for index in indices if index in df_grouped.index]].copy()
    df_line['total_good'] = df_line[list(groups_good)].sum(axis=1)
    df_line['total_bad'] = df_line[list(groups_bad)].sum(axis=1)
    df_line['success_rate (%)'] = df_line['total_good'] / (df_line['total_good'] + df_line['total_bad']) * 100
    # display(df_line)
    # hover_data={"straight_lines": True, "success_rate (%)": ":.2f"}, 
    fig = px.scatter(df_line.reset_index(), x="poly_index_if_good_mv", y="ext_offset", color="success_rate (%)", color_continuous_scale="RdYlGn",
                  hover_data={"total_good": True, "total_bad": True}, template='plotly_white')
    fig.update_traces(marker_size=15)
    fig.update_layout(title=f'offset={offset};width={width};straight_line="{slope} * poly_index + {y_intercept}"; avg. success_rate: {row["success_rate"]}')
    display(fig)