In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import os

In [None]:
from src.utils.experimentManager import ExperimentManagerLoadFunction


In [None]:
# Locations of the stored model-training results.
# Every sub-directory of the results folder is treated as one training run.
results_folder = './results_journal/'
config_file_name = 'config.csv'
train_hist_file_name = 'train_history.csv'
train_sub_epoch_hist_file_name = 'train_sub_epoch_history.csv'
test_file_name = 'test_results.csv'
folders = list(filter(
    lambda name: os.path.isdir(os.path.join(results_folder, name)),
    os.listdir(results_folder),
))

# Config columns passed as hover_data to the plots.
hover_data = [
    'hidden_sizes',
    'output_sizes',
    'zeroOrderHoldRegularization',
    'norm',
    'augments',
    'act',
]

# Config entries whose values are joined into one combined label column;
# the name of that column is the '/'-joined list of the keys themselves.
keys = [
    'input_bias',
    'complex_output',
    'norm',
    'bias_init',
    'stability',
]
merged_key = '/'.join(keys)

In [None]:
def load_train(folder,read_sub_epoch_hist=False):
    """Collect the training history of the given run folders into one DataFrame.

    For every run folder the config file is read and each config entry is
    broadcast as a constant column onto the rows of the run's training
    history. A combined label column (named ``merged_key``) and the run
    number parsed from the folder name (``<prefix>#<number>_...``) are added.

    Relies on the module-level settings ``results_folder``,
    ``config_file_name``, ``train_hist_file_name``,
    ``train_sub_epoch_hist_file_name``, ``keys`` and ``merged_key``.

    Parameters
    ----------
    folder : iterable of str, or str
        Run folder names relative to ``results_folder``. A single name is
        accepted as well. The folder named ``'QUEUE'`` is skipped.
    read_sub_epoch_hist : bool, default False
        If True, the sub-epoch history of a run is appended as additional
        rows whenever its file exists.

    Returns
    -------
    pandas.DataFrame
        All histories stacked with a fresh integer index; an empty frame if
        nothing could be loaded.

    Raises
    ------
    Exception
        Errors while reading a config file or parsing a folder name are not
        caught; only an unreadable training history is skipped.
    """
    # Iterate over the argument instead of the notebook-global folder list.
    run_folders = [folder] if isinstance(folder, str) else list(folder)

    # Collect the pieces and concatenate once at the end; growing a frame
    # with concat inside the loop copies all previous rows on every pass.
    frames = []
    for run_folder in run_folders:
        if run_folder == 'QUEUE':
            continue
        run_dir = os.path.join(results_folder, run_folder)
        config_path = os.path.join(run_dir, config_file_name)
        train_hist_path = os.path.join(run_dir, train_hist_file_name)

        # The config is stored one entry per row; transpose to one column per entry.
        config = pd.read_csv(config_path,index_col=0).T
        # Fill in entries that are absent from a run's config file.
        if 'bias_init' not in config.columns:
            config['bias_init'] = 'zero'
        if 'stability' not in config.columns:
            config['stability'] = 'relu'

        try:
            train_hist = pd.read_csv(train_hist_path, index_col=0)
        except Exception:
            # Best effort: skip runs without a readable history, but let
            # KeyboardInterrupt / SystemExit propagate.
            print(f'Could not read {train_hist_path}')
            continue

        # Broadcast every config entry onto all rows of the history.
        for key in config:
            train_hist[key] = config[key].values[0]

        run_tag = run_folder.split('_')[0]
        run_number = int(run_tag.split('#')[1])
        merged_value = '/'.join([str(config[key].values[0]) for key in keys])

        train_hist[merged_key] = run_tag + '/' + merged_value
        train_hist['Run'] = run_number
        frames.append(train_hist)

        if read_sub_epoch_hist:
            sub_hist_path = os.path.join(run_dir, train_sub_epoch_hist_file_name)
            if os.path.exists(sub_hist_path):
                sub_history = pd.read_csv(sub_hist_path)
                sub_history['Run'] = run_number
                # Cast before the frame is collected so the conversion
                # actually ends up in the returned data.
                if 'lr' in sub_history.columns:
                    sub_history['lr'] = sub_history['lr'].astype(float)
                frames.append(sub_history)
                print("Read sub history")

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [None]:
def load_test(folders):
    """Collect the test results of the given run folders into one DataFrame.

    For every run folder the config file is read and each config entry is
    broadcast as a constant column onto the rows of the run's test results.
    The run number parsed from the folder name (``<prefix>#<number>_...``)
    and a combined label column (named ``merged_key``) are added.

    Relies on the module-level settings ``results_folder``,
    ``config_file_name``, ``test_file_name``, ``keys`` and ``merged_key``.

    Parameters
    ----------
    folders : iterable of str
        Run folder names relative to ``results_folder``. The folder named
        ``'QUEUE'`` is skipped.

    Returns
    -------
    pandas.DataFrame
        All test results stacked, keeping the row index of each file; an
        empty frame if nothing could be loaded.

    Raises
    ------
    Exception
        Errors while reading a config file or parsing a folder name are not
        caught; only an unreadable test-results file is skipped.
    """
    # Collect the pieces and concatenate once at the end; growing a frame
    # with concat inside the loop copies all previous rows on every pass.
    frames = []
    for run_folder in folders:
        if run_folder == 'QUEUE':
            continue
        config_path = os.path.join(results_folder, run_folder, config_file_name)
        test_path = os.path.join(results_folder, run_folder, test_file_name)

        # The config is stored one entry per row; transpose to one column per entry.
        config = pd.read_csv(config_path,index_col=0).T
        # Fill in entries that are absent from a run's config file.
        if 'bias_init' not in config.columns:
            config['bias_init'] = 'zero'
        if 'stability' not in config.columns:
            config['stability'] = 'relu'

        try:
            test = pd.read_csv(test_path, index_col=0)
        except Exception:
            # Best effort: skip runs without readable test results, but let
            # KeyboardInterrupt / SystemExit propagate.
            print(f'Could not read {test_path}')
            continue

        # Broadcast every config entry onto all rows of the test results.
        for key in config:
            test[key] = config[key].values[0]

        test['Run'] = int(run_folder.split('_')[0].split('#')[1])
        test[merged_key] = '/'.join([str(config[key].values[0]) for key in keys])

        frames.append(test)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

In [None]:
# Load the per-epoch training curves (without the sub-epoch history)
# and the test metrics of every run folder.
train_hist_all = load_train(
    folders,
    read_sub_epoch_hist=False,
)
test_all = load_test(folders)

In [None]:
# Make the epoch numeric, then order the history by epoch and run number
# and renumber the rows.
train_hist_all = (
    train_hist_all
    .assign(epoch=train_hist_all['epoch'].astype(float))
    .sort_values(by=['epoch', 'Run'])
    .reset_index(drop=True)
)
train_hist_all

In [None]:
# Rank the test results and print one table per model size.
#
# The frame is first ordered by run number and renumbered, then ranked by
# parameter count, test accuracy and output sizes (all descending).
test_all = test_all.sort_values(by=['Run']).reset_index(drop=True)
test_all = test_all.sort_values(
    by=['params', 'test_acc', 'output_sizes'],
    ascending=[False, False, False],
)
# Alternative rankings:
# test_all = test_all.sort_values(by=['test_acc'], ascending=False)
# test_all = test_all.sort_values(by=['best_val_acc'], ascending=False)

# Optional filters:
# test_all = test_all.query('zeroOrderHoldRegularization=="[]" and norm=="True" and B_C_init=="S5"')
# test_all = test_all.query('norm=="False"')
# test_all = test_all.query('dropout=="0.0"')

# Columns shown in the printed tables ('norm' is listed once; selecting it
# twice would print the column twice).
summary_columns = [
    'Run', 'params', 'zeroOrderHoldRegularization', 'best_val_acc_epoch',
    'test_acc', 'best_val_acc', 'output_sizes', 'hidden_sizes', 'augments',
    'lr', 'weight_decay', 'norm', 'dropout', 'B_C_init',
]

for params in test_all['params'].unique():
    # Named param_slice so the builtin `slice` is not overwritten in the
    # notebook namespace.
    param_slice = test_all.query('params==@params')
    print(param_slice[summary_columns].to_string())
    print("")

In [None]:
# Plot the test results: one box plot per metric over the parameter count,
# overlaid with the mean of the metric for each setting of dropout,
# zeroOrderHoldRegularization and norm, plus a bar chart of the mean
# difference between the "enabled" and "disabled" setting.
display_list = ['test_acc_16k', 'test_acc_8k', 'test_acc_4k', 'test_acc', 'best_val_acc']


def _mean_per_params(results, metric, mask):
    """Return the mean of `metric` per parameter count over the rows selected by `mask`.

    The result is a Series indexed by the parameter count (cast to float) and
    sorted by it, so two such Series can be subtracted with index alignment.
    """
    means = results.loc[mask].groupby('params')[metric].mean()
    means.index = means.index.astype(float)
    return means.sort_index()


# Colour group: the text between the last '[' and the first following ','
# of the output_sizes entry. It does not depend on the metric, so it is
# computed once instead of on every loop iteration.
test_all['group'] = test_all['output_sizes'].apply(lambda x: str(x).split('[')[-1].split(',')[0])

_dropout = test_all['dropout'].astype(float)
_zoh = test_all['zeroOrderHoldRegularization']
_zoh_off = _zoh == '[]'
# Each entry: (name of the "disabled" line, rows with the setting disabled,
#              name of the "enabled" line, rows with the setting enabled,
#              line dash style, name of the difference bar)
comparisons = [
    ('Mean dropout 0.0', _dropout == 0,
     'Mean dropout 0.1', _dropout == 0.1,
     'dot', 'Enabling Dropout'),
    ('Mean zeroOrderHoldRegularization []', _zoh_off,
     'Mean zeroOrderHoldRegularization not []', _zoh.notna() & ~_zoh_off,
     'dash', 'Enabling ZeroOrderHoldRegularization'),
    ('Mean norm False', test_all['norm'] == "False",
     'Mean norm True', test_all['norm'] == "True",
     'dashdot', 'Enabling Norm'),
]

for display in display_list:
    if display not in test_all.columns:
        # Not every result set contains all metrics; skip instead of
        # aborting the remaining plots.
        print(f'Skipping {display}: column not found in test_all')
        continue

    fig =  px.box(test_all, x='params', y=display,hover_name=merged_key, hover_data=hover_data,points="all", color='group')
    fig.update_layout(
        xaxis_title='Number of Parameters',
        yaxis_title=display,
        title=f'Test Accuracy {display}'
    )
    # Must run before the line traces are added: it sets the width on every
    # trace currently in the figure, i.e. the boxes.
    fig.update_traces(width=2000)

    fig2 = go.Figure()

    for off_name, off_mask, on_name, on_mask, dash, bar_name in comparisons:
        off_mean = _mean_per_params(test_all, display, off_mask)
        on_mean = _mean_per_params(test_all, display, on_mask)

        fig.add_trace(go.Scatter(x=off_mean.index, y=off_mean.values, mode='lines', name=off_name, line=dict(dash=dash)))
        fig.add_trace(go.Scatter(x=on_mean.index, y=on_mean.values, mode='lines', name=on_name, line=dict(dash=dash)))

        # Subtract by parameter count (index alignment) rather than by row
        # position, so a parameter count that is missing on one side cannot
        # shift the pairing; such entries become NaN.
        gain = on_mean.sub(off_mean).reindex(off_mean.index)
        fig2.add_trace(go.Bar(x=gain.index, y=gain.values, name=bar_name))

    fig.show()
    fig.write_html(f"{display}_over_runs.html")

    fig2.update_layout(
        xaxis_title='Number of Parameters',
        yaxis_title=display,
        title=f'Test Accuracy mean difference {display}'
    )
    fig2.update_traces(showlegend=True)
    fig2.show()
    fig2.write_html(f"{display}_over_runs_mean_diff.html")


In [None]:
# Training accuracy over the epochs, one line per run.
fig = px.line(
    train_hist_all,
    x='epoch',
    y='train_acc',
    color='Run',
    hover_name=merged_key,
    hover_data=hover_data,
    markers='.',
    title='Train Accuracy',
)
# Optional: logarithmic y-axis
# fig.update_yaxes(type="log")
fig.show()

In [None]:
# Training loss over the epochs, one line per run, on a logarithmic y-axis.
fig = px.line(
    train_hist_all,
    x='epoch',
    y='train_loss',
    color='Run',
    hover_name=merged_key,
    hover_data=hover_data,
    markers='.',
    title='Train Loss',
)
fig.update_yaxes(type="log")
fig.show()

In [None]:
# Validation accuracy over the epochs, one line per run, on a logarithmic y-axis.
fig = px.line(
    train_hist_all,
    x='epoch',
    y='valid_acc',
    color='Run',
    hover_name=merged_key,
    hover_data=hover_data,
    markers='.',
    title='Valid Accuracy',
)
fig.update_yaxes(type="log")
fig.show()

In [None]:
# Validation loss over the epochs, one line per run.
# The y-axis is requested as logarithmic both through log_y and through the
# explicit axis update below.
fig = px.line(
    train_hist_all,
    x='epoch',
    y='valid_loss',
    color='Run',
    hover_name=merged_key,
    hover_data=hover_data,
    markers='.',
    title='Valid Loss',
    log_y=True,
)
fig.update_yaxes(type="log")
fig.show()

In [None]:
# Learning rate (column 'lr') over the epochs, one line per run, log y-axis.
# NOTE(review): the following cell plots the column 'learning_rate' instead;
# presumably only one of the two columns exists in a given history - confirm.
fig = px.line(
    train_hist_all,
    x='epoch',
    y='lr',
    color='Run',
    hover_name=merged_key,
    markers='.',
    title='Learning rate',
)
fig.update_yaxes(type="log")
fig.show()

In [None]:
# Learning rate (column 'learning_rate') over the epochs, one line per run,
# log y-axis.
# NOTE(review): the preceding cell plots the column 'lr' instead; presumably
# only one of the two columns exists in a given history - confirm.
fig = px.line(
    train_hist_all,
    x='epoch',
    y='learning_rate',
    color='Run',
    hover_name=merged_key,
    markers='.',
    title='Learning rate',
)
fig.update_yaxes(type="log")
fig.show()

In [None]:

# Test accuracy versus MACs for the step-scale sweeps of selected runs.
# For every selected run one scatter plot with its Pareto fronts is shown and
# written to '<run folder name>.html'; fig2 overlays the overall Pareto
# fronts of all selected runs.
paths = []
# 8 k Param
# paths.append(ExperimentManagerLoadFunction(results_folder,run=106))
# paths.append(ExperimentManagerLoadFunction(results_folder,run=107)) # reg

# paths.append(ExperimentManagerLoadFunction(results_folder,run=88))
# paths.append(ExperimentManagerLoadFunction(results_folder,run=89)) # reg


# 20k param
# paths.append(ExperimentManagerLoadFunction(results_folder,run=194))
# paths.append(ExperimentManagerLoadFunction(results_folder,run=195)) # reg

# # others
# paths.append(ExperimentManagerLoadFunction(results_folder,run=410)) # 20k S5
# paths.append(ExperimentManagerLoadFunction(results_folder,run=388)) # 56k S5
# paths.append(ExperimentManagerLoadFunction(results_folder,run=389)) # 56k S5 reg



###### TOP Accuracy Runs !!! ###############
# 8 k Param BN=True
# paths.append(ExperimentManagerLoadFunction(results_folder,run=304))
# paths.append(ExperimentManagerLoadFunction(results_folder,run=305)) # reg

# # 20k param BN=True
# paths.append(ExperimentManagerLoadFunction(results_folder,run=302))
# paths.append(ExperimentManagerLoadFunction(results_folder,run=303)) # reg


# # 56k param BN = True
# paths.append(ExperimentManagerLoadFunction(results_folder,run=298))
# paths.append(ExperimentManagerLoadFunction(results_folder,run=299)) # reg

# 141k param
# paths.append(ExperimentManagerLoadFunction(results_folder,run=144))
# paths.append(ExperimentManagerLoadFunction(results_folder,run=145)) # reg

# paths.append(ExperimentManagerLoadFunction(results_folder,run=11))

paths.append(ExperimentManagerLoadFunction(results_folder,run=0))


# Settings for the optional device-budget markers (only used by the
# commented-out overlays below).
device_frequencies = {'L4R5ZIT6U':120e6,'F303K8':72e6,}
cycles_per_mac = 6 # this might be achievable with all weights in RAM
sampling_rate = 16e3

fig2 = go.Figure()

x_axis = 'macs percent'
# x_axis = 'macs'

# Axis label that matches the selected x column.
x_axis_title = 'MACs [%]' if x_axis == 'macs percent' else 'MACs'


def _pareto_front(results, x_col):
    """Return the Pareto-front rows of a sweep.

    Keeps the most accurate row per `step_scale`, walks these rows from the
    highest to the lowest `test_acc` and retains a row only if its `x_col`
    value equals the running minimum of `x_col` seen so far.
    """
    best = results.sort_values(by='test_acc', ascending=False).drop_duplicates(subset=['step_scale'])
    # Only the x column is needed for the running minimum; taking cummin over
    # the whole frame would also process the string-valued columns.
    return best[best[x_col] == best[x_col].cummin()]


for path in paths:
    print(path)
    # basename/normpath tolerates a trailing separator and OS-specific
    # separators, unlike splitting on '/'.
    run_name = os.path.basename(os.path.normpath(path))
    run = int(run_name.split('_')[0].split('#')[1])

    sweep_frames = [
        pd.read_csv(os.path.join(path, file_name), sep=',')
        for file_name in os.listdir(path)
        if 'test_results_flops_step_scale' in file_name
    ]
    if not sweep_frames:
        raise FileNotFoundError(f"No 'test_results_flops_step_scale' files found in {path}")
    df = pd.concat(sweep_frames)

    # read the config file and broadcast its entries onto all rows
    config = pd.read_csv(os.path.join(path, config_file_name), index_col=0).T
    for key in config:
        df[key] = config[key].values[0]

    fig = px.scatter(df, y='test_acc', x=x_axis, color='step_scale', hover_data=['macs',x_axis,'params',*hover_data], title=path)
    fig.update_traces(showlegend=False)

    # overall Pareto front of the run: dashed line plus hoverable points
    front = _pareto_front(df, x_axis)
    fig.add_trace(go.Scatter(x=front[x_axis], y=front['test_acc'], mode='lines', line=dict(color='gray', dash='dash'), name='Pareto Front'))
    front_points = px.scatter(front, y='test_acc', x=x_axis, hover_data=['step_scale','macs','macs percent','params',*hover_data], title=path)
    fig.add_trace(front_points.data[0])
    fig.data[-1]['name'] = 'Pareto Points'
    fig.data[-1]['showlegend'] = True

    fig2.add_trace(go.Scatter(x=front[x_axis], y=front['test_acc'], mode='lines', name=f'Run {run}'))

    # Pareto fronts restricted to the step scales containing '[n,'
    for n in range(1,10+1):
        # Named sweep_subset so the builtin `slice` is not overwritten in the
        # notebook namespace.
        sweep_subset = df[df['step_scale'].str.contains(f'[{n},',regex=False)]
        if sweep_subset.empty:
            continue
        sub_front = _pareto_front(sweep_subset, x_axis)
        fig.add_trace(go.Scatter(x=sub_front[x_axis], y=sub_front['test_acc'], mode='lines', line=dict(dash='dot'), name=f'Pareto Front [{n},X,...,X]'))

    # Optional overlays: best-accuracy line and per-device MAC budgets
    # fig.add_hline(y=0.9251249, line=dict(color='gray', dash='dash'))
    # for name,device_frequency in device_frequencies.items():
    #     fig.add_vline(x=(device_frequency/sampling_rate)/cycles_per_mac, line=dict(color='gray', dash='dash'))
    #     fig.add_annotation(x=(device_frequency/sampling_rate)/cycles_per_mac, y=0.9251249, text=f'{name}', showarrow=True, arrowhead=1)

    fig.update_yaxes(title='Test Accuracy')
    fig.update_layout(title=f'Test Accuracy vs MACs {path}')
    fig.update_xaxes(title=x_axis_title)
    if x_axis == 'macs percent':
        fig.update_xaxes(range=[0,100])
    # fig.update_yaxes(type='log')
    # fig.update_xaxes(type='log')

    fig.show()
    fig.write_html(f"{run_name}.html")

# Optional overlays for the combined figure
# fig2.add_hline(y=0.9251249, line=dict(color='gray', dash='dash'))
# for name,device_frequency in device_frequencies.items():
#     fig2.add_vline(x=(device_frequency/sampling_rate)/cycles_per_mac, line=dict(color='gray', dash='dash'))
#     fig2.add_annotation(x=(device_frequency/sampling_rate)/cycles_per_mac, y=0.9251249, text=f'{name}', showarrow=True, arrowhead=1)

fig2.update_xaxes(title=x_axis_title)
fig2.update_yaxes(title='Test Accuracy')
fig2.update_layout(title=f'Pareto fronts for Test Accuracy vs MACs')
if x_axis == 'macs percent':
    fig2.update_xaxes(range=[0,100])
# fig2.update_yaxes(type='log')
# fig2.update_xaxes(type='log')
fig2.show()

# fig2.write_html(f"pareto_fronts.html")