In [None]:
from os.path import join, dirname, basename, splitext, abspath
from glob import glob
import pandas as pd
import yaml
import platform
from IPython.display import display
from IPython.core.display import display, HTML
import platform
from util.loader import load_raw_data, convert_data
# Stretch the notebook container to the full browser width so wide tables fit.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Folder of this file when `__file__` is defined (run as a script); otherwise
# the absolute path of the current working directory.
if '__file__' in globals():
    this_folder = dirname(__file__)
else:
    this_folder = abspath('')
# Result paths are built relative to the folder two levels above `this_folder`.
root_folder = dirname(dirname(this_folder))
def get_test_result_folder(testname = 'baseline', machine = 'XPS-15-9560'):
    """Return the folder that holds the CUTEst results of one test run.

    Parameters
    ----------
    testname : str
        Name of the test run's sub-folder (default ``'baseline'``).
    machine : str
        Name of the machine-specific sub-folder (presumably the machine the
        tests were run on). Defaults to ``'XPS-15-9560'``, the value that used
        to be hard-coded, so existing calls are unaffected.

    Returns
    -------
    str
        ``<root_folder>/test/testresults/<machine>/<testname>/CUTEst``.
        The path is only composed; it is not checked for existence.
    """
    return join(root_folder, 'test', 'testresults', machine, testname, 'CUTEst')

def color_negative_red(val):
    """Return a CSS color rule: red for a negative `val`, black otherwise."""
    if val < 0:
        return 'color: red'
    return 'color: black'
def color_negative_red_positive_green(val):
    """Return a CSS color rule based on the sign of `val`.

    Negative values map to red, positive values to green; anything else
    (zero, or a value that compares neither below nor above zero such as NaN)
    gets an empty rule, i.e. no styling.
    """
    if val < 0:
        return 'color: red'
    if val > 0:
        return 'color: green'
    return ''

In [None]:
# Result folders of the two runs under comparison: baseline first, new run second.
base_folder, new_folder = map(get_test_result_folder,
                              ('panoc-lbfgs-11-4', 'panoc-2nd-lbfgs'))

In [None]:
# Read the raw results of both runs with `load_raw_data`, baseline first.
base_raw, new_raw = [load_raw_data(folder) for folder in (base_folder, new_folder)]

In [None]:
# Convert the raw results with `convert_data`; the cells below treat the
# converted results as pandas DataFrames.
base_df, new_df = [convert_data(raw) for raw in (base_raw, new_raw)]

In [None]:
# Lift pandas' limit on the number of displayed rows.
pd.set_option('display.max_rows', None)
# Show the new run: float columns in scientific notation with two decimals,
# the 'f' column with eight (the later `format` call wins for that column).
(
    new_df.style
          .format('{:.2e}', subset=(new_df.dtypes == float))
          .format('{:.8e}', subset=['f'])
)

In [None]:
def df_stats(df):
    """Print convergence and run-time statistics of a results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'status' column (a test counts as converged when its status
        equals 'Converged') and a numeric 'time' column.

    Prints the number and percentage of converged tests, the summed time of
    all tests and the summed time of the converged tests. Returns None.
    """
    converged = df['status'] == 'Converged'
    # Count through the mask rather than `value_counts()['Converged']`, which
    # raises a KeyError when not a single test converged.
    conv = int(converged.sum())
    tot = int(df['status'].count())  # missing statuses are not counted
    # A table without any status entries has no meaningful percentage.
    pct = 100 * conv / tot if tot else float('nan')
    tot_time = df['time'].sum()
    conv_time = df.loc[converged, 'time'].sum()
    print(f'Converged:      {conv}/{tot} = {pct:.02f}%')
    print(f'Total time:     {tot_time:.03f}s')
    print(f'Converged time: {conv_time:.03f}s')

In [None]:
# Print the summary statistics of both runs, baseline first.
for heading, results in (('Baseline\n---\n', base_df), ('New test\n---\n', new_df)):
    print(heading)
    df_stats(results)
    print('\n')

In [None]:
def compare_results(a, b, columns):
    """Place the selected columns of two result tables side by side.

    The rows of `a[columns]` are kept and the matching rows of `b[columns]`
    are joined to them on the index. Column names shared by both sides get
    the suffix ' 0' (from `a`) or ' 1' (from `b`).
    """
    left = a[columns]
    right = b[columns]
    return left.join(right, lsuffix=' 0', rsuffix=' 1')

In [None]:
# Put the 'status', 'f', 'ε' and 'δ' columns of both runs side by side
# (suffix ' 0' = baseline, ' 1' = new run).
cmp = compare_results(base_df, new_df, ['status', 'f', 'ε', 'δ']) #  'inner iterations', 'outer iterations',
# Positive improvement: the new run ended with a lower value of 'f'.
cmp['f imprv'] = cmp['f 0'] - cmp['f 1']
# The same improvement as a percentage of the magnitude of the baseline value.
cmp['rel f imprv'] = 100 * cmp['f imprv'] / abs(cmp['f 0'])

styler = cmp.sort_values('rel f imprv').style
# `Styler.applymap` is deprecated since pandas 2.1 in favour of `Styler.map`;
# prefer the new name and fall back to the old one on older pandas versions.
colorize = styler.map if hasattr(styler, 'map') else styler.applymap
(
    colorize(color_negative_red_positive_green, subset=['f imprv', 'rel f imprv'])
    .format('{:.2e}', subset=(cmp.dtypes == float))
    .format('{:.8e}', subset=['f 0', 'f 1'])
    .format('{:.02f}%', subset=[col for col in cmp.columns if col.startswith('rel')])
)

In [None]:
# Only tests that converged in the new run are classified as worse or better.
converged_in_new = cmp['status 1'] == 'Converged'
worse_f = cmp[(cmp['f imprv'] < 0) & converged_in_new]
better_f = cmp[(cmp['f imprv'] > 0) & converged_in_new]
tol = 1e-5
# 'rel f imprv' is a percentage, hence the factor 100 on the relative tolerance.
really_worse_f = worse_f[abs(worse_f['rel f imprv']) > 100 * tol]
really_better_f = better_f[abs(better_f['rel f imprv']) > 100 * tol]
print(f'{len(worse_f)} tests got worse results')
print(f'{len(really_worse_f)} tests got significantly worse')
print(f'{len(better_f)} tests got better results')
print(f'{len(really_better_f)} tests got significantly better')

print('\nSignificantly worse tests:')
styler = really_worse_f.sort_values('rel f imprv').style
# `Styler.applymap` is deprecated since pandas 2.1 in favour of `Styler.map`;
# prefer the new name and fall back to the old one on older pandas versions.
colorize = styler.map if hasattr(styler, 'map') else styler.applymap
(
    colorize(color_negative_red_positive_green, subset=['f imprv', 'rel f imprv'])
    .format('{:.2e}', subset=(really_worse_f.dtypes == float))
    .format('{:.8e}', subset=['f 0', 'f 1'])
    .format('{:.02f}%', subset=[col for col in really_worse_f.columns if col.startswith('rel')])
)

In [None]:
# Status of every baseline test next to its status in the new run
# (' 0' = baseline, ' 1' = new run); rows follow the index of `base_df`.
statusses = base_df[['status']].join(new_df[['status']], lsuffix=' 0', rsuffix=' 1')
not_conv_to_conv = (statusses['status 0'] != 'Converged') & (statusses['status 1'] == 'Converged')
conv_to_not_conv = (statusses['status 0'] == 'Converged') & (statusses['status 1'] != 'Converged')


def _show_status_change(changed):
    """Display the baseline rows and then the new-run rows of the flagged tests.

    `changed` is a boolean Series indexed like `base_df`. Rows are selected by
    index label instead of by applying the mask directly, because the mask is
    aligned with `base_df` only: indexing `new_df` with it fails when the new
    run contains tests that are absent from the baseline.
    """
    names = changed[changed].index
    for df in (base_df, new_df):
        rel_columns = [col for col in df.columns if col.startswith('rel')]
        display(df[df.index.isin(names)].style
                  .format('{:.2e}', subset=(df.dtypes == float))
                  .format('{:.8e}', subset=['f'])
                  .format('{:.02f}%', subset=rel_columns))


print(f"{not_conv_to_conv.sum()} tests that didn't converge before do converge after the change")
print(f"{conv_to_not_conv.sum()} tests that converged before no longer converge after the change")

display(HTML("<hr>"))

print('The following tests went from not converging to converging')
_show_status_change(not_conv_to_conv)

display(HTML("<hr>"))

print('The following tests went from converging to no longer converging')
_show_status_change(conv_to_not_conv)

In [None]:
# Restrict the comparison to the tests that converged in both runs.
both_converged = (base_df['status'] == 'Converged') & (new_df['status'] == 'Converged')
# Select the rows once instead of re-filtering both tables for every total below.
base_conv = base_df[both_converged]
new_conv = new_df[both_converged]
cmp = compare_results(base_conv, new_conv, ['status', 'time', 'f evaluations', 'grad_f evaluations'])
# Absolute savings and savings as a percentage of the baseline value
# (' 0' = baseline, ' 1' = new run); positive means the new run needed less.
cmp['time imprv'] = cmp['time 0'] - cmp['time 1']
cmp['rel time imprv'] = 100 * cmp['time imprv'] / cmp['time 0']
cmp['f eval imprv'] = cmp['f evaluations 0'] - cmp['f evaluations 1']
cmp['rel f eval imprv'] = 100 * cmp['f eval imprv'] / cmp['f evaluations 0']
cmp['grad_f eval imprv'] = cmp['grad_f evaluations 0'] - cmp['grad_f evaluations 1']
cmp['rel grad_f eval imprv'] = 100 * cmp['grad_f eval imprv'] / cmp['grad_f evaluations 0']
print(f"Net time improvement:      {cmp['time imprv'].sum()}")
print(f"Relative time improvement: {100 * cmp['time imprv'].sum() / cmp['time 0'].sum():.02f}%")
print(f"Net f eval improvement:      {cmp['f eval imprv'].sum()}")
print(f"Relative f eval improvement: {100 * cmp['f eval imprv'].sum() / cmp['f evaluations 0'].sum():.02f}%")
print(f"Net grad_f eval improvement:      {cmp['grad_f eval imprv'].sum()}")
print(f"Relative grad_f eval improvement: {100 * cmp['grad_f eval imprv'].sum() / cmp['grad_f evaluations 0'].sum():.02f}%")
print('positive is good, negative is bad')


# Column totals over the tests that converged in both runs: baseline value,
# a tab, then the new-run value. Labels are padded to a common width of 23.
for label, column in (
    ('Time:', 'time'),
    ('Objective evaluations:', 'f evaluations'),
    ('Gradient evaluations:', 'grad_f evaluations'),
    ('Outer iterations:', 'outer iterations'),
    ('Inner iterations:', 'inner iterations'),
    ('Linesearch failures:', 'linesearch failures'),
    ('L-BFGS failures:', 'L-BFGS failures'),
    ('L-BFGS rejected:', 'L-BFGS rejected'),
):
    print(f"{label:<23}{base_conv[column].sum()}\t{new_conv[column].sum()}")

styler = cmp.sort_values('rel f eval imprv').style
# `Styler.applymap` is deprecated since pandas 2.1 in favour of `Styler.map`;
# prefer the new name and fall back to the old one on older pandas versions.
colorize = styler.map if hasattr(styler, 'map') else styler.applymap
(
    colorize(color_negative_red_positive_green, subset=['time imprv', 'rel time imprv', 'f eval imprv', 'rel f eval imprv', 'grad_f eval imprv', 'rel grad_f eval imprv'])
    .format('{:.2e}', subset=(cmp.dtypes == float))
    .format('{:.02f}%', subset=[col for col in cmp.columns if col.startswith('rel')])
)

In [None]:
# Compare the failure counters of the tests that converged in both runs
# (' 0' = baseline, ' 1' = new run); positive improvement = fewer in the new run.
cmp = compare_results(base_df[both_converged], new_df[both_converged], ['status', 'linesearch failures', 'L-BFGS failures', 'L-BFGS rejected'])
cmp['ls imprv'] = cmp['linesearch failures 0'] - cmp['linesearch failures 1']
# Relative values are percentages of the baseline count; a baseline count of
# zero yields inf or NaN in that row.
cmp['rel ls imprv'] = 100 * cmp['ls imprv'] / cmp['linesearch failures 0']
cmp['lbfgs imprv'] = cmp['L-BFGS failures 0'] - cmp['L-BFGS failures 1']
cmp['rel lbfgs imprv'] = 100 * cmp['lbfgs imprv'] / cmp['L-BFGS failures 0']
cmp['lbfgs rej imprv'] = cmp['L-BFGS rejected 0'] - cmp['L-BFGS rejected 1']
cmp['rel lbfgs rej imprv'] = 100 * cmp['lbfgs rej imprv'] / cmp['L-BFGS rejected 0']
print(f"Net linesearch failures improvement:    {cmp['ls imprv'].sum()}")
print(f"Relative linesearch improvement:        {100. * cmp['ls imprv'].sum() / cmp['linesearch failures 0'].sum():.02f}%")
print(f"Net L-BFGS failures improvement:        {cmp['lbfgs imprv'].sum()}")
print(f"Relative L-BFGS failures improvement:   {100. * cmp['lbfgs imprv'].sum() / cmp['L-BFGS failures 0'].sum():.02f}%")
print(f"Net L-BFGS rejections improvement:      {cmp['lbfgs rej imprv'].sum()}")
print(f"Relative L-BFGS rejections improvement: {100. * cmp['lbfgs rej imprv'].sum() / cmp['L-BFGS rejected 0'].sum():.02f}%")

styler = cmp.sort_values('rel ls imprv').style
# `Styler.applymap` is deprecated since pandas 2.1 in favour of `Styler.map`;
# prefer the new name and fall back to the old one on older pandas versions.
colorize = styler.map if hasattr(styler, 'map') else styler.applymap
(
    colorize(color_negative_red_positive_green, subset=['ls imprv', 'rel ls imprv', 'lbfgs imprv', 'rel lbfgs imprv', 'lbfgs rej imprv', 'rel lbfgs rej imprv'])
    .format('{:.2e}', subset=(cmp.dtypes == float))
    .format('{:.02f}%', subset=[col for col in cmp.columns if col.startswith('rel')])
)

In [None]:
# Baseline tests whose 'ε' value is exactly zero.
base_df.loc[base_df['ε'].eq(0)]

In [None]:
# New-run tests whose 'ε' value is exactly zero.
new_df.loc[new_df['ε'].eq(0)]

In [None]:
# Print the index labels of the new-run tests with a positive 'box constr x'
# value, each wrapped in double quotes and on its own line.
names_with_box_x = new_df.loc[new_df['box constr x'] > 0].index.values
print('"' + '"\n"'.join(names_with_box_x) + '"')

In [None]:
# Row(s) of the new run whose index label is 'HS75'.
new_df.loc[new_df.index == 'HS75']

In [None]:
# Row(s) of the baseline run whose index label is 'HS75'.
base_df.loc[base_df.index == 'HS75']