In [None]:
from os.path import join, dirname, basename, splitext, abspath
from glob import glob
import pandas as pd
import yaml
from datetime import timedelta
from IPython.display import display
from IPython.core.display import display, HTML
# Inject a CSS rule that forces elements with class "container" to 100% width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Use the folder of this file when `__file__` is defined (script execution);
# otherwise fall back to the absolute path of the current working directory.
if '__file__' in globals():
    this_folder = dirname(__file__)
else:
    this_folder = abspath('')
# The root folder is two directory levels above `this_folder`.
root_folder = dirname(dirname(this_folder))
def get_test_result_folder(testname = 'baseline'):
    """Return the path `<root_folder>/build/testresults/<testname>/CUTEst`."""
    path_parts = ('build', 'testresults', testname, 'CUTEst')
    return join(root_folder, *path_parts)

def color_negative_red(val):
    """Return a CSS `color` declaration: red for values below zero, black otherwise."""
    if val < 0:
        return 'color: red'
    return 'color: black'

In [None]:
def load_raw_data(folder):
    """Load the first YAML document of every `*.yaml` file in `folder`.

    Parameters
    ----------
    folder : str
        Directory that is searched (non-recursively) for `*.yaml` files.

    Returns
    -------
    dict
        Maps each file's base name (without extension) to the parsed content
        of the first YAML document in that file. Empty if no file matches.
    """
    raw_data = {}
    for filename in glob(join(folder, '*.yaml')):
        # Decode explicitly as UTF-8: this notebook later looks up non-ASCII
        # keys such as 'ε' and 'δ', which would be mis-decoded if the platform's
        # default locale encoding were used instead.
        with open(filename, 'r', encoding='utf-8') as f:
            # Only the first document of a (possibly multi-document) stream is used.
            content = next(yaml.safe_load_all(f))
        name = splitext(basename(filename))[0]
        raw_data[name] = content
    return raw_data

In [None]:
def convert_data(raw_data):
    """Convert the raw per-test result dicts into a DataFrame indexed by test name.

    Parameters
    ----------
    raw_data : dict
        Maps a test name to its result dict. Each result must provide the keys
        read below ('status', 'elapsed time', 'counters', ...).

    Returns
    -------
    pandas.DataFrame
        One row per test, indexed by 'name' and sorted by it, with an extra
        'rel linesearch failures' column (linesearch failures divided by
        inner iterations). An empty `raw_data` yields an empty frame that
        still has all the columns.

    Raises
    ------
    KeyError
        If a result dict lacks one of the expected keys.
    """
    columns = [
        'name',
        'status',
        'time',
        'inner iterations',
        'outer iterations',
        'inner convergence failures',
        'f',
        'ε',
        'δ',
        'f evaluations',
        'grad_f evaluations',
        'g evaluations',
        'grad_g evaluations',
        'linesearch failures',
        'L-BFGS failures',
        'L-BFGS rejected',
    ]
    records = []
    for name, content in raw_data.items():
        counters = content['counters']
        records.append({
            'name': name,
            'status': content['status'],
            'time': float(content['elapsed time']),
            'inner iterations': content['inner iterations'],
            'outer iterations': content['outer iterations'],
            'inner convergence failures': content['inner convergence failures'],
            'f': float(content['f']),
            'ε': float(content['ε']),
            'δ': float(content['δ']),
            'f evaluations': counters['f'],
            'grad_f evaluations': counters['grad_f'],
            'g evaluations': counters['g'],
            'grad_g evaluations': counters['grad_g'],
            'linesearch failures': content['linesearch failures'],
            'L-BFGS failures': content['L-BFGS failures'],
            'L-BFGS rejected': content['L-BFGS rejected'],
        })
    # Passing the column list explicitly keeps the schema intact when there are
    # no records; otherwise set_index('name') would fail on a column-less frame.
    df = pd.DataFrame(records, columns=columns).set_index('name').sort_index()

    df['rel linesearch failures'] = df['linesearch failures'] / df['inner iterations']
    return df

In [None]:
# Result folders of the two test runs that are compared below.
base_folder, lslp_folder = (
    get_test_result_folder(testname)
    for testname in ('baseline', 'linesearch-lipschitz')
)

In [None]:
# Parsed YAML content of every result file, one dict per run.
base_raw, lslp_raw = map(load_raw_data, (base_folder, lslp_folder))

In [None]:
# One DataFrame per run, indexed by test name.
base_df, lslp_df = map(convert_data, (base_raw, lslp_raw))

In [None]:
# Display the converted results of the 'linesearch-lipschitz' run.
lslp_df

In [None]:
def df_stats(df):
    """Print how many tests converged and how much time they took.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'status' column (a test counts as converged when its
        status equals 'Converged') and a numeric 'time' column.

    Prints the converged/total count with its percentage, the summed time of
    all tests, and the summed time of the converged tests only.
    """
    converged = df['status'] == 'Converged'
    # Count via the boolean mask: indexing value_counts() with 'Converged'
    # raises KeyError when not a single test converged.
    conv = converged.sum()
    tot = df['status'].count()
    tot_time = df['time'].sum()
    # Non-converged rows become NaN and are skipped by sum().
    conv_time = df['time'].where(converged).sum()
    # No percentage can be given for an empty frame.
    percentage = 100*conv/tot if tot else float('nan')
    print(f'Converged:      {conv}/{tot} = {percentage}%')
    print(f'Total time:     {tot_time}')
    print(f'Converged time: {conv_time}')

In [None]:
# Print the convergence/time summary of each run under its own heading.
for title, frame in (
    ('Baseline', base_df),
    ('Linesearch with Lipschitz check', lslp_df),
):
    print(f'{title}\n---\n')
    df_stats(frame)
    print('\n')

In [None]:
def compare_results(a, b, columns):
    """Put the selected columns of `a` and `b` side by side.

    The frames are joined on their index (pandas' default left join, so the
    rows of `a` are kept). Columns coming from `a` get the suffix ' 0', those
    coming from `b` the suffix ' 1'.
    """
    left = a[columns]
    right = b[columns]
    return left.join(right, lsuffix=' 0', rsuffix=' 1')

In [None]:
# Lift pandas' row limit so the complete per-test tables are rendered.
pd.set_option('display.max_rows', None)
cmp = compare_results(base_df, lslp_df, ['status', 'f', 'ε', 'δ']) #  'inner iterations', 'outer iterations',
# Positive when the 'f' value of the second run is lower than the baseline's.
cmp['improvement'] = cmp['f 0'] - cmp['f 1']
# Normalise by the magnitude of the baseline value: dividing by the signed
# 'f 0' would flip the sign of the ratio whenever the baseline value is
# negative, so a genuine improvement would be shown (and coloured) as worse.
cmp['rel improvement'] = cmp['improvement'] / cmp['f 0'].abs()
# NOTE: Styler.applymap is deprecated since pandas 2.1 in favour of Styler.map;
# it is kept here so the notebook still runs on older pandas versions.
cmp.style.applymap(color_negative_red, subset=['improvement', 'rel improvement']) \
         .format('{:.2e}', subset=(cmp.dtypes == float))

In [None]:
# Tests that converged in the second run but ended with a higher 'f' than the baseline.
got_worse = cmp['improvement'] < 0
converged_after = cmp['status 1'] == 'Converged'
worse_f = cmp.loc[got_worse & converged_after]
print(f'{len(worse_f)} tests got worse results')
# Relative differences up to this magnitude are not counted as significant.
tol = 1e-5
significant = worse_f['rel improvement'].abs() > tol
really_worse_f = worse_f.loc[significant]
print(f'{len(really_worse_f)} tests got significantly worse')
really_worse_f

In [None]:
# Pair the status of every baseline test with its status after the change
# (left join: tests missing from the second run get NaN as 'status 1').
statusses = base_df[['status']].join(lslp_df[['status']], lsuffix=' 0', rsuffix=' 1')
not_conv_to_conv = (statusses['status 0'] != 'Converged') & (statusses['status 1'] == 'Converged')
conv_to_not_conv = (statusses['status 0'] == 'Converged') & (statusses['status 1'] != 'Converged')

print(f'{int(not_conv_to_conv.sum())} tests that didn\'t converge before do converge after the change')
print(f'{int(conv_to_not_conv.sum())} tests that converged before no longer converge after the change')

display(base_df[conv_to_not_conv])
# The masks are indexed like base_df. Using one directly on lslp_df raises an
# IndexingError as soon as lslp_df contains a test that the baseline lacks,
# so select the rows by test name instead.
regressed_names = statusses.index[conv_to_not_conv]
display(lslp_df[lslp_df.index.isin(regressed_names)])

In [None]:
# display(base_df[not_conv_to_conv])
# display(lslp_df[not_conv_to_conv])

In [None]:
# Restrict the comparison to the tests that converged in both runs.
both_converged = (base_df['status'] == 'Converged') & (lslp_df['status'] == 'Converged')
cmp = compare_results(base_df[both_converged], lslp_df[both_converged], ['status', 'time', 'f evaluations', 'grad_f evaluations'])

# For each quantity, add the net change (baseline minus new) and that change
# relative to the baseline, then print the column sums.
improvement_columns = []
for label, column in [('time', 'time'), ('f eval', 'f evaluations'), ('grad_f eval', 'grad_f evaluations')]:
    net_col = f'{label} improvement'
    rel_col = f'rel {label} improvement'
    cmp[net_col] = cmp[f'{column} 0'] - cmp[f'{column} 1']
    cmp[rel_col] = cmp[net_col] / cmp[f'{column} 0']
    improvement_columns += [net_col, rel_col]
    print(f"Net {label} improvement:      {cmp[net_col].sum()}")
    print(f"Relative {label} improvement: {cmp[rel_col].sum()}")
print('positive is good, negative is bad')

cmp.style.applymap(color_negative_red, subset=improvement_columns) \
         .format('{:.2e}', subset=(cmp.dtypes == float))

In [None]:
# Compare the failure counters of the tests that converged in both runs.
cmp = compare_results(base_df[both_converged], lslp_df[both_converged], ['status', 'linesearch failures', 'L-BFGS failures', 'L-BFGS rejected'])

# (name of the derived column, counter it is computed from)
for imprv_col, counter in [
    ('ls imprv', 'linesearch failures'),
    ('lbfgs imprv', 'L-BFGS failures'),
    ('lbfgs rej imprv', 'L-BFGS rejected'),
]:
    baseline = cmp[f'{counter} 0']
    cmp[imprv_col] = baseline - cmp[f'{counter} 1']
    # A baseline count of 0 has no meaningful relative change. Mask it to NaN
    # so the ratio is NaN (skipped by sum()) rather than +/-inf, which would
    # turn the printed totals below into inf or nan.
    cmp[f'rel {imprv_col}'] = cmp[imprv_col] / baseline.where(baseline != 0)

print(f"Net linesearch failures improvement:    {cmp['ls imprv'].sum()}")
print(f"Relative linesearch improvement:        {cmp['rel ls imprv'].sum()}")
print(f"Net L-BFGS failures improvement:        {cmp['lbfgs imprv'].sum()}")
print(f"Relative L-BFGS failures improvement:   {cmp['rel lbfgs imprv'].sum()}")
print(f"Net L-BFGS rejections improvement:      {cmp['lbfgs rej imprv'].sum()}")
print(f"Relative L-BFGS rejections improvement: {cmp['rel lbfgs rej imprv'].sum()}")

cmp.style.applymap(color_negative_red, subset=['ls imprv', 'rel ls imprv', 'lbfgs imprv', 'rel lbfgs imprv', 'lbfgs rej imprv', 'rel lbfgs rej imprv']) \
         .format('{:.2e}', subset=(cmp.dtypes == float))