# Generate test comparison data

This notebook generates the comparison data used by the tests. Re-run it when the stored results are no longer valid.

In [1]:
import pandas as pd
import sys
sys.path.append('../../anduryl')
import anduryl
import numpy as np

### Validation table

In [6]:
# Load the Excalibur reference table (';'-separated, two header rows, first
# column used as index). The copy keeps the same layout and is overwritten
# with the values calculated by Anduryl in the loop below.
validation_table = pd.read_csv('../test/data/Excalibur.csv', sep=';', header=[0, 1], index_col=0)
anduryl_table = validation_table.copy()

# Case name -> file name.
# The keys are used as row labels of validation_table / anduryl_table in the
# loop below, so they have to match the index of the table exactly.
# The '.mat' suffix is stripped in the loop; the remaining stem is used to
# build the '.dtt' and '.rls' paths that are actually loaded.
files = {
    'Arkansas': 'Arkansas.mat',
    'Arsenic': 'arsenic d-r.mat',
    'ATCEP': 'ATCEP Error.mat',
    'Biol_Agent': 'Biol agents.mat',
    'CDC_ROI': 'CDC ROI Final.mat',
    'CoveringKids': 'CoveringKids.mat',
    'create-vicki': 'create.mat',
    'CWD': 'cwd.mat',
    'Daniela': 'Daniela.mat',
    'DCPN_Fistula': 'dcpn_fistula.mat',
    'eBPP': 'ebbp.mat',
    'Eff_Erup': 'EffusiveErupt.mat',
    'Erie_Carp': 'Erie Carps.mat',
    'FCEP': 'FCEP Error.mat',
    'Florida': 'Florida.mat',
    'Gerstenberger': 'Gerstenberger.mat',
    'GL_NIS': 'gl-nis.mat',
    'Goodheart': 'Goodheart.mat',
    'Hemopilia': 'Hemophilia.mat',
    'IceSheets': 'IceSheet2012.mat',
    'Illinois': 'Illinois.mat',
    'Liander': 'liander.mat',
    'Nebraska': 'Nebraska.mat',
    'Obesity': 'obesity_ms.mat',
    'PHAC_T4': 'PHAC 2009 final.mat',
    'San_Diego': 'San Diego.mat',
    'Sheep': 'Sheep Scab.mat',
    'SPEED': 'speed.mat',
    'TDC': 'tdc.mat',
    'Tobacco': 'tobacco.mat',
    'Topaz': 'Topaz.mat',
    'UMD_NREMOVAL': 'umd_nremoval.mat',
    'Washington': 'Washington.mat'
}

def _score_triple(experts, idx):
    """Return [calibration, information (realizations), product of both]
    for the entry at position ``idx``, rounded to two decimals."""
    calibration = experts.calibration[idx]
    information = experts.info_real[idx]
    return np.round([calibration, information, information * calibration], 2)


# Decision makers in the order in which they are calculated:
# (table column, settings passed to calculate_decision_maker)
dm_runs = [
    ('PW Item', dict(weight_type='item', overshoot=0.1, exp_id='DM1', exp_name='Item opt.')),
    ('PW Global', dict(weight_type='global', overshoot=0.1, exp_id='DM2', exp_name='Global opt.')),
    ('PW Non-optimized', dict(weight_type='global', alpha=0.0, overshoot=0.1, exp_id='DM3', exp_name='Global Non-opt.')),
    ('Equal weight', dict(weight_type='equal', overshoot=0.1, exp_id='DM4', exp_name='Equal')),
]

# Order in which the columns are compared and stored:
# (table column, label printed when the values differ)
report_order = [
    ('PW Global', 'global opt'),
    ('PW Non-optimized', 'global non opt'),
    ('PW Item', 'item opt'),
    ('Equal weight', 'Equal weight'),
    ('Best Expert', 'Best Expert'),
]

for key, file in files.items():

    project = anduryl.Project()
    stem = file.replace('.mat', '')
    project.io.load_excalibur(f'../data/{stem}.dtt', f'../data/{stem}.rls')

    # The scores of each decision maker are read from the last position
    # directly after it has been calculated.
    results = {}
    for column, settings in dm_runs:
        project.calculate_decision_maker(**settings)
        results[column] = _score_triple(project.experts, -1)

    # Actual expert with the highest combined score
    actual = project.experts.actual_experts
    ibest = actual[np.argmax([project.experts.comb_score[i] for i in actual])]
    results['Best Expert'] = _score_triple(project.experts, ibest)

    # Report every column for which Anduryl deviates from the Excalibur table
    for column, label in report_order:
        reference = validation_table.loc[key, column].values
        if not (reference == results[column]).all():
            print(key, reference, results[column], label)

    # Store the Anduryl values in the copy of the table
    for column, _ in report_order:
        anduryl_table.loc[key, column] = results[column]

CDC_ROI [0.72 2.31 1.66] [0.72 2.3  1.66] global opt
CDC_ROI [0.72 2.31 1.66] [0.72 2.3  1.66] item opt
CDC_ROI [0.72 2.31 1.66] [0.72 2.3  1.66] Best Expert
CWD [0.49 1.22 0.6 ] [0.49 1.21 0.6 ] global opt
Gerstenberger [0.93 1.1  1.02] [0.93 1.09 1.02] global opt
Hemopilia [0.31 0.49 0.15] [0.31 0.3  0.09] global opt
Hemopilia [0.31 0.46 0.14] [0.31 0.41 0.13] item opt
IceSheets [0.62 0.7  0.43] [0.37 0.66 0.25] global non opt
Topaz [0.41 1.46 0.6 ] [0.41 1.45 0.6 ] global opt
Topaz [0.41 1.46 0.6 ] [0.41 1.45 0.6 ] item opt
Topaz [0.41 1.46 0.6 ] [0.41 1.45 0.6 ] Best Expert


In [7]:
# Write the table filled with the Anduryl results, using ';' as separator
# (the same separator the Excalibur table is read with).
anduryl_table.to_csv('../test/data/Anduryl.csv', sep=';')

### Robustness for tobacco case

In [None]:
from anduryl.core.calculate import get_combinations

In [53]:
project = anduryl.Project()
project.io.load_excalibur('../cases/tobacco.dtt', '../cases/tobacco.rls')

# Ids of the seed items; these are the items that are excluded one
# combination at a time.
seed_idx = project.items.get_idx("seed")
seed_ids = [item for i, item in enumerate(project.items.ids) if seed_idx[i]]

# Get combinations of items to exclude. Each combination is used below as a
# tuple of positions in seed_ids.
combs = get_combinations(items=seed_ids, min_exclude=0, max_exclude=4)

res = {}

for comb in combs:
    # Load the case again, so each combination starts from the full set of
    # items (assumes load_excalibur replaces the project content - TODO confirm)
    project.io.load_excalibur('../cases/tobacco.dtt', '../cases/tobacco.rls')
    # Remove the excluded seed items by id. The id is looked up in seed_ids,
    # not in project.items.ids: the positions in comb refer to seed_ids, and
    # the two lists only line up if the seed items are the first items of the
    # project. This way the removed items always match the key stored below.
    for i in comb[::-1]:
        project.items.remove_item(seed_ids[i])
    # Calculate DM
    project.calculate_decision_maker(weight_type='global', overshoot=0.1, exp_id='DM', alpha=0.0)

    # Save the three scores of the decision maker (last position), keyed by
    # the ids of the excluded items
    res[tuple(seed_ids[i] for i in comb)] = {
        'Info score total': project.experts.info_total[-1],
        'Info score realizations': project.experts.info_real[-1],
        'Calibration score': project.experts.calibration[-1]
    }

# Create table from nested dictionary
table = pd.DataFrame(
    data=res.values(),
    index=list(res.keys()),
    columns=['Info score total' ,'Info score realizations', 'Calibration score']
)

table.to_csv('../test/data/item_robustness_tobacco.csv', sep=';')

In [44]:
# project = anduryl.Project()
# project.io.load_excalibur(f'../cases/tobacco.dtt', f'../cases/tobacco.rls')


# project.calculate_item_robustness(
#     weight_type='global',
#     overshoot=0.1,
#     max_exclude=4,
#     min_exclude=0,
#     calpower=1.0,
#     alpha=0.0
# )

# robres = project.main_results.item_robustness
# df = pd.DataFrame(data=robres.values(), index=list(robres.keys()),
#                   columns=['Info score total' ,'Info score realizations', 'Calibration score'])

In [55]:
project = anduryl.Project()
project.io.load_excalibur('../cases/tobacco.dtt', '../cases/tobacco.rls')

# Manually remove each combination of experts, and calculate the three numbers.
# The expert ids are stored before anything is removed or added, so they can
# be used both to remove the experts and to label the results.
expert_ids = list(project.experts.ids)
nexperts = len(expert_ids)
combs = get_combinations(items=list(range(nexperts)), min_exclude=0, max_exclude=4)

res = {}

for comb in combs:
    # Load the case again, so each combination starts from the full set of
    # experts (assumes load_excalibur replaces the project content - TODO confirm)
    project.io.load_excalibur('../cases/tobacco.dtt', '../cases/tobacco.rls')
    # Remove the excluded experts
    for i in comb[::-1]:
        project.experts.remove_expert(expert_ids[i])
    # Calculate DM
    project.calculate_decision_maker(weight_type='global', overshoot=0.1, exp_id='DM', alpha=0.0)

    # Save the three scores of the decision maker (last position), keyed by
    # the ids of the excluded experts. The key used to be built from the
    # seed item ids of the item robustness cell, which labelled the rows with
    # item names and depended on a variable from another cell.
    res[tuple(expert_ids[i] for i in comb)] = {
        'Info score total': project.experts.info_total[-1],
        'Info score realizations': project.experts.info_real[-1],
        'Calibration score': project.experts.calibration[-1]
    }

# Create table from nested dictionary
table = pd.DataFrame(
    data=res.values(),
    index=list(res.keys()),
    columns=['Info score total' ,'Info score realizations', 'Calibration score']
)

table.to_csv('../test/data/expert_robustness_tobacco.csv', sep=';')

In [3]:
# project = anduryl.Project()
# project.io.load_excalibur(f'../cases/tobacco.dtt', f'../cases/tobacco.rls')


# project.calculate_expert_robustness(
#     weight_type='global',
#     overshoot=0.1,
#     max_exclude=4,
#     min_exclude=0,
#     calpower=1.0,
#     alpha=0.0
# )

# robres = project.main_results.expert_robustness
# df = pd.DataFrame(data=robres.values(), index=list(robres.keys()),
#                   columns=['Info score total' ,'Info score realizations', 'Calibration score'])

# df.to_csv('../test/data/expert_robustness_tobacco.csv', sep=';')