In [1]:
%load_ext autoreload
%autoreload 2

import sys
import numpy as np
from scipy.stats import trim_mean
from sklearn.metrics import mean_squared_error

sys.path.append('..')
from higgs_inference import settings
from higgs_inference.various.utils import format_number

In [2]:
# Root directory of the stored result files. The metric loaders build paths by plain
# string concatenation (result_dir + folder + '/...'), so the trailing slash is required.
result_dir = '../results/'

# TablePrinter class

In [3]:
class TablePrinter:
    """Incrementally build the rows of a LaTeX table, one block of rows at a time.

    Rows are collected with `add()`. Each row has two label columns followed by
    one column per metric function. Within a block, the smallest finite value of
    every metric column is emphasised. Blocks are separated by a midrule line.

    Parameters
    ----------
    metric_fns : list of callables, optional
        Each is called as `fn(filename, folder)` and returns either a number or a
        `(number, bracket)` pair; a truthy `bracket` wraps the formatted number in
        parentheses. Defaults to no metrics.
    header : str, optional
        If given, written as the first row of the table.
    precisions : list of int, optional
        Precision handed to `format_number` for each metric column. If its length
        does not match the number of metrics, a precision of 2 is used everywhere.
    """

    def __init__(self, metric_fns=None, header=None, precisions=None):

        # Functions for metrics. `None` defaults avoid sharing one mutable list
        # between all instances created without arguments.
        self.metric_fns = [] if metric_fns is None else metric_fns
        self.n_metrics = len(self.metric_fns)
        if precisions is not None and len(precisions) == self.n_metrics:
            self.precisions = precisions
        else:
            self.precisions = [2] * self.n_metrics

        # Current block
        self.block_entries = []
        self.block_brackets = []
        self.content_in_last_block = False

        # A midrule requested by new_block() is only written once the next block
        # actually produces rows, so the table never ends with a dangling rule.
        self._midrule_pending = False

        # Formatting options
        self.indent = '   '
        self.col_sep = ' & '
        self.end_row = r'\\'
        self.midrule = r'\midrule'
        self.end_line = '\n'
        self.emphasis_begin = r'\mathbf{'
        self.emphasis_end = r'}'

        # Total table, starting with the optional header row
        self.table = ''
        if header is not None:
            self.table += self.indent + header + self.end_row + self.end_line


    def finalise_block(self):
        """Format the rows of the current block, append them to the table, and reset the block.

        Sets `content_in_last_block` to True if at least one row was written.
        """

        self.content_in_last_block = False

        # Skip if block is empty
        if len(self.block_entries) == 0:
            return

        # Find best (smallest finite) performance per metric column
        block_metrics = np.array([line[2:] for line in self.block_entries])
        block_best = []
        for i in range(self.n_metrics):
            try:
                block_best.append(np.nanargmin(block_metrics[:, i]))
            except ValueError:
                # Column holds only NaNs: nothing to emphasise
                block_best.append(-1)

        # Format entries
        text = ''
        for i, (line, brackets) in enumerate(zip(self.block_entries, self.block_brackets)):

            # Skip entirely empty lines
            try:
                if not np.any(np.isfinite(line[2:])):
                    continue
            except TypeError:
                # Metric values that np.isfinite cannot handle: report the row and drop it
                print('Skipping row with non-numeric metrics:', line)
                continue

            self.content_in_last_block = True

            # Labels
            text += self.indent + line[0] + self.col_sep + line[1] + self.col_sep

            # Metrics (non-finite values leave the cell empty)
            for j in range(self.n_metrics):
                value = line[j + 2]
                if np.isfinite(value):
                    formatted = format_number(value, self.precisions[j], latex_math_mode=True,
                                              emphasize=(i == block_best[j]))
                    if brackets[j + 2]:
                        formatted = '(' + formatted + ')'
                    text += formatted

                # Last metric column ends the row, all others are followed by a separator
                if j == self.n_metrics - 1:
                    text += self.end_row + self.end_line
                else:
                    text += self.col_sep

        # Add to document (preceded by a deferred midrule, if any) and reset for next block
        if self.content_in_last_block:
            if self._midrule_pending:
                self.table += self.indent + self.midrule + self.end_line
                self._midrule_pending = False
            self.table += text
        self.block_entries = []
        self.block_brackets = []


    def new_block(self):
        """Close the current block; a midrule will separate it from the next non-empty block."""
        self.finalise_block()
        if self.content_in_last_block:
            self._midrule_pending = True
            self.content_in_last_block = False


    def add(self, col1, col2, filename, folder='parameterized'):
        """Evaluate all metric functions for one result and queue it as a row of the current block.

        `col1` is blanked if an earlier row of the same block already carries that label.
        Metric functions raising IOError or ValueError contribute NaN. Rows without any
        finite metric value are dropped.
        """

        # Label columns: show the first label only once per block
        first_label = col1
        if any(entry[0] == col1 for entry in self.block_entries):
            first_label = ''
        line = [first_label, col2]
        brackets = [False, False]

        # Metrics
        for fn in self.metric_fns:
            bracket = False
            try:
                value = fn(filename, folder)
            except (IOError, ValueError):
                # Missing or unusable result file: leave this cell empty
                value = np.nan

            if isinstance(value, (list, tuple)):
                value, bracket = value

            line.append(value)
            brackets.append(bracket)

        if np.any(np.isfinite(line[2:])):
            self.block_entries.append(line)
            self.block_brackets.append(brackets)


    def print(self):
        """Finalise the current block and return the table text accumulated so far."""
        self.finalise_block()
        return self.table

# Metrics

In [4]:
def _expected_metric(file_prefix, filename, folder, file_suffix=''):
    """Load a saved metric array and return the sum of `settings.theta_prior * array`.

    The file read is `result_dir + folder + '/' + file_prefix + filename + file_suffix + '.npy'`.
    Presumably the array holds one value per theta point, so that the result is the
    prior-weighted expectation -- confirm against `settings.theta_prior`.
    """
    values = np.load(result_dir + folder + '/' + file_prefix + filename + file_suffix + '.npy')
    return np.sum(settings.theta_prior * values)


def expected_mse_log_r(filename, folder='parameterized'):
    """Prior-weighted sum of the values stored in the `mse_logr_<filename>.npy` file."""
    return _expected_metric('mse_logr_', filename, folder)


def expected_mse_log_r_10k(filename, folder='parameterized'):
    """Same as `expected_mse_log_r`, for the `_trainingsamplesize_10000` result file."""
    return _expected_metric('mse_logr_', filename, folder, '_trainingsamplesize_10000')


def expected_mse_log_r_100k(filename, folder='parameterized'):
    """Same as `expected_mse_log_r`, for the `_trainingsamplesize_100000` result file."""
    return _expected_metric('mse_logr_', filename, folder, '_trainingsamplesize_100000')


def expected_trimmed_mse_log_r(filename, folder='parameterized'):
    """Prior-weighted sum of the values stored in the `trimmed_mse_logr_<filename>.npy` file."""
    return _expected_metric('trimmed_mse_logr_', filename, folder)

# Result table

In [6]:
# Main result table with the metrics expected_mse_log_r and expected_trimmed_mse_log_r.
# Every inner list is one block of rows, given as (label, result filename, result folder).
method_blocks = [
    [
        ('Histogram', 'histo_2d_asymmetricbinning', 'histo'),
        (r'\carl', 'carl_calibrated_shallow', 'parameterized'),
    ],
    [
        (r'\rolr', 'regression_calibrated', 'parameterized'),
        (r'\cascal', 'combined_calibrated_deep', 'parameterized'),
        (r'\rascal', 'combinedregression_calibrated_deep', 'parameterized'),
        (r'\alice', 'mxe_calibrated_deep', 'parameterized'),
        (r'\alices', 'combinedmxe_calibrated_deep', 'parameterized'),
    ],
    [
        (r'\sally', 'scoreregression_rotatedscore_deep', 'score_regression'),
        (r'\sallino', 'scoreregression_scoretheta_deep', 'score_regression'),
    ],
]

table = TablePrinter([expected_mse_log_r, expected_trimmed_mse_log_r], precisions=[4,5])

for block_index, block_rows in enumerate(method_blocks):
    # Blocks after the first are separated from their predecessor
    if block_index > 0:
        table.new_block()
    for method_label, method_file, method_folder in block_rows:
        table.add(method_label, '', method_file, method_folder)

print(table.print())


   Histogram &  & $0.0561$ & $0.01057$\\
   \carl &  & $\mathbf{0.0124}$ & $\mathbf{0.00259}$\\
   \midrule
   \rolr &  & $0.0032$ & $0.00166$\\
   \cascal &  & $0.0008$ & $0.00024$\\
   \rascal &  & $0.0009$ & $0.00037$\\
   \alice &  & $\mathbf{0.0004}$ & $\mathbf{0.00008}$\\
   \alices &  & $0.0013$ & $0.00038$\\
   \midrule
   \sally &  & $\mathbf{0.0132}$ & $\mathbf{0.00025}$\\
   \sallino &  & $0.0213$ & $0.00063$\\



In [5]:
# Same methods as in the main result table, with one column per metric function:
# expected_mse_log_r_10k, expected_mse_log_r_100k, and expected_mse_log_r.
# Every inner list is one block of rows, given as (label, result filename, result folder).
method_blocks = [
    [
        ('Histogram', 'histo_2d_asymmetricbinning', 'histo'),
        (r'\carl', 'carl_calibrated_shallow', 'parameterized'),
    ],
    [
        (r'\rolr', 'regression_calibrated', 'parameterized'),
        (r'\cascal', 'combined_calibrated_deep', 'parameterized'),
        (r'\rascal', 'combinedregression_calibrated_deep', 'parameterized'),
        (r'\alice', 'mxe_calibrated_deep', 'parameterized'),
        (r'\alices', 'combinedmxe_calibrated_deep', 'parameterized'),
    ],
    [
        (r'\sally', 'scoreregression_rotatedscore_deep', 'score_regression'),
        (r'\sallino', 'scoreregression_scoretheta_deep', 'score_regression'),
    ],
]

table = TablePrinter([expected_mse_log_r_10k, expected_mse_log_r_100k, expected_mse_log_r], precisions=[4,4,4])

for block_index, block_rows in enumerate(method_blocks):
    # Blocks after the first are separated from their predecessor
    if block_index > 0:
        table.new_block()
    for method_label, method_file, method_folder in block_rows:
        table.add(method_label, '', method_file, method_folder)

print(table.print())


   Histogram &  &  &  & $0.0561$\\
   \carl &  & $\mathbf{0.1743}$ & $\mathbf{0.1672}$ & $\mathbf{0.0124}$\\
   \midrule
   \rolr &  & $0.1345$ & $0.0396$ & $0.0032$\\
   \cascal &  & $0.1715$ & $0.1652$ & $0.0008$\\
   \rascal &  & $0.0449$ & $0.0100$ & $0.0009$\\
   \alice &  & $0.0510$ & $\mathbf{0.0076}$ & $\mathbf{0.0004}$\\
   \alices &  & $\mathbf{0.0339}$ & $0.0111$ & $0.0013$\\
   \midrule
   \sally &  & $\mathbf{0.0261}$ & $\mathbf{0.0146}$ & $\mathbf{0.0132}$\\
   \sallino &  & $0.0319$ & $0.0227$ & $0.0213$\\



## Hyperparameter scan

In [7]:
# Parallel lists describing the algorithms of the hyperparameter scan:
# entry k of each list belongs to the same algorithm.

# Base names of the result files
filenames = [
    'mxe',
    'mxe_calibrated',
    'combinedmxe',
    'combinedmxe_calibrated',
]

# LaTeX labels shown in the first table column
labels = [
    r'\alice (raw)',
    r'\alice (calibrated)',
    r'\alices (raw)',
    r'\alices (calibrated)',
]

# Result sub-folder of every algorithm
folders = ['parameterized'] * len(filenames)

def show_main_table(algorithm_begin=0, algorithm_end=None):
    """Print the hyperparameter-scan table for a slice of the configured algorithms.

    Reads the module-level parallel lists `labels`, `filenames` and `folders`, and
    uses the entries `[algorithm_begin:algorithm_end]` of each. Every algorithm
    forms one block of the table, with one row per hyperparameter variation for
    which a result could be loaded.

    Parameters
    ----------
    algorithm_begin : int
        Index of the first algorithm to show.
    algorithm_end : int or None
        Index one past the last algorithm to show; None means up to the end.
    """

    # (row description, suffix appended to the algorithm's base filename), in table order
    variations = [
        (r'Baseline', ''),
        (r'Baseline, shallow', '_shallow'),
        (r'Baseline, deep', '_deep'),

        (r'Baseline, $\alpha = 0.5$, deep', '_alpha_0.50_deep'),
        (r'Baseline, $\alpha = 1$, deep', '_alpha_1.0_deep'),
        (r'Baseline, $\alpha = 2$, deep', '_alpha_2.0_deep'),
        (r'Baseline, $\alpha = 5$, deep', '_alpha_5.0_deep'),
        (r'Baseline, $\alpha = 10$, deep', '_alpha_10_deep'),
        (r'Baseline, $\alpha = 20$, deep', '_alpha_20_deep'),
        (r'Baseline, $\alpha = 50$, deep', '_alpha_50_deep'),
        (r'Baseline, $\alpha = 100$, deep', '_alpha_100_deep'),
        (r'Baseline, $\alpha = 200$, deep', '_alpha_200_deep'),
        (r'Baseline, $\alpha = 500$, deep', '_alpha_500_deep'),
        (r'Baseline, $\alpha = 1000$, deep', '_alpha_1000_deep'),

        (r'Baseline, large batches', '_largebatch'),
        (r'Baseline, small batches', '_smallbatch'),
        (r'Baseline, const.\ LR', '_constantlr'),
        (r'Baseline, const.\ LR, large batches', '_constantlr_largebatch'),
        # Previously loaded '_constantlr_largebatch', duplicating the row above
        (r'Baseline, const.\ LR, small batches', '_constantlr_smallbatch'),
        (r'Baseline, small LR', '_slowlearning'),
        (r'Baseline, small LR, large batches', '_slowlearning_largebatch'),
        (r'Baseline, small LR, small batches', '_slowlearning_smallbatch'),
        (r'Baseline, small const.\ LR', '_slowlearning_constantlr'),
        (r'Baseline, small const.\ LR, large batches', '_slowlearning_constantlr_largebatch'),
        (r'Baseline, small const.\ LR, small batches', '_slowlearning_constantlr_smallbatch'),
        (r'Baseline, large LR', '_fastlearning'),
        (r'Baseline, large LR, large batches', '_fastlearning_largebatch'),
        (r'Baseline, large LR, small batches', '_fastlearning_smallbatch'),
        (r'Baseline, large const.\ LR', '_fastlearning_constantlr'),
        (r'Baseline, large const.\ LR, large batches', '_fastlearning_constantlr_largebatch'),
        (r'Baseline, large const.\ LR, small batches', '_fastlearning_constantlr_smallbatch'),

        (r'Random $\boldtheta$', '_random'),
        (r'Random $\boldtheta$, shallow', '_random_shallow'),
        (r'Random $\boldtheta$, deep', '_random_deep'),

        (r'Morphing basis', '_basis'),
        (r'Morphing basis, shallow', '_basis_shallow'),
        (r'Morphing basis, deep', '_basis_deep'),
    ]

    table = TablePrinter([expected_mse_log_r, expected_trimmed_mse_log_r],
                         precisions=[4, 4])

    algorithms = zip(labels[algorithm_begin:algorithm_end],
                     filenames[algorithm_begin:algorithm_end],
                     folders[algorithm_begin:algorithm_end])

    for i, (label, filename, folder) in enumerate(algorithms):

        # One block per algorithm
        if i > 0:
            table.new_block()

        # Pass the configured folder explicitly instead of relying on add()'s default
        for description, suffix in variations:
            table.add(label, description, filename + suffix, folder)

    print(table.print())


In [8]:
# Default arguments (algorithm_begin=0, algorithm_end=None) select every configured algorithm
show_main_table()

   \alice (raw) & Baseline & $0.0008$ & $0.0001$\\
    & Baseline, shallow & $0.0021$ & $0.0001$\\
    & Baseline, deep & $\mathbf{0.0005}$ & $\mathbf{0.0001}$\\
    & Random $\boldtheta$ & $0.0008$ & $0.0001$\\
    & Random $\boldtheta$, shallow & $0.0022$ & $0.0001$\\
    & Random $\boldtheta$, deep & $0.0005$ & $0.0001$\\
   \midrule
   \alice (calibrated) & Baseline & $0.0006$ & $0.0001$\\
    & Baseline, shallow & $0.0016$ & $0.0001$\\
    & Baseline, deep & $0.0004$ & $0.0001$\\
    & Random $\boldtheta$ & $0.0006$ & $\mathbf{0.0001}$\\
    & Random $\boldtheta$, shallow & $0.0016$ & $0.0001$\\
    & Random $\boldtheta$, deep & $\mathbf{0.0004}$ & $0.0001$\\
   \midrule
   \alices (raw) & Baseline & $0.0018$ & $0.0004$\\
    & Baseline, shallow & $0.0033$ & $0.0005$\\
    & Baseline, deep & $\mathbf{0.0014}$ & $0.0004$\\
    & Baseline, $\alpha = 0.5$, deep & $0.0016$ & $0.0003$\\
    & Baseline, $\alpha = 1$, deep & $0.0014$ & $\mathbf{0.0003}$\\
    & Baseline, $\alpha = 2$, de