# Comparison of double-sum formulations
This notebook compares the final log-loss of U-max under the proposed double-sum formulation with the final log-loss under Raman's double-sum formulation.

In [39]:
import pickle
import numpy as np
import os.path
import math

In [42]:
def get_final_log_loss(sgd_name, dataset, learning_rate, directory):
    """Return the final training log-loss stored in a results file, or None.

    The results file is looked up at
    'Results/Complete/<directory>/<sgd_name>_<dataset>_<learning_rate>.p'.
    When directory == 'Tuning' the file name is instead
    '<sgd_name>_<dataset>_lr_<learning_rate>_prop_data_0.1.p'.

    sgd_name:           Algorithm prefix of the results file,
                        e.g. 'Umax', 'tilde_Umax' or 'VanillaSGD'
    dataset:            Dataset name as it appears in the file name
    learning_rate:      Learning rate, given as the exact string used
                        in the file name (e.g. '0.1')
    directory:          Sub-directory of 'Results/Complete/'; the value
                        'Tuning' also switches the file-name pattern

    Returns the last element of results['train'][:, :, 1] averaged over
    axis 0. Returns None when the file does not exist or when that value
    is NaN.
    """
    is_tuning = directory == 'Tuning'
    directory_name = 'Results/Complete/' + directory + '/'
    file_name = (directory_name
                 + sgd_name + '_'
                 + dataset + '_'
                 + ('lr_' if is_tuning else '')
                 + learning_rate
                 + ('_prop_data_0.1' if is_tuning else '')
                 + '.p')

    # Missing results are reported as None (callers filter on it) instead
    # of raising.
    if not os.path.isfile(file_name):
        return None

    # NOTE(review): pickle.load can execute arbitrary code; only use this on
    # result files produced by trusted runs.
    with open(file_name, 'rb') as f:
        results = pickle.load(f)

    # assumes results['train'] is indexed (repetition, step, metric) with
    # metric index 1 holding the log-loss -- TODO confirm against the writer.
    final_log_loss = np.mean(results['train'][:, :, 1], axis=0)[-1]
    return None if math.isnan(final_log_loss) else final_log_loss
        

In [3]:
# Learning rates to compare, largest first. They are kept as strings because
# they are spliced verbatim into the results file names.
learning_rates = [
    '1000.0',
    '100.0',
    '10.0',
    '1.0',
    '0.1',
    '0.01',
    '0.001',
]

In [4]:
# For every learning rate, divide the final log-loss of 'tilde_Umax' by the
# final log-loss of 'Umax' (both read from the 'Original' Eurlex results).
relative_log_losses = []
for lr in learning_rates:
    tilde_loss = get_final_log_loss('tilde_Umax', 'Eurlex', lr, 'Original')
    proposed_loss = get_final_log_loss('Umax', 'Eurlex', lr, 'Original')
    relative_log_losses.append(tilde_loss / proposed_loss)

In [5]:
# Display results: the per-learning-rate ratios, followed by their mean.

print('Relative log_losses: {}'.format(relative_log_losses))
# Arithmetic mean over all entries of relative_log_losses.
mean_relative_log_losses = np.mean(relative_log_losses)
print('\nMean relative log_loss: {}'.format(mean_relative_log_losses))

Relative log_losses: [4.1697272310583493, 4.2467509260447054, 4.0496241976833636, 3.6603675901527217, 2.3218590387451092, 1.686531567322741, 1.4035453259455466]

Mean relative log_loss: 3.076915125278934


# Similar calculations comparing Umax and Vanilla SGD

In [59]:
datasets = ['Eurlex', 'wikiSmall', 'AmazonCat', 'wiki10', 'Delicious', 'Bibtex', 'mnist']
# NOTE: this rebinds learning_rates to a different grid than the one used for
# the double-sum comparison earlier in the notebook.
learning_rates = ['100.0', '10.0', '1.0', '0.1', '0.01', '0.001', '0.0001']

# Ratio of the VanillaSGD final log-loss to the Umax final log-loss for every
# (dataset, learning rate) pair for which both values are available.
# Each results file is loaded at most once per pair.
ratio = []
for dataset in datasets:
    # Umax results for Eurlex are read from 'Original', all others from 'Tuning'.
    umax_directory = 'Original' if dataset == 'Eurlex' else 'Tuning'
    for lr in learning_rates:
        vanilla_loss = get_final_log_loss('VanillaSGD', dataset, lr, 'Tuning')
        # Skips pairs whose VanillaSGD result is not a float; this includes
        # the None returned for a missing file or a NaN loss.
        if not isinstance(vanilla_loss, float):
            continue
        umax_loss = get_final_log_loss('Umax', dataset, lr, umax_directory)
        if umax_loss is None:
            continue
        ratio.append(vanilla_loss / umax_loss)
print(np.mean(ratio))

0.9521969255


True