# Comparison of double-sum formulations
This notebook compares the final log-loss of U-max under the proposed double-sum formulation with its final log-loss under Raman's double-sum formulation.

In [39]:
import pickle
import numpy as np
import os.path
import math

In [42]:
def get_final_log_loss(sgd_name, dataset, learning_rate, directory):
    """Returns the final mean training log-loss stored in a pickled results file.

    The file is looked up at
        Results/Complete/<directory>/<sgd_name>_<dataset>_<learning_rate>.p
    except when directory == 'Tuning', where the file name is
        <sgd_name>_<dataset>_lr_<learning_rate>_prop_data_0.1.p

    sgd_name:           Method prefix of the file name,
                        e.g. 'Umax', 'tilde_Umax' or 'VanillaSGD'
    dataset:            Dataset name as used in the file name, e.g. 'Eurlex'
    learning_rate:      Learning rate exactly as it is spelled in the file
                        name, e.g. '0.1'
    directory:          Sub-directory of 'Results/Complete/',
                        e.g. 'Original' or 'Tuning'

    Returns the last entry of results['train'][:, :, 1] averaged over axis 0,
    or None if the results file does not exist or that value is NaN.
    """
    is_tuning = directory == 'Tuning'
    file_name = '{}_{}_{}{}{}.p'.format(
        sgd_name,
        dataset,
        'lr_' if is_tuning else '',
        learning_rate,
        '_prop_data_0.1' if is_tuning else '')
    path = os.path.join('Results', 'Complete', directory, file_name)

    # A missing results file is reported as None rather than raised, so that
    # callers can probe for combinations that were never run.
    if not os.path.isfile(path):
        return None

    # NOTE: pickle.load can execute arbitrary code; only point this at
    # results files produced by trusted runs.
    with open(path, 'rb') as f:
        results = pickle.load(f)

    # Presumably axis 0 indexes repeated runs and axis 1 the evaluation
    # points, with column 1 of the last axis holding the log-loss -- TODO
    # confirm against the code that writes these files.
    final_log_loss = np.mean(results['train'][:, :, 1], axis=0)[-1]
    return None if math.isnan(final_log_loss) else final_log_loss
        

In [3]:
# Learning rates swept for the double-sum comparison, largest first.
# They are kept as strings because they are spliced verbatim into file names.
# NOTE: `learning_rates` is reassigned to a shorter list in the
# Umax vs. Vanilla SGD section further down; re-run this cell before
# recomputing the relative log-losses.
learning_rates = [
    '1000.0',
    '100.0',
    '10.0',
    '1.0',
    '0.1',
    '0.01',
    '0.001',
]

In [4]:
# For each learning rate, divide the final log-loss of Raman's formulation
# ('tilde_Umax') by that of the proposed formulation ('Umax') on Eurlex.
# A ratio above 1 means the proposed formulation reached the lower log-loss.
# If either results file is missing or its final value is NaN,
# get_final_log_loss returns None and the division raises a TypeError.
relative_log_losses = [
    get_final_log_loss('tilde_Umax', 'Eurlex', rate, 'Original')
    / get_final_log_loss('Umax', 'Eurlex', rate, 'Original')
    for rate in learning_rates
]

In [5]:
# Display results: the per-learning-rate ratios (in the same order as
# `learning_rates`), followed by their arithmetic mean.

print('Relative log_losses: {}'.format(relative_log_losses))
# Unweighted mean over all learning rates in the sweep.
mean_relative_log_losses = np.mean(relative_log_losses)
print('\nMean relative log_loss: {}'.format(mean_relative_log_losses))

Relative log_losses: [4.1697272310583493, 4.2467509260447054, 4.0496241976833636, 3.6603675901527217, 2.3218590387451092, 1.686531567322741, 1.4035453259455466]

Mean relative log_loss: 3.076915125278934


# Similar calculations comparing Umax and Vanilla SGD

In [48]:
datasets = [
    'Eurlex',
    'wikiSmall',
    'AmazonCat',
    'wiki10',
    'Delicious',
    'Bibtex',
    'mnist',
]
# NOTE: this replaces the longer `learning_rates` list defined for the
# double-sum comparison above ('1000.0' is not part of this sweep).
learning_rates = [
    '100.0',
    '10.0',
    '1.0',
    '0.1',
    '0.01',
    '0.001',
]

# List (Umax final log-loss, dataset, learning rate) for every combination
# whose VanillaSGD 'Tuning' result exists and is not NaN (get_final_log_loss
# returns None otherwise, which fails the float check).
# Umax results are read from 'Original' for Eurlex and from 'Tuning' for
# every other dataset.
[
    (
        get_final_log_loss(
            'Umax',
            dataset_name,
            rate,
            'Tuning' if dataset_name != 'Eurlex' else 'Original',
        ),
        dataset_name,
        rate,
    )
    for dataset_name in datasets
    for rate in learning_rates
    if isinstance(
        get_final_log_loss('VanillaSGD', dataset_name, rate, 'Tuning'),
        float,
    )
]

[(1835.1088422971784, 'Eurlex', '100.0'),
 (189.8172166785443, 'Eurlex', '10.0'),
 (18.729691386271206, 'Eurlex', '1.0'),
 (2.0354487273675277, 'Eurlex', '0.1'),
 (2.4123893994610692, 'Eurlex', '0.01'),
 (4.179664609026239, 'Eurlex', '0.001'),
 (459833.50452787575, 'wikiSmall', '100.0'),
 (47119.402964551358, 'wikiSmall', '10.0'),
 (4700.0686076358443, 'wikiSmall', '1.0'),
 (477.81099390642265, 'wikiSmall', '0.1'),
 (49.105579853269589, 'wikiSmall', '0.01'),
 (9.4242557362545973, 'wikiSmall', '0.001'),
 (23038.543590446283, 'AmazonCat', '100.0'),
 (2278.0358779783346, 'AmazonCat', '10.0'),
 (227.66827834491139, 'AmazonCat', '1.0'),
 (23.02143153824845, 'AmazonCat', '0.1'),
 (4.9821618158893841, 'AmazonCat', '0.01'),
 (5.1760768302992437, 'AmazonCat', '0.001'),
 (22484.191898148267, 'wiki10', '100.0'),
 (2247.1108630260346, 'wiki10', '10.0'),
 (225.5156840036083, 'wiki10', '1.0'),
 (21.712564780609007, 'wiki10', '0.1'),
 (7.0357752992754765, 'wiki10', '0.01'),
 (7.9377412128508134, 'wik