In [None]:
import numpy as np
import matplotlib.pyplot as plt
import operator

%matplotlib inline

In [None]:
# Benchmark datasets: name -> (number of samples N, number of features D).
n_samples_features = {
    'mushrooms': (8124, 112),
    'a9a': (32561, 123),
    'cod-rna': (59535, 8),
    'ijcnn1': (49990, 22),
    'covtype': (581012, 54),
    'w8a': (49749, 300),
    'quantum': (50000, 65),
    'protein': (145751, 74),
    'SUSY': (5000000, 18),
    'alpha': (500000, 500),
    'beta': (500000, 500),
    'gamma': (500000, 500),
    'delta': (500000, 500),
    'epsilon': (500000, 2000),
    'zeta': (500000, 2000),
    'fd': (5469800, 900),
    'ocr': (3500000, 1156),
    'mnist8m': (8100000, 784),
    'dna18': (18000000, 800),
}

# Dataset names in the dictionary's own iteration order.
datasets = list(n_samples_features)

# Rank the datasets by problem size N * D, smallest first.
# NOTE(review): the original comment called this "sort by SO2 time"; the code
# only uses N * D, presumably as a proxy for SO2 running time -- confirm.
vals = {
    name: n_rows * n_cols
    for name, (n_rows, n_cols) in n_samples_features.items()
}
# sorted() is stable, so datasets of equal size keep their relative order.
sorted_datasets = sorted(datasets, key=vals.get)
print(sorted_datasets)

# plot results
# For every dataset, load one trace file per solver and draw two figures:
# objective gap versus epoch (column 0) and versus time (column 1).
# Column 2 of every trace is used as the objective value.
# NOTE(review): this iterates `datasets`, not `sorted_datasets`; kept as in the
# original since only the order of the generated figures would change.
for dataset in datasets:
    # skiprows=1 drops the first line of each file (presumably a header).
    # ndmin=2 keeps a trace with a single data row two-dimensional, so the
    # [-1, idx] / [:, idx] indexing below does not raise IndexError.
    res_so2 = np.loadtxt(dataset + '_SO2.dat', skiprows=1, ndmin=2)
    res_sag = np.loadtxt(dataset + '_SAG.dat', skiprows=1, ndmin=2)
    res_sgd = np.loadtxt(dataset + '_SGD.dat', skiprows=1, ndmin=2)
    res_newton = np.loadtxt(dataset + '_newton.dat', skiprows=1, ndmin=2)
    res_hfn = np.loadtxt(dataset + '_HFN.dat', skiprows=1, ndmin=2)
    res_bfgs = np.loadtxt(dataset + '_BFGS.dat', skiprows=1, ndmin=2)
    res_lbfgs = np.loadtxt(dataset + '_LBFGS.dat', skiprows=1, ndmin=2)

    # Every trace, so the time axis can be rescaled in one place.
    all_res = (res_sag, res_sgd, res_newton, res_hfn, res_bfgs, res_lbfgs, res_so2)

    # Reference optimum: the smallest final objective over all solvers except
    # SGD. The solver attaining it has a gap of exactly zero at its last point,
    # which a log axis cannot show.
    # NOTE(review): SGD is left out here and in x_max below, as in the
    # original -- confirm this is intentional.
    f_opt = min(res_so2[-1, 2], res_sag[-1, 2], res_newton[-1, 2],
                res_hfn[-1, 2], res_bfgs[-1, 2], res_lbfgs[-1, 2])

    for idx in [0, 1]:
        # Right edge of the x axis: the largest final x value over the solvers
        # (SGD excluded), capped at ten times the final x value of SO2.
        x_max = max(res_so2[-1, idx], res_sag[-1, idx], res_newton[-1, idx],
                    res_hfn[-1, idx], res_bfgs[-1, idx], res_lbfgs[-1, idx])
        x_max = min(x_max, 10 * res_so2[-1, idx])

        fig = plt.figure()
        # Leave room on the right for the legend placed outside the axes.
        ax = fig.add_axes([0.15, 0.12, 0.6, 0.75])
        if idx == 0:
            ax.set_xlabel('Epoch')
        else:
            # The time column is treated as seconds; pick a readable unit from
            # the axis limit and rescale the limit and every trace by the same
            # factor. The arrays are reloaded for each dataset and idx == 1 is
            # visited once, so the in-place division is applied only once.
            if x_max > 3600:
                units, scale = 'hr', 3600
            elif x_max > 60:
                units, scale = 'min', 60
            else:
                units, scale = 'sec', 1
            if scale != 1:
                x_max /= scale
                for res in all_res:
                    res[:, idx] /= scale
            ax.set_xlabel('Time (%s)' % units)
        ax.set_ylabel('Objective minus optimum')

        ax.semilogy(res_sag[:, idx], res_sag[:, 2] - f_opt, label='SAG', color='b', linewidth=3)
        ax.semilogy(res_sgd[:, idx], res_sgd[:, 2] - f_opt, label='SGD', color='k', linestyle='--', linewidth=3)
        ax.semilogy(res_newton[:, idx], res_newton[:, 2] - f_opt, label='Newton', color='k',
                    linewidth=3, drawstyle='steps-post')
        ax.semilogy(res_hfn[:, idx], res_hfn[:, 2] - f_opt, label='HFN', color='g',
                    linewidth=3, drawstyle='steps-post')
        ax.semilogy(res_bfgs[:, idx], res_bfgs[:, 2] - f_opt, label='BFGS', color='y',
                    linewidth=3, drawstyle='steps-post')
        ax.semilogy(res_lbfgs[:, idx], res_lbfgs[:, 2] - f_opt, label='L-BFGS', color='m',
                    linewidth=3, drawstyle='steps-post')
        # SO2 is drawn last and thicker, under the legend label 'IN'.
        ax.semilogy(res_so2[:, idx], res_so2[:, 2] - f_opt, label='IN', color='r', linewidth=5)

        # Size of a dense N x D matrix at 8 bytes per entry. The float divisor
        # keeps this a true division on Python 2 as well; dividing by a power
        # of two is exact, so the value is unchanged on Python 3.
        N, D = n_samples_features[dataset]
        mem = N * D * 8 / 1024.0 ** 3
        suf = 'GB'
        if mem < 1:
            mem *= 1024
            suf = 'MB'
        ax.set_title('dataset={}, N={:,}, D={:,}, memory={:,.2f} {}'.format(dataset, N, D, mem, suf))

        ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0)
        ax.grid()
        ax.set_xlim(0, x_max)

        # Save through the figure object rather than the implicit current figure.
        if idx == 0:
            fig.savefig('%s_epoch.pdf' % dataset)
        else:
            fig.savefig('%s_time.pdf' % dataset)

        # Render the figure now and release it. Without this, every figure of
        # every dataset stays open until the cell finishes, which triggers
        # matplotlib's "more than 20 figures opened" warning and holds memory.
        plt.show()
        plt.close(fig)