In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os

%matplotlib inline

In [None]:
# Regularizer tag. It is substituted into the result-file names and output PDF
# names, and the plotting cell adds the LBFGS curve only when it equals 'l2'.
reg_type = 'l1'

# Benchmark datasets to plot. 'name' is substituted into file names; 'n' and 'd'
# are printed in the plot titles (presumably the number of samples and the
# number of features -- TODO confirm against the data files).
datasets = [
    dict(name='a9a', n=32561, d=123),
    dict(name='mushrooms', n=8124, d=112),
    dict(name='ijcnn1', n=49990, d=22),
    dict(name='cod-rna', n=59535, d=8),
    dict(name='covtype', n=581012, d=54),
    dict(name='w8a', n=49749, d=300),
    dict(name='protein', n=145751, d=74),
    dict(name='quantum', n=50000, d=65),
    dict(name='SUSY', n=5000000, d=18),
    dict(name='alpha', n=500000, d=500),
]

In [None]:
# Plot all results
#
# For every dataset, plot (column 2 of each method's result table) minus the
# smallest such value found across all methods, on a log y-axis, once against
# column 0 ("Epoch") and once against column 1 ("Time"). Each figure is saved
# as a PDF, and the per-dataset PDFs are then merged with `pdfunite`.

methods = [
    {'file': '%s.%s.NIM.minibatch_size=100.dat', 'name': 'NIM', 'code': 'nim'},
    {'file': '%s.%s.SAG.minibatch_size=10.dat', 'name': 'SAG', 'code': 'sag'},
    {'file': '%s.%s.newton.exact=0.dat', 'name': 'Newton Inexact', 'code': 'newton_inexact'},
]
if reg_type == 'l2':
    # The LBFGS curve is only included for the l2 regularizer.
    methods += [
        {'file': '%s.%s.LBFGS.dat', 'name': 'LBFGS', 'code': 'lbfgs'},
    ]

# (x-axis column in the result table, x-axis label, tag used in the PDF name)
x_axes = [(0, 'Epoch', 'epoch'), (1, 'Time', 'time')]

# savefig and pdfunite do not create missing directories, so make sure the
# output tree exists ('pdf/final' implies 'pdf').
if not os.path.isdir('pdf/final'):
    os.makedirs('pdf/final')

for dataset in datasets:
    # Parse each result file once per dataset and reuse it for both figures.
    # ndmin=2 keeps a file with a single data row two-dimensional, so the
    # column indexing below still works. skiprows=1 drops the first line
    # (presumably a header -- confirm against the .dat files).
    tables = [np.loadtxt(method['file'] % (reg_type, dataset['name']),
                         skiprows=1, ndmin=2)
              for method in methods]

    # Find optimal function value: the smallest value any method reached.
    f_opt = min([np.inf] + [np.min(res_table[:, 2]) for res_table in tables])

    # Plot results
    for idx, x_label, add_name in x_axes:
        fig, ax = plt.subplots()
        for method, res_table in zip(methods, tables):
            ax.semilogy(res_table[:, idx], res_table[:, 2] - f_opt,
                        label=method['name'], linewidth=2)
        ax.grid()
        ax.legend()
        # Fix the bottom of the log axis at 1e-13 and keep the autoscaled top.
        ax.set_ylim(1e-13, ax.get_ylim()[1])
        ax.set_xlabel(x_label)
        if idx == 0:
            # Only the first 50 epochs are shown on the epoch plots.
            ax.set_xlim(0, 50)
        ax.set_ylabel('Function minus optimum')
        ax.set_title('Reg_type=%s, Dataset=%s (n=%d, d=%d)' %
                     (reg_type, dataset['name'], dataset['n'], dataset['d']))
        fig.savefig('pdf/results.%s.%s.%s.pdf' % (reg_type, dataset['name'], add_name))

# Merge into one PDF
for tt in ['epoch', 'time']:
    fnames = ['pdf/results.%s.%s.%s.pdf' % (reg_type, dataset['name'], tt)
              for dataset in datasets]
    merged_name = 'pdf/final/results.%s.%s.pdf' % (reg_type, tt)
    # File names are built only from the constants defined in this notebook,
    # so they are interpolated into the shell command unquoted.
    status = os.system('pdfunite %s %s' % (' '.join(fnames), merged_name))
    if status != 0:
        # Report the failure instead of silently ignoring it; the individual
        # per-dataset PDFs have already been written at this point.
        print('pdfunite exited with status %s; %s may be missing or incomplete'
              % (status, merged_name))

In [None]:
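# # Plot different minibatch sizes
# # (Disabled experiment: this whole cell is commented out. If it is enabled,
# #  note that it rebinds `methods` to a list of plain strings.)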

# minibatch_sizes = [2, 10, 100, 1000, 5000, 10000, 30000]
# methods = ['NIM']

# for dataset in datasets:
#     # Find optimal function value
#     f_opt = np.inf
#     for method in methods:
#         for minibatch_size in minibatch_sizes:
#             if minibatch_size > dataset['n']: continue
                
#             res_table = np.loadtxt('%s.%s.%s.minibatch_size=%d.dat' %
#                                    (reg_type, dataset['name'], method, minibatch_size), skiprows=1)
#             f_opt = min(f_opt, np.min(res_table[:, 2]))
    
#     # Plot results
#     for method in methods:
#         for idx in [0, 1]:
#             plt.figure()
#             for minibatch_size in minibatch_sizes:
#                 if minibatch_size > dataset['n']: continue
                    
#                 res_table = np.loadtxt('%s.%s.%s.minibatch_size=%d.dat' %
#                                    (reg_type, dataset['name'], method, minibatch_size), skiprows=1)
#                 plt.semilogy(res_table[:, idx], res_table[:, 2] - f_opt,
#                              label='%s %d' % (method, minibatch_size), linewidth=2)
#             plt.grid()
#             plt.legend()
#             plt.ylim(1e-13, plt.ylim()[1])
#             if idx == 0:
#                 add_name = 'epoch'
#                 plt.xlabel('Epoch')
#             else:
#                 add_name = 'time'
#                 plt.xlabel('Time')
#             plt.ylabel('Function minus optimum')
#             plt.title('Reg_type=%s, Dataset=%s (n=%d, d=%d)' %
#                       (reg_type, dataset['name'], dataset['n'], dataset['d']))
#             plt.savefig('pdf/minibatch_sizes.%s.%s.%s.%s.pdf' % (reg_type, dataset['name'], method, add_name))
            
# # Merge PDFs
# for method in methods:
#     for tt in ['epoch', 'time']:
#         fnames = []
#         for dataset in datasets:
#             filename = 'pdf/minibatch_sizes.%s.%s.%s.%s.pdf' % (reg_type, dataset['name'], method, tt)
#             fnames.append(filename)
#         os.system('pdfunite %s pdf/final/minibatch_sizes.%s.%s.%s.pdf' % (' '.join(fnames), reg_type, method, tt))