## Run all experiments

In [1]:
import math
import numpy as np
import os
from Dataset_generation import get_dataset, get_q_batches, get_q_true, get_shuffled_dataset
from Output_data_generation import initialize_folders, get_settings, get_file_name, save_data
from Quantile_procs import get_procs, get_res

In [2]:
# Disabled list of comparison names; the same names are used as keys of the
# args_dict_* settings further down in this notebook.
# sgd_lst = ['distro', 'data_size', 'step_size', 'data_sequence']
# Quantile levels (tau) to estimate. all_method_comparisons reads this
# notebook-level name and forwards it to quantile_method_compare as tau_lst.
tau_vals = [0.1, 0.3, 0.5, 0.9, 0.99]

In [3]:
# Get quantile estimates
def get_q_ests(dataset, tau_lst, method_name, stepsize):
    """Run one quantile-estimation method over a 1-d or 2-d dataset.

    Parameters
    ----------
    dataset : array-like exposing ``.shape``
        Either a 1-d array of samples, or a 2-d array whose rows are each
        passed to ``get_procs`` separately.
    tau_lst : sequence
        Quantile levels; forwarded unchanged to ``get_procs``.
    method_name : str
        Name of the estimation method; forwarded unchanged to ``get_procs``.
    stepsize
        Step-size setting; forwarded to ``get_procs`` as the ``stepsize``
        keyword argument.

    Returns
    -------
    (res, procs)
        For 1-d input: ``procs`` is whatever ``get_procs`` returns and ``res``
        is ``get_res(procs)``.
        For 2-d input: ``res`` has shape ``(n_rows, len(tau_lst))`` and
        ``procs`` has shape ``(n_rows, len(tau_lst), n_cols)``; row ``i`` of
        each holds the result for ``dataset[i]``.

    Raises
    ------
    ValueError
        If ``dataset`` is not 1-d or 2-d (this includes 0-d input, which
        previously failed later with an unrelated IndexError).
    """
    print("get_q_ests using method_name {}".format(method_name))

    n_dims = len(dataset.shape)
    if n_dims not in (1, 2):
        raise ValueError('Dataset for q_est calculation of wrong shape: {}, should be 1d or 2d array'
                         .format(str(dataset.shape)))

    if n_dims == 1:
        procs = get_procs(dataset, tau_lst, method_name, stepsize=stepsize)
        res = get_res(procs)
        return res, procs

    n_rows, n_cols = dataset.shape
    res = np.zeros((n_rows, len(tau_lst)))
    # Pre-allocated per-row process traces. This assumes get_procs returns
    # something assignable to a (len(tau_lst), n_cols) slot -- TODO confirm
    # against Quantile_procs.get_procs.
    procs = np.zeros((n_rows, len(tau_lst), n_cols))
    for idx, dt in enumerate(dataset):
        # Use the stepsize keyword here as well, so both branches call
        # get_procs the same way.
        procs[idx] = get_procs(dt, tau_lst, method_name, stepsize=stepsize)
        res[idx] = get_res(procs[idx])
    return res, procs

In [4]:
def get_normalized_e(true, batches, est):
    """Return ``(est - batches) / (true - batches)``.

    The three arguments are combined with plain ``-`` and ``/``, so they may
    be scalars or anything supporting those operators. No guard is applied
    when ``true`` equals ``batches`` (zero denominator).
    """
    return (est - batches) / (true - batches)

In [5]:
# Main driver: for every (distro, datasize, stepsize) setting, estimate the quantiles with one method and save the results

def quantile_method_compare(folder_name, distro_lst, datasize_lst, stepsize_lst,
                            MULTI_GENERATION=False, SHUFFLE=False, tau_lst=None,
                            method_name='SGD',
                            ):
    """Run one estimation method over every setting and save the results.

    Parameters
    ----------
    folder_name : str
        Output folder; forwarded to ``save_data``.
    distro_lst, datasize_lst, stepsize_lst : list
        Values to combine into settings via ``get_settings``; each resulting
        setting is read as ``[distro, datasize, stepsize]``.
    MULTI_GENERATION : bool
        Forwarded to ``get_dataset``.
    SHUFFLE : bool
        When true, the dataset is passed through ``get_shuffled_dataset``
        before estimation. Also forwarded to ``get_file_name``.
    tau_lst : list, optional
        Quantile levels. Defaults to ``[0.1, 0.3, 0.5, 0.9, 0.99]``; a fresh
        list is built per call so the default is never shared between calls.
    method_name : str
        Estimation method name; forwarded to ``get_q_ests`` and ``save_data``.

    Raises
    ------
    Exception
        If both ``MULTI_GENERATION`` and ``SHUFFLE`` are true.
    """
    if MULTI_GENERATION and SHUFFLE:
        raise Exception("MULTI_GENERATION and SHUFFLE can't both be true")

    if tau_lst is None:
        tau_lst = [0.1, 0.3, 0.5, 0.9, 0.99]

    # generate different settings
    setting_lst, changed_setting = get_settings(distro_lst, datasize_lst, stepsize_lst)
    print(setting_lst)

    # If only the stepsize varies, one dataset (and its batch quantiles) is
    # generated up front and shared by every setting.
    multi_dataset = not (len(distro_lst) == 1 and len(datasize_lst) == 1)
    dataset, q_batches = None, None
    if not multi_dataset:
        dataset = get_dataset(distro_lst[0], datasize_lst[0], MULTI_GENERATION)
        q_batches = get_q_batches(dataset, tau_lst)

    # each setting = [distro, datasize, stepsize]
    for setting in setting_lst:
        # datasize is cast because the recorded notebook output shows the
        # settings coming back as strings (e.g. '1000').
        distro, datasize, stepsize = setting[0], int(setting[1]), setting[2]
        q_true = get_q_true(distro, tau_lst)

        if multi_dataset:
            # distro and/or datasize changed: regenerate data for this setting
            dataset = get_dataset(distro, datasize, MULTI_GENERATION)
            q_batches = get_q_batches(dataset, tau_lst)

        # Keep the shuffled copy in its own name. Rebinding `dataset` here
        # would make later settings that share the dataset shuffle data that
        # had already been shuffled.
        if SHUFFLE:
            est_dataset = get_shuffled_dataset(dataset, datasize, SHUFFLE)
        else:
            est_dataset = dataset

        q_est_res, q_est_proc = get_q_ests(est_dataset, tau_lst, method_name, stepsize)
        E = get_normalized_e(q_true, q_batches, q_est_res)

        data_dict = {
            'q_true': q_true,
            'q_batches': q_batches,
            'q_est_res': q_est_res,
            'q_est_proc': q_est_proc,
            'E': E
        }

        # write the quantiles estimation results into files
        file_name = get_file_name(changed_setting, distro, datasize, stepsize, SHUFFLE)
        save_data(folder_name, file_name, tau_lst, data_dict, method_name)

In [7]:
def all_method_comparisons(compare_lst, args_dict, root_folder='SGD', tau_lst=None):
    """Run ``quantile_method_compare`` once per comparison name.

    Parameters
    ----------
    compare_lst : list of str
        Comparison names; each must be a key of ``args_dict`` and is also used
        as the sub-folder name under ``Experiment_results/<root_folder>/``.
    args_dict : dict
        Maps a comparison name to a tuple
        ``(distro_lst, datasize_lst, stepsize_lst, MULTI_GENERATION[, SHUFFLE])``.
        ``SHUFFLE`` defaults to False when the tuple has fewer than 5 items.
    root_folder : str
        Folder name under ``Experiment_results/``; also passed to
        ``quantile_method_compare`` as ``method_name``.
    tau_lst : list, optional
        Quantile levels. When omitted, the notebook-level ``tau_vals`` is used.

    Raises
    ------
    KeyError
        If a name in ``compare_lst`` has no entry in ``args_dict``.
    """
    for t in compare_lst:
        print(t)
        folder_name = "Experiment_results/{}/{}/".format(root_folder, t)

        args = args_dict.get(t)
        if args is None:
            # Fail with the offending name instead of the opaque
            # "'NoneType' object is not subscriptable" raised further down.
            raise KeyError("no experiment arguments defined for comparison '{}'".format(t))

        distro_lst, datasize_lst, stepsize_lst = args[0], args[1], args[2]
        MULTI_GENERATION = args[3]
        SHUFFLE = args[4] if len(args) >= 5 else False
        # Resolved inside the loop so tau_vals is only needed when there is
        # at least one comparison to run and no explicit tau_lst was given.
        taus = tau_vals if tau_lst is None else tau_lst

        quantile_method_compare(folder_name, distro_lst, datasize_lst, stepsize_lst,
                                MULTI_GENERATION, SHUFFLE, taus, method_name=root_folder)
            

 ### Settings

In [8]:
# Default settings, please don't change

# Distribution names; used below as the distro_lst entry of the 'distro'
# comparisons in args_dict_SGD and args_dict_adapt.
distros = ['mix', 'gau_1', 'gau_2', 'exp']
# Step-size names; used below as the stepsize_lst entry of the 'step_size'
# comparisons.
stepsizes = ['const', '2_div_sqrt_k', '0.002_div_sqrt_k']

# NOTE(review): N_g and N_s are not referenced anywhere else in the visible
# notebook; presumably they mirror the generation/shuffle counts used inside
# Dataset_generation -- TODO confirm or remove.
N_g = 12 # N_generation
N_s = 10 # N_shuffle

In [9]:
# Experiment argument tables (this cell only defines settings; the call that
# would run them is commented out at the bottom).
# Folder_options is not read anywhere else in the visible notebook.
Folder_options = ['SGD', 'Frugal', 'Adaptive', 'P2',]
# NOTE: main_folder and compare_lst are reassigned by the later
# 'Adaptive_stepsize' cell before they are used in an active call.
main_folder = 'SGD'
compare_lst = ['distro', 'data_size', 'data_sequence']


# Each value is the tuple unpacked by all_method_comparisons:
#   (distro_lst, datasize_lst, stepsize_lst, MULTI_GENERATION[, SHUFFLE])
# SHUFFLE is optional and treated as False when the tuple has only 4 items.
# NOTE(review): 'data_size' uses 100000 here but 10000 in the other
# args_dict_* tables -- confirm which is intended.
# NOTE(review): 'step_size' is defined here but is not listed in compare_lst
# above.
args_dict_SGD = {
    'distro': (distros, [1000], ['const'], True),
    'data_size': (['gau_1'], [100, 1000, 100000], ['const'], True),
    'step_size': (['gau_1'], [1000], stepsizes, True),
    'data_sequence':(['gau_1', 'gau_1'], [1000], ['const'], False, True),
}

# shiftQ needs positive inputs
# Same tuple layout as args_dict_SGD, using the 'positive ...' distribution names.
args_dict_shift_Q = {
    'distro': (['positive mix', 'positive gau_1', 'positive gau_2', 'positive exp'], [1000], ['const'], True),
    'data_size': (['positive gau_1'], [100, 1000, 10000], ['const'], True),
    'data_sequence':(['positive gau_1', 'positive gau_1'], [1000], ['const'], False, True),
}

# Disabled example run.
# NOTE(review): the call below refers to `args_dict`, which is not defined in
# the visible notebook (only args_dict_SGD / args_dict_shift_Q are); it would
# raise NameError if uncommented as-is.
# initialize_folders(main_fd = 'Experiment_results/', fd_lst = [main_folder], 
#                        sub_fd_lst = [str(main_folder)+'/'], subsub_fd_lst = ['data_size'])

# all_method_comparisons(['step_size'], args_dict, main_folder)

### Run this one to generate all experimental results

In [10]:
# Run the 'Adaptive_stepsize' experiments.
# main_folder is both the results sub-folder and, via all_method_comparisons'
# root_folder argument, the method_name handed to quantile_method_compare.
main_folder = 'Adaptive_stepsize'
# Comparison names to run; each must be a key of args_dict_adapt and is also
# passed to initialize_folders as a sub-sub-folder name.
compare_lst = ['distro', 'data_size', 'step_size']

# Tuple layout per entry (as unpacked by all_method_comparisons):
#   (distro_lst, datasize_lst, stepsize_lst, MULTI_GENERATION[, SHUFFLE])
args_dict_adapt = {
    'distro': (distros, [1000], ['const'], True),
    'data_size': (['gau_2'], [100, 1000, 10000], ['const'], True),
    'step_size': (['gau_1'], [1000], stepsizes, True),
}
# Folder setup is requested before the experiments run, since results are
# saved under Experiment_results/<main_folder>/<comparison>/.
initialize_folders(main_fd = 'Experiment_results/', fd_lst = [main_folder], 
                       sub_fd_lst = [str(main_folder)+'/'], subsub_fd_lst = compare_lst)

all_method_comparisons(compare_lst, args_dict_adapt, main_folder)

distro
False
[['mix' '1000' 'const']
 ['gau_1' '1000' 'const']
 ['gau_2' '1000' 'const']
 ['exp' '1000' 'const']]
get_q_ests using method_name Adaptive_stepsize
Experiment_results/Adaptive_stepsize/distro/mix_q_true.txt
Experiment_results/Adaptive_stepsize/distro/mix_q_batches.txt
Experiment_results/Adaptive_stepsize/distro/mix_q_est_res.txt
Experiment_results/Adaptive_stepsize/distro/mix_q_est_proc.txt
Experiment_results/Adaptive_stepsize/distro/mix_E.txt
get_q_ests using method_name Adaptive_stepsize
Experiment_results/Adaptive_stepsize/distro/gau_1_q_true.txt
Experiment_results/Adaptive_stepsize/distro/gau_1_q_batches.txt
Experiment_results/Adaptive_stepsize/distro/gau_1_q_est_res.txt
Experiment_results/Adaptive_stepsize/distro/gau_1_q_est_proc.txt
Experiment_results/Adaptive_stepsize/distro/gau_1_E.txt
get_q_ests using method_name Adaptive_stepsize
Experiment_results/Adaptive_stepsize/distro/gau_2_q_true.txt
Experiment_results/Adaptive_stepsize/distro/gau_2_q_batches.txt
Experiment

In [11]:
# main_folder = 'SAG'
# initialize_folders(main_fd = 'Experiment_results/', fd_lst = [main_folder], 
#                        sub_fd_lst = [str(main_folder)+'/'], subsub_fd_lst = compare_lst)
# args_dict_SAG = {
#     'distro': (distros, [1000], [True], True),
#     'data_size': (['gau_1'], [100, 1000, 10000], [True], True),
#     'data_sequence':(['gau_1', 'gau_1'], [1000], [True], False, True),

# }
# all_method_comparisons(compare_lst, args_dict_SAG, main_folder)

In [12]:
# main_folder = 'mix'
# initialize_folders(main_fd = 'Experiment_results/', fd_lst = [main_folder], 
#                        sub_fd_lst = [str(main_folder)+'/'], subsub_fd_lst = ['data_size'])
# quantile_method_compare(folder_name = "Experiment_results/mix/data_size/",
#                         distro_lst = ['mix'], 
#                         datasize_lst = [1000, 10000, 100000],
#                         stepsize_lst = ['const'],
#                         MULTI_GENERATION = True,
#                         SHUFFLE = False, 
#                         tau_lst = tau_vals, 
#                         method_name="SGD")

### Run this to generate all plots

In [13]:
# NOTE(review): this import sits outside the notebook's top import cell;
# consider moving it there so all dependencies are visible up front.
from Plot import plot_charts

# Plot from the root results folder. The recorded output below lists paths
# under Experiment_results/Adaptive_stepsize/, the results the run cell above
# saves.
plot_charts("Experiment_results/")

Experiment_results/Adaptive_stepsize/step_size/const_
[0.1, 0.3, 0.5, 0.9, 0.99]
q label is  Adaptive_stepsize
[0.1, 0.3, 0.5, 0.9, 0.99]
q label is  Adaptive_stepsize
[0.1, 0.3, 0.5, 0.9, 0.99]
q label is  Adaptive_stepsize
Experiment_results/Adaptive_stepsize/step_size/2_div_sqrt_k_
[0.1, 0.3, 0.5, 0.9, 0.99]
q label is  Adaptive_stepsize
[0.1, 0.3, 0.5, 0.9, 0.99]
q label is  Adaptive_stepsize
[0.1, 0.3, 0.5, 0.9, 0.99]
q label is  Adaptive_stepsize
Experiment_results/Adaptive_stepsize/step_size/0.002_div_sqrt_k_
[0.1, 0.3, 0.5, 0.9, 0.99]
q label is  Adaptive_stepsize
[0.1, 0.3, 0.5, 0.9, 0.99]
q label is  Adaptive_stepsize
[0.1, 0.3, 0.5, 0.9, 0.99]
q label is  Adaptive_stepsize
Experiment_results/Adaptive_stepsize/distro/gau_1_
[0.1, 0.3, 0.5, 0.9, 0.99]
q label is  Adaptive_stepsize
[0.1, 0.3, 0.5, 0.9, 0.99]
q label is  Adaptive_stepsize
[0.1, 0.3, 0.5, 0.9, 0.99]
q label is  Adaptive_stepsize
Experiment_results/Adaptive_stepsize/distro/mix_
[0.1, 0.3, 0.5, 0.9, 0.99]
q label i