In [1]:
################# - test_run_parameters - ##################
# All values are kept as strings because they are substituted verbatim
# into the names of the result folders and files read further below.

# Quantile settings (test_q_* results) and the stand-alone default.
q_values = "0.01 0.25 0.5 0.75 0.90 0.95 0.99".split()
q_default = "0.99"

# Settings of n (test_n_* results) and the stand-alone default.
n_values = [str(size) for size in (1_000_000, 5_000_000, 7_500_000, 10_000_000)]
n_default = "5000000"

# Settings of d (test_d_* results), the subset used together with the
# quantile tests, and the stand-alone default.
d_values = [str(idx) for idx in range(1, 9)]
d_values_restrict = "1 3 4 7".split()
d_default = "1"

# Full epsilon sweep (test_e_* results) and the shorter epsilon list used
# by the d / n / q tests; note the two lists format whole numbers
# differently ("1" vs "1.0").
e_values = "0.5 1 1.5 2 2.5 3 3.5 4 4.5 5".split()
e_default = [f"{whole}.0" for whole in range(1, 6)]

# s_base / s_step are not referenced in this part of the notebook
# (presumably the seed schedule of the test runs - TODO confirm).
s_base = 16033099
s_step = 127
# Number of repetition files available per parameter combination.
reps = 100
############################################################

In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import numpy as np
import scipy.stats as stats
import math

In [3]:
def read_dist_data(algo, d_list=None, e_list=None, n_reps=None):
    """Summarise the ``test_d_*`` results of one algorithm.

    For every ``d`` value and every epsilon, the repetition files
    ``Test_of_{algo}/test_d_{d}/test_d_{d}_e_{e}_{k}.csv`` (``k = 1..n_reps``)
    are stacked and reduced to a single summary row.

    Parameters
    ----------
    algo : str
        Algorithm name used in the ``Test_of_{algo}`` folder name.
    d_list : sequence of str or int, optional
        ``d`` values to read; defaults to the notebook-level ``d_values``.
    e_list : sequence of str, optional
        Epsilon labels to read; defaults to the notebook-level ``e_default``.
    n_reps : int, optional
        Number of repetition files per combination; defaults to ``reps``.

    Returns
    -------
    pandas.DataFrame
        One row per (d, epsilon) pair, in loop order, indexed from 1.
        Columns are the column-wise means of the CSV columns followed by
        ``std`` (sample standard deviation of ``nae``), ``cil`` / ``cir``
        (lower / upper bound of the 95% Student-t interval for the mean of
        ``nae``) and ``d`` (``int8``).
    """
    d_list = d_values if d_list is None else d_list
    e_list = e_default if e_list is None else e_list
    n_reps = reps if n_reps is None else n_reps

    records = []
    for d in d_list:
        for eps in e_list:
            stem = f'Test_of_{algo}/test_d_{d}/test_d_{d}_e_{eps}'
            # Read every repetition once and stack them in a single concat.
            runs = pd.concat(
                [pd.read_csv(f'{stem}_{k}.csv') for k in range(1, n_reps + 1)],
                ignore_index=True,
            )
            # Dispersion is measured on the raw repetitions only; treating
            # the mean as one more observation would shrink the estimate
            # and make the t-interval too narrow.
            nae = runs['nae']
            n_obs = len(nae)  # equals n_reps when each file holds one row
            spread = nae.std()
            low, high = stats.t.interval(
                0.95, df=n_obs - 1, loc=nae.mean(), scale=spread / math.sqrt(n_obs)
            )
            record = runs.mean().to_dict()
            record.update({'std': spread, 'cil': low, 'cir': high, 'd': int(d)})
            records.append(record)

    if not records:
        # Nothing requested: hand back an empty table with the derived columns.
        return pd.DataFrame(columns=['std', 'cil', 'cir', 'd'])

    summary = pd.DataFrame(records).astype({'d': 'int8'})
    summary.index = pd.Index(range(1, len(summary) + 1))
    return summary
            

In [4]:
def read_eps_data(algo, e_list=None, n_reps=None):
    """Summarise the ``test_e_*`` results of one algorithm.

    For every epsilon label, the repetition files
    ``Test_of_{algo}/test_e_{e}/test_e_{e}_{k}.csv`` (``k = 1..n_reps``) are
    stacked and reduced to a single summary row.

    Parameters
    ----------
    algo : str
        Algorithm name used in the ``Test_of_{algo}`` folder name.
    e_list : sequence of str, optional
        Epsilon labels to read; defaults to the notebook-level ``e_values``.
    n_reps : int, optional
        Number of repetition files per epsilon; defaults to ``reps``.

    Returns
    -------
    pandas.DataFrame
        One row per epsilon, in list order, indexed from 1. Columns are the
        column-wise means of the CSV columns followed by ``std`` (sample
        standard deviation of ``nae``), ``cil`` / ``cir`` (lower / upper
        bound of the 95% Student-t interval for the mean of ``nae``) and
        ``d``, which is always 1 here (``int8``).
    """
    e_list = e_values if e_list is None else e_list
    n_reps = reps if n_reps is None else n_reps

    records = []
    for eps in e_list:
        stem = f'Test_of_{algo}/test_e_{eps}/test_e_{eps}'
        # Read every repetition once and stack them in a single concat.
        runs = pd.concat(
            [pd.read_csv(f'{stem}_{k}.csv') for k in range(1, n_reps + 1)],
            ignore_index=True,
        )
        # Dispersion is measured on the raw repetitions only; treating the
        # mean as one more observation would shrink the estimate and make
        # the t-interval too narrow.
        nae = runs['nae']
        n_obs = len(nae)  # equals n_reps when each file holds one row
        spread = nae.std()
        low, high = stats.t.interval(
            0.95, df=n_obs - 1, loc=nae.mean(), scale=spread / math.sqrt(n_obs)
        )
        record = runs.mean().to_dict()
        record.update({'std': spread, 'cil': low, 'cir': high, 'd': 1})
        records.append(record)

    if not records:
        # Nothing requested: hand back an empty table with the derived columns.
        return pd.DataFrame(columns=['std', 'cil', 'cir', 'd'])

    summary = pd.DataFrame(records).astype({'d': 'int8'})
    summary.index = pd.Index(range(1, len(summary) + 1))
    return summary
    

In [5]:
def read_n_data(algo, n_list=None, e_list=None, n_reps=None):
    """Summarise the ``test_n_*`` results of one algorithm.

    For every ``n`` label and every epsilon, the repetition files
    ``Test_of_{algo}/test_n_{n}/test_n_{n}_e_{e}_{k}.csv`` (``k = 1..n_reps``)
    are stacked and reduced to a single summary row.

    Parameters
    ----------
    algo : str
        Algorithm name used in the ``Test_of_{algo}`` folder name.
    n_list : sequence of str, optional
        ``n`` labels to read; defaults to the notebook-level ``n_values``.
    e_list : sequence of str, optional
        Epsilon labels to read; defaults to the notebook-level ``e_default``.
    n_reps : int, optional
        Number of repetition files per combination; defaults to ``reps``.

    Returns
    -------
    pandas.DataFrame
        One row per (n, epsilon) pair, in loop order, indexed from 1.
        Columns are the column-wise means of the CSV columns followed by
        ``std`` (sample standard deviation of ``nae``), ``cil`` / ``cir``
        (lower / upper bound of the 95% Student-t interval for the mean of
        ``nae``) and ``d``, which is always 1 here (``int8``). No separate
        ``n`` column is added by this function.
    """
    n_list = n_values if n_list is None else n_list
    e_list = e_default if e_list is None else e_list
    n_reps = reps if n_reps is None else n_reps

    records = []
    for size in n_list:
        for eps in e_list:
            stem = f'Test_of_{algo}/test_n_{size}/test_n_{size}_e_{eps}'
            # Read every repetition once and stack them in a single concat.
            runs = pd.concat(
                [pd.read_csv(f'{stem}_{k}.csv') for k in range(1, n_reps + 1)],
                ignore_index=True,
            )
            # Dispersion is measured on the raw repetitions only; treating
            # the mean as one more observation would shrink the estimate
            # and make the t-interval too narrow.
            nae = runs['nae']
            n_obs = len(nae)  # equals n_reps when each file holds one row
            spread = nae.std()
            low, high = stats.t.interval(
                0.95, df=n_obs - 1, loc=nae.mean(), scale=spread / math.sqrt(n_obs)
            )
            record = runs.mean().to_dict()
            record.update({'std': spread, 'cil': low, 'cir': high, 'd': 1})
            records.append(record)

    if not records:
        # Nothing requested: hand back an empty table with the derived columns.
        return pd.DataFrame(columns=['std', 'cil', 'cir', 'd'])

    summary = pd.DataFrame(records).astype({'d': 'int8'})
    summary.index = pd.Index(range(1, len(summary) + 1))
    return summary


In [6]:
def read_q_data(algo, q_list=None, d_list=None, e_list=None, n_reps=None):
    """Summarise the ``test_q_*`` results of one algorithm.

    For every quantile label, ``d`` value and epsilon, the repetition files
    ``Test_of_{algo}/test_q_{q}/test_q_{q}_e_{e}_d_{d}_{k}.csv``
    (``k = 1..n_reps``) are stacked and reduced to a single summary row.

    Parameters
    ----------
    algo : str
        Algorithm name used in the ``Test_of_{algo}`` folder name.
    q_list : sequence of str, optional
        Quantile labels to read; defaults to the notebook-level ``q_values``.
    d_list : sequence of str or int, optional
        ``d`` values to read; defaults to the notebook-level
        ``d_values_restrict``.
    e_list : sequence of str, optional
        Epsilon labels to read; defaults to the notebook-level ``e_default``.
    n_reps : int, optional
        Number of repetition files per combination; defaults to ``reps``.

    Returns
    -------
    pandas.DataFrame
        One row per (q, d, epsilon) triple, in loop order, indexed from 1.
        Columns are the column-wise means of the CSV columns followed by
        ``std`` (sample standard deviation of ``nae``), ``cil`` / ``cir``
        (lower / upper bound of the 95% Student-t interval for the mean of
        ``nae``) and ``d``. ``d`` is stored as ``int8``, matching the other
        summary readers. No separate ``q`` column is added by this function.
    """
    q_list = q_values if q_list is None else q_list
    d_list = d_values_restrict if d_list is None else d_list
    e_list = e_default if e_list is None else e_list
    n_reps = reps if n_reps is None else n_reps

    records = []
    for q in q_list:
        for d in d_list:
            for eps in e_list:
                stem = f'Test_of_{algo}/test_q_{q}/test_q_{q}_e_{eps}_d_{d}'
                # Read every repetition once and stack them in a single concat.
                runs = pd.concat(
                    [pd.read_csv(f'{stem}_{k}.csv') for k in range(1, n_reps + 1)],
                    ignore_index=True,
                )
                # Dispersion is measured on the raw repetitions only;
                # treating the mean as one more observation would shrink
                # the estimate and make the t-interval too narrow.
                nae = runs['nae']
                n_obs = len(nae)  # equals n_reps when each file holds one row
                spread = nae.std()
                low, high = stats.t.interval(
                    0.95, df=n_obs - 1, loc=nae.mean(), scale=spread / math.sqrt(n_obs)
                )
                record = runs.mean().to_dict()
                # d arrives as a string label; store it as a number.
                record.update({'std': spread, 'cil': low, 'cir': high, 'd': int(d)})
                records.append(record)

    if not records:
        # Nothing requested: hand back an empty table with the derived columns.
        return pd.DataFrame(columns=['std', 'cil', 'cir', 'd'])

    # Cast once on the finished table so the integer dtype is not lost again.
    summary = pd.DataFrame(records).astype({'d': 'int8'})
    summary.index = pd.Index(range(1, len(summary) + 1))
    return summary

In [7]:
# For each algorithm, build the four summary tables and store every one of
# them as a CSV file in the working directory (d, e, n, q - in that order).
for algo in ('ezq-sw', 'ldpq', 'frugal1u-rr', 'frugal2u-sw'):
    outputs = {
        f'test_on_d_{algo}.csv': read_dist_data,
        f'test_on_e_{algo}.csv': read_eps_data,
        f'test_on_n_{algo}.csv': read_n_data,
        f'test_on_q_{algo}.csv': read_q_data,
    }
    for target, reader in outputs.items():
        reader(algo).to_csv(target)