In [1]:
import pandas as pd
from statistics import median

In [2]:
# Load the raw experiment results; every later cell derives its data from this frame.
df = pd.read_csv(filepath_or_buffer='ci_skip_all_results.csv')

In [3]:
# Build one label per row of the form "<algorithm> <update_method> <factor>".
# The factor column is cast to str so it can be concatenated with the text columns.
df['final_methods'] = (
    df['algorithm']
    .add(' ')
    .add(df['update_method'])
    .add(' ')
    .add(df['factor'].astype(str))
)

In [4]:
# Split the results by the value of the 'method' column:
#   bd -> 'baseline_dynamic', bs -> 'baseline_static', ld -> 'new_dynamic'.
bd = df.loc[df['method'].eq('baseline_dynamic')]
bs = df.loc[df['method'].eq('baseline_static')]
ld = df.loc[df['method'].eq('new_dynamic')]

In [5]:
# Values of the 'algorithm' column that the comparison loop iterates over.
algorithms = [
    'BATCHBISECT',
    'BATCHSTOP4',
    'BATCHDIVIDE4',
]

In [6]:
# Two independent, initially empty result tables: one for the comparison
# against the dynamic baseline, one for the comparison against the static baseline.
dyn_df, sta_df = pd.DataFrame(), pd.DataFrame()

In [7]:
# Order the new-dynamic results by project and derive the project list from
# that same ordering.
#
# The builds_saved columns written to dyn_df / sta_df are filled positionally
# from frames sorted by project, so the 'project' label column has to follow
# the same sorted order. list(set(...)) returns the names in arbitrary order,
# which would attach project names to the wrong rows in the exported tables.
ld = ld.sort_values(by='project')
project_list = ld['project'].drop_duplicates().tolist()

In [8]:
# Restrict both baselines to the projects that appear in the new-dynamic
# results. Their builds_saved values are later assigned positionally into
# frames that have one row per entry of project_list, so a baseline row for
# any other project would make those assignments fail with a length mismatch.
bd = bd[bd['project'].isin(project_list)]
bs = bs[bs['project'].isin(project_list)]

In [9]:
# Seed both result tables with the project labels; this fixes their row count,
# which every later column assignment has to match.
dyn_df['project'] = sta_df['project'] = project_list

In [10]:
# Accumulators filled by the comparison loop:
#   median_differences_dyn: algorithm -> list of medians
#   median_differences_sta: algorithm -> {batch size -> list of medians}
median_differences_dyn, median_differences_sta = dict(), dict()

In [11]:
# For every batching algorithm, tabulate builds_saved per project for the
# baselines and for each new-dynamic variant, and record the median of the
# per-project difference (baseline - variant):
#   median_differences_dyn[alg]     -> one median per new-dynamic variant
#   median_differences_sta[alg][b]  -> one median per variant, per batch size b
for alg in algorithms:

    median_differences_dyn[alg] = []
    median_differences_sta[alg] = {}

    # Rows of this algorithm with ci_skip == 0. The slices are sorted by
    # project here (rather than relying on a sort done in another cell)
    # because every column below is filled positionally from a plain list,
    # so all of them must share the same row order.
    bd_alg = bd[(bd['algorithm'] == alg) & (bd['ci_skip'] == 0)].sort_values(by='project')
    bs_alg = bs[(bs['algorithm'] == alg) & (bs['ci_skip'] == 0)]
    ld_alg = ld[(ld['algorithm'] == alg) & (ld['ci_skip'] == 0)].sort_values(by='project')

    # Sorted so the column order of the exported tables is reproducible;
    # iterating a set of strings can yield a different order on every run.
    final_methods_lwd = sorted(set(ld_alg['final_methods'].tolist()))

    dyn_col_name = 'Dynamic Baseline ' + alg
    dyn_df[dyn_col_name] = bd_alg['builds_saved'].tolist()

    # The dynamic comparison does not depend on the static batch size, so it
    # is done once per algorithm. Inside the batch loop it would be repeated
    # for every batch size and skipped entirely if there were none.
    for f in final_methods_lwd:

        ld_alg_f = ld_alg[ld_alg['final_methods'] == f]
        dyn_df[f] = ld_alg_f['builds_saved'].tolist()

        median_dyn = median(dyn_df[dyn_col_name] - dyn_df[f])
        median_differences_dyn[alg].append(median_dyn)

    batch_sizes = list(set(bs_alg['batch_size'].tolist()))

    for b in batch_sizes:

        median_differences_sta[alg][b] = []

        bs_alg_b = bs_alg[bs_alg['batch_size'] == b].sort_values(by='project')

        static_col_name = 'Static Baseline ' + alg + ' ' + str(b)
        sta_df[static_col_name] = bs_alg_b['builds_saved'].tolist()

        for f in final_methods_lwd:

            ld_alg_f = ld_alg[ld_alg['final_methods'] == f]

            # Same values for every batch size; the column is created on the
            # first pass and simply overwritten with identical data afterwards.
            sta_df[f] = ld_alg_f['builds_saved'].tolist()

            median_sta = median(sta_df[static_col_name] - sta_df[f])
            median_differences_sta[alg][b].append(median_sta)
            
            

In [12]:
# Persist both comparison tables as CSV files in the working directory.
dyn_df.to_csv(path_or_buf='dyn_lwd_builds_saved.csv')
sta_df.to_csv(path_or_buf='static_lwd_builds_saved.csv')

In [13]:
# Report, per algorithm, the median of the per-variant medians collected for
# the dynamic-baseline comparison.
for alg, diffs in median_differences_dyn.items():
    overall = median(diffs)
    print('Alg {} = {}'.format(alg, overall))

Alg BATCHBISECT = -0.8985084692307828
Alg BATCHSTOP4 = -3.0648708081363374
Alg BATCHDIVIDE4 = 3.69738149123296


In [15]:
# Report the static-baseline comparison: one median per (algorithm, batch size),
# followed by the median of those per-batch medians for the algorithm.
for alg, per_batch in median_differences_sta.items():
    medians = []
    for b, diffs in per_batch.items():
        batch_median = median(diffs)
        print('Alg {} Batch {} = {}'.format(alg, b, batch_median))
        medians.append(batch_median)

    print('\n\n')
    print('{} Median = {} \n\n'.format(alg, median(medians)))

Alg BATCHBISECT Batch 8 = -10.040590392910659
Alg BATCHBISECT Batch 16 = -10.450696232134295
Alg BATCHBISECT Batch 2 = -20.35451181649609
Alg BATCHBISECT Batch 4 = -11.022451830245268



BATCHBISECT Median = -10.736574031189782 


Alg BATCHSTOP4 Batch 8 = -7.391298712600516
Alg BATCHSTOP4 Batch 16 = -8.867907989161509
Alg BATCHSTOP4 Batch 4 = -8.729705803750392



BATCHSTOP4 Median = -8.729705803750392 


Alg BATCHDIVIDE4 Batch 8 = -1.3981765103470103
Alg BATCHDIVIDE4 Batch 16 = -10.944708818152797
Alg BATCHDIVIDE4 Batch 4 = -1.6384071270301632



BATCHDIVIDE4 Median = -1.6384071270301632 


