In [1]:
import pandas as pd
from statistics import mean, median
from projects import project_list
from collections import Counter

In [2]:
# Batching algorithms whose results are analysed in the cells below.
algorithms = [
    'BATCHBISECT',
    'BATCHSTOP4',
    'BATCHDIVIDE4',
]

# Result table for the dynamic batching variants; later cells read the
# `project`, `algorithm`, `builds_saved`, `update_method` and `factor` columns.
data = pd.read_csv('ci_skipped_dynamic_variants.csv')

In [3]:
# Empty containers: `df` starts as a frame with no rows or columns,
# `lines` as an empty list.
lines = list()
df = pd.DataFrame()

In [4]:
# For every (project, algorithm) pair, keep the single configuration with the
# highest `builds_saved` value and gather those rows into `df`.
#
# Row labels are collected first and the frame is built once at the end:
#   * `DataFrame.append` was removed in pandas 2.0, so the previous
#     row-by-row `df = df.append(row)` no longer runs;
#   * `df` is rebuilt from scratch, so re-running this cell cannot duplicate rows;
#   * selecting with `.loc` keeps the column dtypes of `data` instead of
#     round-tripping every row through an object Series.
best_labels = []

for p in project_list:
    # The second '/'-separated component of the entry is what gets matched
    # against the `project` column.
    pname = p.split('/')[1]
    p_data = data[data.project == pname]
    if p_data.empty:
        continue

    for alg in algorithms:
        alg_data = p_data[p_data.algorithm == alg]
        if alg_data.empty:
            # No measurements for this algorithm on this project; taking the
            # max of nothing would raise, so skip the pair.
            continue

        # idxmax returns the index label of the first row holding the maximum,
        # i.e. the same tie-breaking as list.index(max(...)).
        best_labels.append(alg_data['builds_saved'].idxmax())

# `data` comes straight from read_csv without index_col, so its labels are
# unique and each one selects exactly one row.
df = data.loc[best_labels]

In [5]:
# Display the selected best-configuration rows (one per project/algorithm pair).
df

Unnamed: 0,project,algorithm,batch_size,update_method,factor,num_of_builds,builds_saved,testall_size
16,junit-team-junit5.csv,BATCHBISECT,-1.0,stagger,3.0,514.0,65.937707,1509.0
22,junit-team-junit5.csv,BATCHSTOP4,-1.0,random_linear,-1.0,444.0,70.576541,1509.0
13,junit-team-junit5.csv,BATCHDIVIDE4,-1.0,exponential,2.0,488.0,67.660702,1509.0
42,ratpack-ratpack.csv,BATCHBISECT,-1.0,stagger,2.0,1631.0,1.509662,1656.0
44,ratpack-ratpack.csv,BATCHSTOP4,-1.0,stagger,2.0,1336.0,19.323671,1656.0
...,...,...,...,...,...,...,...,...
1314,killbill-killbill.csv,BATCHSTOP4,-1.0,stagger,3.0,3808.0,39.565148,6301.0
1310,killbill-killbill.csv,BATCHDIVIDE4,-1.0,exponential,3.0,4147.0,34.185050,6301.0
1339,checkstyle-checkstyle.csv,BATCHBISECT,-1.0,stagger,3.0,7762.0,81.583068,42146.0
1335,checkstyle-checkstyle.csv,BATCHSTOP4,-1.0,exponential,3.0,6572.0,84.406587,42146.0


In [6]:
# Persist the selected rows; the frame's index is written as the first CSV column
# because to_csv is called with its defaults.
df.to_csv('best_dynamic_batching.csv')

In [7]:
# For each algorithm, count how often every "<update_method>*<factor>"
# combination occurs among that algorithm's rows in `df`.
for alg in algorithms:
    alg_data = df[df.algorithm == alg]

    best_methods = alg_data['update_method'].tolist()
    best_factors = alg_data['factor'].tolist()

    # Pair each update method with its factor to form the technique label.
    best_techniques = [
        method + '*' + str(factor)
        for method, factor in zip(best_methods, best_factors)
    ]

    print(alg)
    print(Counter(best_techniques))
    print()

BATCHBISECT
Counter({'stagger*2.0': 21, 'stagger*3.0': 15, 'exponential*2.0': 4, 'linear*4.0': 3, 'linear*3.0': 2, 'random_linear*-1.0': 2, 'exponential*3.0': 1, 'linear*2.0': 1, 'random_exponential*-1.0': 1})

BATCHSTOP4
Counter({'exponential*2.0': 14, 'stagger*2.0': 9, 'stagger*3.0': 8, 'random_linear*-1.0': 7, 'linear*4.0': 6, 'linear*2.0': 2, 'exponential*3.0': 2, 'random_exponential*-1.0': 1, 'linear*3.0': 1})

BATCHDIVIDE4
Counter({'exponential*3.0': 33, 'exponential*2.0': 8, 'random_exponential*-1.0': 4, 'stagger*3.0': 3, 'stagger*2.0': 1, 'linear*3.0': 1})



In [12]:
# Load the static-batching baseline results.
# NOTE: this rebinds `df`; from here on it no longer refers to the
# dynamic-variant selection built earlier in the notebook.
df = pd.read_csv('static_baseline_batching.csv')

In [13]:
# Distinct project identifiers present in the loaded table.
projects = set(df['project'])

In [15]:
# For every project, print its rows followed by the algorithm of the row with
# the highest `builds_saved`.
#
# `projects` is a set of strings, whose iteration order changes between kernel
# restarts; sorting makes the report come out in the same order on every run.
for p in sorted(projects):
    p_data = df[df.project == p]
    print(p_data)

    # idxmax gives the label of the first row holding the maximum, matching
    # the tie-breaking of list.index(max(...)).
    best_label = p_data['builds_saved'].idxmax()
    print(p_data.loc[best_label, 'algorithm'])

                   project     algorithm  batch_size  builds_saved  \
200  keycloak-keycloak.csv   BATCHBISECT           2     34.838710   
201  keycloak-keycloak.csv   BATCHBISECT           4     46.838710   
202  keycloak-keycloak.csv   BATCHBISECT           8     49.032258   
203  keycloak-keycloak.csv   BATCHBISECT          16     47.870968   
204  keycloak-keycloak.csv    BATCHSTOP4           4     55.483871   
205  keycloak-keycloak.csv    BATCHSTOP4           8     57.634409   
206  keycloak-keycloak.csv    BATCHSTOP4          16     56.473118   
207  keycloak-keycloak.csv  BATCHDIVIDE4           4      0.000000   
208  keycloak-keycloak.csv  BATCHDIVIDE4           8     49.032258   
209  keycloak-keycloak.csv  BATCHDIVIDE4          16     49.032258   

     median_delay  testall_size  
200           1.0          2325  
201           2.0          2325  
202           4.0          2325  
203           8.0          2325  
204           2.0          2325  
205           4.0        