In [21]:
import pandas as pd
from projects import project_list
from batching_algs import *
from collections import Counter
import csv

In [22]:
# Batching strategies under evaluation, keyed by the label that is written to
# the 'algorithm' column of the results CSV.
algorithms = {
    'BATCHBISECT': batchbisect,
    'BATCHSTOP4': batchstop4,
    'BATCHDIVIDE4': batchdivide4,
}

In [23]:
# Results are streamed to a CSV file whose handle stays open across cells so
# that later cells can append rows through `writer`.
# NOTE(review): nothing visible in this notebook closes `result_file`; call
# result_file.close() once every row has been written.
r_file_name = 'mfu.csv'
# newline='' is what the csv module expects of its file object: without it the
# writer's own '\r\n' terminators are translated a second time on Windows and
# every row is followed by a blank line.
result_file = open(r_file_name, 'w', newline='')
result_headers = ['project', 'algorithm', 'batch_size', 'update_method', 'factor', 'num_of_builds', 'builds_saved', 'testall_size']
writer = csv.writer(result_file)
writer.writerow(result_headers)

91

In [24]:
def output_values(tr_results):
    """Translate build status strings into a list of pass/fail bits.

    Args:
        tr_results: sequence of status values.

    Returns:
        A new list, same length and order as ``tr_results``, holding 1 for
        every entry equal to ``'passed'`` and 0 for anything else.
    """
    return [1 if status == 'passed' else 0 for status in tr_results]

In [25]:
def mfu(results, factor, algorithm, max_batch_size=16, min_batch_size=1):
    """Replay a build history in adaptively sized batches and count builds.

    The history is consumed front to back. After each batch the next batch
    size is chosen from the outcome of the one just processed:

    * batch contains no 0 (all passed): grow by ``factor``, capped at
      ``max_batch_size``;
    * batch contains a 0 and the current size is already <= ``factor``:
      drop to ``min_batch_size``;
    * fewer than 20% zeros: shrink by subtracting ``factor``;
    * fewer than 50% zeros: shrink by integer-dividing by ``factor``;
    * otherwise: reuse the batch size that has been used most often so far
      (presumably the "most frequently used" size the name refers to).

    Args:
        results: list of outcomes where 0 marks a failure and any other
            value counts as a pass (``output_values`` produces 1/0).
        factor: step used to grow and shrink the batch size.
        algorithm: callable that receives one batch (a slice of ``results``)
            and returns a number that is added to the build total --
            presumably the builds that strategy needs for the batch.
        max_batch_size: initial batch size and upper bound when growing.
        min_batch_size: size used once a failing batch can shrink no further.

    Returns:
        ``(builds, builds_saved)``: the accumulated build total and
        ``100 - 100 * builds / len(results)``. An empty ``results`` yields
        ``(0, 0.0)`` instead of dividing by zero.

    Raises:
        ValueError: if ``min_batch_size`` is below 1 or exceeds
            ``max_batch_size`` (a non-positive size would never advance
            through ``results``).
    """
    if min_batch_size < 1 or max_batch_size < min_batch_size:
        raise ValueError('require 1 <= min_batch_size <= max_batch_size')

    length = len(results)
    if length == 0:
        # Nothing to replay; the savings percentage is undefined, report 0.
        return 0, 0.0

    batch_sizes = []  # every nominal batch size used so far, in order
    builds = 0
    i = 0
    cur_batch_size = max_batch_size

    while i < length:
        # The final batch may be shorter than cur_batch_size; the nominal
        # size is what gets recorded for the most-frequently-used lookup.
        batch = results[i:i + cur_batch_size]
        i += cur_batch_size
        batch_sizes.append(cur_batch_size)

        builds += algorithm(batch)

        failures = batch.count(0)
        if failures:
            fails_per = 100 * failures / len(batch)

            if cur_batch_size <= factor:
                cur_batch_size = min_batch_size
            elif fails_per < 20:
                cur_batch_size -= factor
            elif fails_per < 50:
                cur_batch_size //= factor
            else:
                cur_batch_size = Counter(batch_sizes).most_common(1)[0][0]
        elif cur_batch_size >= max_batch_size:
            cur_batch_size = max_batch_size
        else:
            cur_batch_size = min(cur_batch_size * factor, max_batch_size)

    builds_saved = 100 - (100 * builds / length)
    return builds, builds_saved

In [26]:
# Evaluate every batching algorithm on every project's build history and write
# one CSV row per (project, algorithm, factor) combination.
for p in project_list:
    print(p)
    # Entries look like '<dir>/<file>.csv'; the file name is the second part.
    pname = p.split('/')[1]

    try:
        data = pd.read_csv('ci_skip_data/extracted_project_travis/' + pname)
    except Exception as exc:
        # Best-effort: a project whose CSV is missing or unreadable is skipped,
        # but report it instead of hiding it, and let Ctrl-C propagate.
        print('  skipped, could not load data:', exc)
        continue

    # Use the 'master' branch when the project has one, otherwise 'trunk'.
    branch_type = data['git_branch'].tolist()
    if 'master' in branch_type:
        data = data[data['git_branch'] == 'master']
    else:
        data = data[data['git_branch'] == 'trunk']

    # Keep rows whose verdict is 0, then drop the first 100 of them.
    # NOTE(review): the meaning of `verdict` and the reason for the 100-row
    # offset are not visible here -- presumably a warm-up prefix; confirm.
    data = data[data.verdict == 0]
    data = data.iloc[100:]

    if len(data) == 0:
        continue

    results = output_values(data['tr_status'].tolist())

    for alg in algorithms:
        for factor in (2, 3):
            builds, saved = mfu(results, factor, algorithms[alg])
            # Record the factor that was actually used for this run (the
            # factor-3 run used to be written out labelled as factor 2).
            writer.writerow([pname, str(alg), '-1', 'stagger_mfu', factor, builds, saved, len(results)])

# Push buffered rows to disk so the CSV is complete even if the handle is
# never explicitly closed.
result_file.flush()
    

junit-team-junit5/junit-team-junit5.csv
ratpack-ratpack/ratpack-ratpack.csv
p6spy-p6spy/p6spy-p6spy.csv
BuildCraft-BuildCraft/BuildCraft-BuildCraft.csv
apache-tajo/apache-tajo.csv
osmdroid-osmdroid/osmdroid-osmdroid.csv
DSpace-DSpace/DSpace-DSpace.csv
apache-nifi/apache-nifi.csv
caelum-vraptor4/caelum-vraptor4.csv
grails-grails-core/grails-grails-core.csv
square-wire/square-wire.csv
puniverse-quasar/puniverse-quasar.csv
dropwizard-dropwizard/dropwizard-dropwizard.csv
essentials-Essentials/essentials-Essentials.csv
mybatis-mybatis-3/mybatis-mybatis-3.csv
naver-pinpoint/naver-pinpoint.csv
FasterXML-jackson-databind/FasterXML-jackson-databind.csv
samtools-htsjdk/samtools-htsjdk.csv
deeplearning4j-deeplearning4j/deeplearning4j-deeplearning4j.csv
open-keychain-open-keychain/open-keychain-open-keychain.csv
yegor256-takes/yegor256-takes.csv
keycloak-keycloak/keycloak-keycloak.csv
RS485-LogisticsPipes/RS485-LogisticsPipes.csv
rackerlabs-blueflood/rackerlabs-blueflood.csv
xetorthio-jedis/xetort