In [1]:
import pandas as pd
from projects import project_list
from batching_algs import *
from collections import Counter
import csv

In [2]:
# Batching strategies to evaluate, keyed by the label written to the results
# table. Insertion order matters: it fixes the order of the output rows.
algorithms = {
    'BATCHBISECT': batchbisect,
    'BATCHSTOP4': batchstop4,
    'BATCHDIVIDE4': batchdivide4,
}

In [3]:
# r_file_name = 'mfu_s.csv'
# result_file = open(r_file_name, 'w')
# result_headers = ['project', 'algorithm', 'batch_size', 'update_method', 'factor', 'num_of_builds', 'builds_saved', 'testall_size']
# writer = csv.writer(result_file)
# writer.writerow(result_headers)

In [4]:
def output_values(tr_results):
    """Encode build statuses as integers.

    Args:
        tr_results: sequence of status values (the caller passes the
            ``tr_status`` column as a list).

    Returns:
        A new list of the same length holding 1 where the status equals
        ``'passed'`` and 0 for every other value.
    """
    return [1 if status == 'passed' else 0 for status in tr_results]

In [5]:
def mfu(results, factor, algorithm, max_batch_size=16, min_batch_size=1):
    """Replay ``results`` in adaptively sized batches and total the builds used.

    The batch size starts at ``max_batch_size``. After each batch:

    * if the batch contains a failure (a ``0``):
        - size <= ``factor``            -> drop to ``min_batch_size``
        - fewer than 20% failures       -> subtract ``factor``
        - fewer than 50% failures       -> integer-divide by ``factor``
        - otherwise                     -> jump to the most frequently used
                                           batch size so far
    * if the batch is all-passing, multiply the size by ``factor``, capped at
      ``max_batch_size``.

    Args:
        results: list of per-build outcomes, where ``0`` marks a failure.
        factor: step used to grow and shrink the batch size (the notebook
            calls this with 2 and 3).
        algorithm: callable taking one batch (a slice of ``results``) and
            returning a number; the returned values are summed into the
            build count.
        max_batch_size: initial and largest batch size. Defaults to 16, the
            value that was previously hard-coded.
        min_batch_size: smallest batch size. Defaults to 1.

    Returns:
        ``(builds, builds_saved)`` where ``builds`` is the sum of
        ``algorithm(batch)`` over all batches and ``builds_saved`` is
        ``100 - 100 * builds / len(results)``. For empty ``results`` the
        function returns ``(0, 0.0)`` instead of dividing by zero.

    Raises:
        ValueError: if ``min_batch_size`` is below 1 (the scan could stop
            advancing) or exceeds ``max_batch_size``.
    """
    if min_batch_size < 1 or max_batch_size < min_batch_size:
        raise ValueError(
            "batch size bounds must satisfy 1 <= min_batch_size <= max_batch_size"
        )

    length = len(results)
    if length == 0:
        # Nothing to replay; the percentage below would divide by zero.
        return 0, 0.0

    batch_sizes = []  # every batch size requested so far, for the MFU lookup
    builds = 0
    i = 0
    cur_batch_size = max_batch_size

    while i < length:
        batch = results[i:i + cur_batch_size]
        i += cur_batch_size
        # The requested size is recorded, even if the final slice is shorter.
        batch_sizes.append(cur_batch_size)

        builds += algorithm(batch)

        if 0 in batch:
            fails_per = 100 * batch.count(0) / len(batch)

            if cur_batch_size <= factor:
                cur_batch_size = min_batch_size
            elif fails_per < 20:
                # Clamp so a custom min_batch_size is always respected.
                cur_batch_size = max(cur_batch_size - factor, min_batch_size)
            elif fails_per < 50:
                cur_batch_size = max(cur_batch_size // factor, min_batch_size)
            else:
                # Most frequently used batch size so far.
                cur_batch_size = Counter(batch_sizes).most_common(1)[0][0]
        elif cur_batch_size >= max_batch_size:
            cur_batch_size = max_batch_size
        else:
            cur_batch_size = min(cur_batch_size * factor, max_batch_size)

    builds_saved = 100 - (100 * builds / length)
    return builds, builds_saved

In [6]:
# Run the MFU batch-size simulation for every project, batching algorithm and
# factor, collecting one result row per combination in `final_file`.
final_file = []
r_file_name = 'mfu_s.csv'

# Projects excluded from the run, matched against the CSV file name.
skipped_projects = {
    'BuildCraft-BuildCraft.csv',
    'RS485-LogisticsPipes.csv',
    'datastax-java-driver.csv',
}

# Number of leading builds dropped from each project before simulating.
# NOTE(review): the rationale for 100 is not visible in this notebook.
leading_builds_dropped = 100

# Factors passed to mfu() for every algorithm.
factors = (2, 3)

# (ci_skip flag, row selector) pairs. Flag 0 uses every build on the branch;
# flag 1 keeps only rows with verdict == 0. The selectors are lazy so the
# verdict column is only touched after the ci_skip=0 pass has produced rows.
variants = (
    (0, lambda frame: frame),
    (1, lambda frame: frame[frame.verdict == 0]),
)

# NOTE(review): nothing is written through `writer` in this cell. The file is
# still opened (append mode creates it if missing) to keep that side effect,
# but the handle is now closed when the cell finishes or fails. Drop these two
# lines if mfu_s.csv is not needed.
with open(r_file_name, 'a+') as result_file:
    writer = csv.writer(result_file)

    for p in project_list:
        pname = p.split('/')[1]
        if pname in skipped_projects:
            continue

        data = pd.read_csv('ci_skip_data/extracted_project_travis/' + pname)

        # Use 'master' when the project has it, otherwise fall back to 'trunk'.
        branch = 'master' if 'master' in data['git_branch'].tolist() else 'trunk'
        data = data[data['git_branch'] == branch]

        for ci_skip, select in variants:
            final_data = select(data).iloc[leading_builds_dropped:]

            if len(final_data) == 0:
                # No builds left: report the project and skip the rest of it.
                print(p)
                break

            results = output_values(final_data['tr_status'].tolist())
            for alg in algorithms:
                for factor in factors:
                    builds, saved = mfu(results, factor, algorithms[alg])
                    final_file.append([
                        pname, str(alg), ci_skip, '-1', 'new_dynamic',
                        'stagger_mfu', factor, builds, saved, len(results),
                    ])
    
    
    
    

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [7]:
# Column labels for the result rows; the order must match the row lists
# appended to `final_file`.
result_columns = [
    'project',
    'algorithm',
    'ci_skip',
    'batch_size',
    'method',
    'update_method',
    'factor',
    'num_of_builds',
    'builds_saved',
    'testall_size',
]
df = pd.DataFrame(final_file, columns=result_columns)

In [8]:
# Persist the results table to mfu.csv in the current working directory.
# With to_csv's default settings the DataFrame index is written as well, as
# an unnamed first column.
df.to_csv('mfu.csv')