In [19]:
import os
from statistics import median
import numpy as np
import pandas as pd
import functools as ft

In [20]:
pd.options.mode.chained_assignment = None  # default='warn'

In [21]:
class ProcessEvoSuiteData:
    """ Class that processes data from EvoSuite. """

    def __init__(self):
        self.name = "ProcessEvoSuiteData"

    def print_name(self):
        os.system(f"echo The name is: {self.name}")

    def calculate_medians60(self):
        """ Extract results from the EvoSuite results csv files with time bugdet 60. """

        # Read EvoSuite results
        res60 = pd.read_csv("res_data/results-60.csv")
        configuration_ids = ['weak_60', 'branch_60', 'default_60']
        
        # Sort by configuration
        res = res60.loc[:,['TARGET_CLASS', 'configuration_id', 'project.id', 'BranchCoverage']]
        result = res[res.apply(lambda row : row["configuration_id"] in configuration_ids, axis=1)]

        # Take medians of the 10 runs of EvoSuite per class
        medians = result.groupby(['TARGET_CLASS', 'configuration_id', 'project.id'])['BranchCoverage'].median()
        medians.to_csv('medians.csv')

        # Compute differences between BRANCH;WEAKMUTATION and BRANCH
        res_medians = pd.read_csv("medians.csv")
        res_medians.to_csv('res_medians.csv')
        return
    
    def get_ck_metrics(self):
        """ Will be used as features for the model. """

        class_metrics = pd.read_csv("ck_data/class.csv")
        class_metrics = class_metrics.iloc[:, 1:]
        return class_metrics

In [22]:
# 
# BRANCH COVERAGE
# 

res60 = pd.read_csv("res_data/results-60.csv")
configuration_ids = ['weak_60', 'branch_60', 'default_60']

# Sort by configuration
res = res60.loc[:,['TARGET_CLASS', 'configuration_id', 'project.id', 'BranchCoverage']]
result = res[res.apply(lambda row : row["configuration_id"] in configuration_ids, axis=1)]

# print(result.to_csv('outfil1.csv'))

# Take medians of the 10 runs of EvoSuite per class
medians = result.groupby(['TARGET_CLASS', 'configuration_id', 'project.id'])['BranchCoverage'].median()
medians.to_csv('medians_coverage.csv')
medians = pd.read_csv("medians_coverage.csv")

# "weak" rows
weak_medians = medians.loc[medians['configuration_id'] == 'weak_60'].drop(['configuration_id'], axis=1)
# weak_medians.to_csv('weak_medians.csv')

# "branch" rows
branch_medians = medians.loc[medians['configuration_id'] == 'branch_60'].drop(['configuration_id'], axis=1)
# branch_medians.to_csv('branch_medians.csv')

# "default" rows
default_medians = medians.loc[medians['configuration_id'] == 'default_60'].drop(['configuration_id'], axis=1)
# default_medians.to_csv('default_medians.csv')

diff = pd.merge(weak_medians, branch_medians, on=['TARGET_CLASS','project.id'], how='inner')
diff['weak-branch'] = diff['BranchCoverage_x'] - diff['BranchCoverage_y']
diff.drop(['BranchCoverage_x', 'BranchCoverage_y'], axis=1)

diff_default = pd.merge(weak_medians, default_medians, on=['TARGET_CLASS','project.id'], how='inner')
diff_default['weak-default'] = diff_default['BranchCoverage_x'] - diff_default['BranchCoverage_y']
diff_default.drop(['BranchCoverage_x', 'BranchCoverage_y'], axis=1)

weak_worse_branch = list(filter(lambda x: x < 0, diff['weak-branch']))
weak_better_branch = list(filter(lambda x: x > 0, diff['weak-branch']))
weak_eq_branch = list(filter(lambda x: x == 0, diff['weak-branch']))

print('WEAK VS BRANCH:')
print('weak is worse in ' + str(len(weak_worse_branch)) + ' out of ' + str(len(diff['weak-branch'])) + ' cases')
print('weak is better in ' + str(len(weak_better_branch)) + ' cases')
print('weak is equal in ' + str(len(weak_eq_branch)) + ' cases')

weak_worse_default = list(filter(lambda x: x < 0, diff_default['weak-default']))
weak_better_default = list(filter(lambda x: x > 0, diff_default['weak-default']))
weak_eq_default = list(filter(lambda x: x == 0, diff_default['weak-default']))

print()
print('WEAK VS DEFAULT:')
print('weak is worse in ' + str(len(weak_worse_default)) + ' out of ' + str(len(diff_default['weak-default'])) + ' cases')
print('weak is better in ' + str(len(weak_better_default)) + ' cases')
print('weak is equal in ' + str(len(weak_eq_default)) + ' cases')

# Compute differences between BRANCH;WEAKMUTATION and BRANCH
# res_medians = pd.read_csv("medians.csv")
# res_medians.to_csv('res_medians.csv')

WEAK VS BRANCH:
weak is worse in 105 out of 336 cases
weak is better in 43 cases
weak is equal in 188 cases

WEAK VS DEFAULT:
weak is worse in 33 out of 335 cases
weak is better in 124 cases
weak is equal in 178 cases


In [12]:
# 
# BRANCH COVERAGE
# 

print('results 180')
res60 = pd.read_csv("res_data/results-180.csv")
configuration_ids = ['weak_180', 'branch_180', 'default_180']

# Sort by configuration
res = res60.loc[:,['TARGET_CLASS', 'configuration_id', 'project.id', 'BranchCoverage']]
result = res[res.apply(lambda row : row["configuration_id"] in configuration_ids, axis=1)]

# print(result.to_csv('outfil1.csv'))

# Take medians of the 10 runs of EvoSuite per class
medians = result.groupby(['TARGET_CLASS', 'configuration_id', 'project.id'])['BranchCoverage'].median()
medians.to_csv('medians_coverage.csv')
medians = pd.read_csv("medians_coverage.csv")

# "weak" rows
weak_medians = medians.loc[medians['configuration_id'] == 'weak_180'].drop(['configuration_id'], axis=1)
# weak_medians.to_csv('weak_medians.csv')

# "branch" rows
branch_medians = medians.loc[medians['configuration_id'] == 'branch_180'].drop(['configuration_id'], axis=1)
# branch_medians.to_csv('branch_medians.csv')

# "default" rows
default_medians = medians.loc[medians['configuration_id'] == 'default_180'].drop(['configuration_id'], axis=1)
# default_medians.to_csv('default_medians.csv')

diff = pd.merge(weak_medians, branch_medians, on=['TARGET_CLASS','project.id'], how='inner')
diff['weak-branch'] = diff['BranchCoverage_x'] - diff['BranchCoverage_y']
diff.drop(['BranchCoverage_x', 'BranchCoverage_y'], axis=1)

diff_default = pd.merge(weak_medians, default_medians, on=['TARGET_CLASS','project.id'], how='inner')
diff_default['weak-default'] = diff_default['BranchCoverage_x'] - diff_default['BranchCoverage_y']
diff_default.drop(['BranchCoverage_x', 'BranchCoverage_y'], axis=1)

weak_worse_branch = list(filter(lambda x: x < 0, diff['weak-branch']))
weak_better_branch = list(filter(lambda x: x > 0, diff['weak-branch']))
weak_eq_branch = list(filter(lambda x: x == 0, diff['weak-branch']))

print('WEAK VS BRANCH:')
print('weak is worse in ' + str(len(weak_worse_branch)) + ' out of ' + str(len(diff['weak-branch'])) + ' cases')
print('weak is better in ' + str(len(weak_better_branch)) + ' cases')
print('weak is equal in ' + str(len(weak_eq_branch)) + ' cases')

weak_worse_default = list(filter(lambda x: x < 0, diff_default['weak-default']))
weak_better_default = list(filter(lambda x: x > 0, diff_default['weak-default']))
weak_eq_default = list(filter(lambda x: x == 0, diff_default['weak-default']))

print()
print('WEAK VS DEFAULT:')
print('weak is worse in ' + str(len(weak_worse_default)) + ' out of ' + str(len(diff_default['weak-default'])) + ' cases')
print('weak is better in ' + str(len(weak_better_default)) + ' cases')
print('weak is equal in ' + str(len(weak_eq_default)) + ' cases')

# Compute differences between BRANCH;WEAKMUTATION and BRANCH
# res_medians = pd.read_csv("medians.csv")
# res_medians.to_csv('res_medians.csv')

results 180
WEAK VS BRANCH:
weak is worse in 94 out of 336 cases
weak is better in 48 cases
weak is equal in 194 cases

WEAK VS DEFAULT:
weak is worse in 36 out of 333 cases
weak is better in 102 cases
weak is equal in 195 cases


In [15]:
# 
# BRANCH COVERAGE
# 

print('results 300')
res60 = pd.read_csv("res_data/results-300.csv")
configuration_ids = ['weak_300', 'branch_300', 'default_300']

# Sort by configuration
res = res60.loc[:,['TARGET_CLASS', 'configuration_id', 'project.id', 'BranchCoverage']]
result = res[res.apply(lambda row : row["configuration_id"] in configuration_ids, axis=1)]

# print(result.to_csv('outfil1.csv'))

# Take medians of the 10 runs of EvoSuite per class
medians = result.groupby(['TARGET_CLASS', 'configuration_id', 'project.id'])['BranchCoverage'].median()
medians.to_csv('medians_coverage.csv')
medians = pd.read_csv("medians_coverage.csv")

# "weak" rows
weak_medians = medians.loc[medians['configuration_id'] == 'weak_300'].drop(['configuration_id'], axis=1)
# weak_medians.to_csv('weak_medians.csv')

# "branch" rows
branch_medians = medians.loc[medians['configuration_id'] == 'branch_300'].drop(['configuration_id'], axis=1)
# branch_medians.to_csv('branch_medians.csv')

# "default" rows
default_medians = medians.loc[medians['configuration_id'] == 'default_300'].drop(['configuration_id'], axis=1)
# default_medians.to_csv('default_medians.csv')

diff = pd.merge(weak_medians, branch_medians, on=['TARGET_CLASS','project.id'], how='inner')
diff['weak-branch'] = diff['BranchCoverage_x'] - diff['BranchCoverage_y']
diff.drop(['BranchCoverage_x', 'BranchCoverage_y'], axis=1)

diff_default = pd.merge(weak_medians, default_medians, on=['TARGET_CLASS','project.id'], how='inner')
diff_default['weak-default'] = diff_default['BranchCoverage_x'] - diff_default['BranchCoverage_y']
diff_default.drop(['BranchCoverage_x', 'BranchCoverage_y'], axis=1)

weak_worse_branch = list(filter(lambda x: x < 0, diff['weak-branch']))
weak_better_branch = list(filter(lambda x: x > 0, diff['weak-branch']))
weak_eq_branch = list(filter(lambda x: x == 0, diff['weak-branch']))

print('WEAK VS BRANCH:')
print('weak is worse in ' + str(len(weak_worse_branch)) + ' out of ' + str(len(diff['weak-branch'])) + ' cases')
print('weak is better in ' + str(len(weak_better_branch)) + ' cases')
print('weak is equal in ' + str(len(weak_eq_branch)) + ' cases')

weak_worse_default = list(filter(lambda x: x < 0, diff_default['weak-default']))
weak_better_default = list(filter(lambda x: x > 0, diff_default['weak-default']))
weak_eq_default = list(filter(lambda x: x == 0, diff_default['weak-default']))

print()
print('WEAK VS DEFAULT:')
print('weak is worse in ' + str(len(weak_worse_default)) + ' out of ' + str(len(diff_default['weak-default'])) + ' cases')
print('weak is better in ' + str(len(weak_better_default)) + ' cases')
print('weak is equal in ' + str(len(weak_eq_default)) + ' cases')

# Compute differences between BRANCH;WEAKMUTATION and BRANCH
# res_medians = pd.read_csv("medians.csv")
# res_medians.to_csv('res_medians.csv')

results 300
WEAK VS BRANCH:
weak is worse in 84 out of 334 cases
weak is better in 44 cases
weak is equal in 206 cases

WEAK VS DEFAULT:
weak is worse in 30 out of 329 cases
weak is better in 82 cases
weak is equal in 217 cases


In [8]:
# 
# MUTATION SCORE
# 

print("mutation score")
res60_mutation = pd.read_csv("res_data/mutation_scores.csv")
configuration_ids = ['weak_60', 'branch_60', 'default_60']

# Sort by configuration
res_mutation = res60_mutation.loc[:,['class', 'configuration', 'project', 'mutation_score_percent']]
result_mutation = res_mutation[res_mutation.apply(lambda row : row["configuration"] in configuration_ids, axis=1)]

# print(result.to_csv('outfil1.csv'))

# Take medians of the 10 runs of EvoSuite per class
medians_mutation = result_mutation.groupby(['class', 'configuration', 'project'])['mutation_score_percent'].median()
medians_mutation.to_csv('medians_mutation.csv')
medians_mutation = pd.read_csv("medians_mutation.csv")

# "weak" rows
weak_medians_mutation = medians_mutation.loc[medians_mutation['configuration'] == 'weak_60'].drop(['configuration'], axis=1)
# weak_medians.to_csv('weak_medians.csv')

# "branch" rows
branch_medians_mutation = medians_mutation.loc[medians_mutation['configuration'] == 'branch_60'].drop(['configuration'], axis=1)
# branch_medians.to_csv('branch_medians.csv')

# "default" rows
default_medians_mutation = medians_mutation.loc[medians_mutation['configuration'] == 'default_60'].drop(['configuration'], axis=1)
# default_medians.to_csv('default_medians.csv')

diff_mutation = pd.merge(weak_medians_mutation, branch_medians_mutation, on=['class','project'], how='inner')
diff_mutation['weak-branch'] = diff_mutation['mutation_score_percent_x'] - diff_mutation['mutation_score_percent_y']
diff_mutation.drop(['mutation_score_percent_x', 'mutation_score_percent_y'], axis=1)

diff_default_mutation = pd.merge(weak_medians_mutation, default_medians_mutation, on=['class','project'], how='inner')
diff_default_mutation['weak-default'] = diff_default_mutation['mutation_score_percent_x'] - diff_default_mutation['mutation_score_percent_y']
diff_default_mutation.drop(['mutation_score_percent_x', 'mutation_score_percent_y'], axis=1)

weak_worse_branch_mutation = list(filter(lambda x: x < 0, diff_mutation['weak-branch']))
weak_better_branch_mutation = list(filter(lambda x: x > 0, diff_mutation['weak-branch']))
weak_eq_branch_mutation = list(filter(lambda x: x == 0, diff_mutation['weak-branch']))

print('WEAK VS BRANCH:')
print('weak is worse in ' + str(len(weak_worse_branch_mutation)) + ' out of ' + str(len(diff_mutation['weak-branch'])) + ' cases')
print('weak is better in ' + str(len(weak_better_branch_mutation)) + ' cases')
print('weak is equal in ' + str(len(weak_eq_branch_mutation)) + ' cases')

weak_worse_default_mutation = list(filter(lambda x: x < 0, diff_default_mutation['weak-default']))
weak_better_default_mutation = list(filter(lambda x: x > 0, diff_default_mutation['weak-default']))
weak_eq_default_mutation = list(filter(lambda x: x == 0, diff_default_mutation['weak-default']))

print()
print('WEAK VS DEFAULT:')
print('weak is worse in ' + str(len(weak_worse_default_mutation)) + ' out of ' + str(len(diff_mutation['weak-branch'])) + ' cases')
print('weak is better in ' + str(len(weak_better_default_mutation)) + ' cases')
print('weak is equal in ' + str(len(weak_eq_default_mutation)) + ' cases')


results 60
WEAK VS BRANCH:
weak is worse in 72 out of 319 cases
weak is better in 134 cases
weak is equal in 113 cases

WEAK VS DEFAULT:
weak is worse in 163 out of 319 cases
weak is better in 50 cases
weak is equal in 105 cases


In [6]:
weak_medians

Unnamed: 0,TARGET_CLASS,project.id,BranchCoverage
2,Capture,47_dvd-homevideo,0.103448
5,Client,95_celwars2009,0.005714
8,Convert,47_dvd-homevideo,0.115385
11,JSci.maths.LinearMath,jsci,0.076336
14,JSci.maths.SpecialMath,jsci,0.234694
...,...,...,...
996,weka.core.Memory,107_weka,0.758621
999,wheel.asm.ClassReader,80_wheelwebtool,0.349449
1002,wheel.asm.ClassWriter,80_wheelwebtool,0.867816
1005,wheel.asm.FieldWriter,80_wheelwebtool,1.000000


In [7]:
# res[res.apply(lambda row : row["Branch_Coverage"] in configuration_ids, axis=1)]
# res = res[(res['configuration_id'].to_string() in ['weak_60', 'branch_60'])]
def normalize_row(row):
    if row['configuration_id'] == 'weak_60':
        row['BranchCoverage'] = row['BranchCoverage'] - float(weak_medians.loc[weak_medians['TARGET_CLASS'] == row['TARGET_CLASS']]['BranchCoverage'].to_string().split()[1].strip())
        return row
    elif row['configuration_id'] == 'branch_60':
        row['BranchCoverage'] = row['BranchCoverage'] - float(branch_medians.loc[branch_medians['TARGET_CLASS'] == row['TARGET_CLASS']]['BranchCoverage'].to_string().split()[1].strip())
        return row
    else:
        return row
res.apply(normalize_row, axis=1)

Unnamed: 0,TARGET_CLASS,configuration_id,project.id,BranchCoverage
0,org.znerd.xmlenc.XMLEncoder,weak_60,xmlenc,5.494560e-03
1,org.znerd.xmlenc.XMLEncoder,weak_60,xmlenc,-2.197797e-02
2,org.znerd.xmlenc.XMLEncoder,weak_60,xmlenc,5.500000e-08
3,org.znerd.xmlenc.XMLEncoder,weak_60,xmlenc,5.494560e-03
4,org.znerd.xmlenc.XMLEncoder,weak_60,xmlenc,-1.648346e-02
...,...,...,...,...
23434,com.ib.client.ComboLeg,branch_60,1_tullibee,0.000000e+00
23435,com.ib.client.ComboLeg,branch_60,1_tullibee,0.000000e+00
23436,com.ib.client.ComboLeg,branch_60,1_tullibee,0.000000e+00
23437,com.ib.client.ComboLeg,branch_60,1_tullibee,0.000000e+00
