In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import numpy as np
import scipy.stats as stats

In [2]:
# Load the measurements; every later cell reads from this frame.
df = pd.read_csv('../../results/results.csv')

# Distinct programs and flags, in order of first appearance in the file.
progs = df['program'].unique()
flags = df['flag'].unique()

# Size of the full program x flag grid (assumes every pairing is present).
print(f'total program-flag combinations: {len(flags) * len(progs)}')

# Display settings: five decimals for floats, no truncation of wide columns.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.float_format', '{:.5f}'.format)

total program-flag combinations: 252


In [3]:
# TODO: remove outliers from the initial data (not implemented; the cells below use the unfiltered df)


In [4]:
# Shapiro-Wilk test for every program-flag combination: checks whether the
# pkg_energy samples of that combination are consistent with a normal distribution.
norm_rows = []

for prog in progs:
    prog_rows = df[df['program'] == prog]
    for flag in flags:
        sample = prog_rows.loc[prog_rows['flag'] == flag, 'pkg_energy']
        w, pvalue = stats.shapiro(sample)
        # The combination is labelled normal when normality is not rejected at the 5% level.
        norm_rows.append((prog, flag, w, pvalue, pvalue >= 0.05))

norm_df = pd.DataFrame(
    norm_rows,
    columns=['prog', 'flag', 'sw_w', 'sw_p', 'is_normal'],
)
norm_count = int((norm_df['is_normal'] == True).sum())

print(f'prog-flag combinations that follow a normal distribution: {norm_count}')

prog-flag combinations that follow a normal distribution: 221


In [5]:
# Check homogeneity of variances across the flags of each program.

# For data drawn from NORMAL distributions: Bartlett's test.
normal_rows = norm_df[norm_df['is_normal'] == True]
norm_progs = normal_rows['prog'].unique()

homo_rows = []
for prog in norm_progs:
    # Flags of this program whose samples passed the normality check.
    norm_flags = normal_rows.loc[normal_rows['prog'] == prog, 'flag'].values.tolist()

    # Bartlett's test compares groups, so at least two are required.
    if len(norm_flags) < 2:
        continue

    prog_rows = df[df['program'] == prog]
    samples = [prog_rows.loc[prog_rows['flag'] == flag, 'pkg_energy'] for flag in norm_flags]
    w, pvalue = stats.bartlett(*samples)
    # NOTE(review): the 0.12 cut-off differs from the 0.05 used by the other
    # tests in this notebook -- confirm that it is intentional.
    homo_rows.append(['bartlett', prog, w, pvalue, pvalue >= 0.12, len(norm_flags), norm_flags])

# For data that is not drawn from NORMAL distributions: Levene's test.
# (Currently disabled; kept for reference.)
# non_progs = norm_df[norm_df['is_normal'] == False]['prog'].unique()
# for prog in non_progs:
#     non_flags = norm_df[(norm_df['prog'] == prog) & (norm_df['is_normal'] == False)]['flag']

#     if (len(non_flags) <= 1): continue

#     data = [df[(df['flag'] == flag) & (df['program'] == prog)]['pkg_energy'] for flag in non_flags]
#     w, pvalue = stats.levene(*data)
#     homo_df.append(['levene', prog, w, pvalue, pvalue > 0.05, len(non_flags),non_flags])

homo_df = pd.DataFrame(
    homo_rows,
    columns=['test', 'prog', 'w', 'p', 'eq_var', 'count', 'flags'],
)

# Show the programs whose variances are considered equal (the 'flags' column is left out).
has_equal_var = homo_df['eq_var'] == True
print(homo_df[has_equal_var].iloc[:, :6])

# Number of program-flag combinations covered by the passing Bartlett tests.
is_bartlett = homo_df['test'] == 'bartlett'
homo_count = homo_df.loc[has_equal_var & is_bartlett, 'count'].sum()

print(f'\nprog-flag combinations with homogeneity of variances: {homo_count}')

        test            prog        w       p  eq_var  count
1   bartlett     constraints  7.07166 0.62966    True     10
4   bartlett          exp3_8 14.62668 0.14628    True     11
5   bartlett     gen_regexps 14.03554 0.17138    True     11
8   bartlett  real_cacheprof  3.22364 0.97566    True     11
15  bartlett     callback002 12.04364 0.28215    True     11
16  bartlett            chan  9.81531 0.45684    True     11
19  bartlett          awards 10.50859 0.48530    True     12

prog-flag combinations with homogeneity of variances: 77


In [6]:
# One-way ANOVA per program: compares pkg_energy across all flags,
# without checking the normality or equal-variance assumptions first.
values = []

for prog in progs:
    prog_rows = df[df['program'] == prog]
    samples = [prog_rows.loc[prog_rows['flag'] == flag, 'pkg_energy'] for flag in flags]
    f_stat, p_val = stats.f_oneway(*samples)
    # The last column marks significance at the 5% level.
    values.append([prog, f_stat, p_val, p_val < 0.05])

anova_columns = ['Program', 'F Value', 'P Value', 'p < 0.05']
values_df = pd.DataFrame(values, columns=anova_columns)
print(values_df)

           Program     F Value  P Value  p < 0.05
0        cacheprof     2.61449  0.00541      True
1      constraints    15.74610  0.00000      True
2             hash    19.34971  0.00000      True
3       bernouilli     2.40790  0.01026      True
4           exp3_8     3.89546  0.00009      True
5      gen_regexps    99.58709  0.00000      True
6        integrate   155.11694  0.00000      True
7             anna     2.56811  0.00625      True
8   real_cacheprof     4.22365  0.00003      True
9        compress2     4.68830  0.00001      True
10    binary-trees     0.99983  0.45150     False
11           fasta   763.98826  0.00000      True
12    k-nucleotide   360.90387  0.00000      True
13        pidigits     5.97124  0.00000      True
14     callback001    15.84489  0.00000      True
15     callback002     1.80065  0.06243     False
16            chan     3.37400  0.00049      True
17            ansi    14.90519  0.00000      True
18            atom     5.10987  0.00000      True


In [7]:
# ANOVA restricted to the program-flag combinations that respect all assumptions:
# only programs marked eq_var in homo_df, and only the flags stored with them.

values = []
equal_var_progs = homo_df.loc[homo_df['eq_var'] == True, 'prog']

for prog in equal_var_progs:
    # Flags recorded for this program by the variance test.
    # NOTE(review): this takes the first homo_df row matching the program,
    # so it relies on homo_df holding a single row per program.
    p_flags = homo_df.loc[homo_df['prog'] == prog, 'flags'].iloc[0]

    prog_rows = df[df['program'] == prog]
    samples = [prog_rows.loc[prog_rows['flag'] == flag, 'pkg_energy'] for flag in p_flags]
    f_stat, p_val = stats.f_oneway(*samples)
    # The fourth column marks significance at the 5% level.
    values.append([prog, f_stat, p_val, p_val < 0.05, p_flags])

anova_columns = ['Program', 'F Value', 'P Value', 'p < 0.05', 'flags']
values_df = pd.DataFrame(values, columns=anova_columns)
print(values_df)

          Program   F Value  P Value  p < 0.05  \
0     constraints  15.69057  0.00000      True   
1          exp3_8   3.84234  0.00021      True   
2     gen_regexps  57.03660  0.00000      True   
3  real_cacheprof   4.00977  0.00013      True   
4     callback002   1.67554  0.09706     False   
5            chan   3.74200  0.00027      True   
6          awards 126.60341  0.00000      True   

                                                                                                                                                                           flags  
0                                   [-O0, -fcase-merge, -fcall-arity, -fcmm-elim-common-blocks, -fcmm-sink, -fcpr-anal, -fcse, -fstg-cse, -fdmd-tx-dict-sel, -fdo-eta-reduction]  
1       [-fcase-merge, -fcase-folding, -fcall-arity, -fexitification, -fcmm-elim-common-blocks, -fcmm-sink, -fcpr-anal, -fcse, -fstg-cse, -fdmd-tx-dict-sel, -fdo-eta-reduction]  
2              [-O0, -fcase-merge, -fcase-folding, -fcall-arit