In [5]:
# Import relevant packages
import statistics

import numpy as np
import pandas as pd
import scipy
import scipy.stats

In [9]:
# Study 1: Does organizing security patterns focus architectural choices?
# Time (our paper), resp. Effort (original paper)
# Read in data; the CSV must provide the columns Group and Sum
data = pd.read_csv('SecurityPatternsOnlyRelevantData.csv', sep=';', decimal='.')

# One frame per experimental group, with the Group label column removed.
group1 = data[data.Group == 1].drop('Group', axis=1)
group2 = data[data.Group == 2].drop('Group', axis=1)

print('Mean Group 1:', statistics.mean(group1.Sum))
print('Mean Group 2:', statistics.mean(group2.Sum))

# Hand the 1-D Sum column (not the whole frame) to the tests: any additional
# column in the CSV would otherwise be pooled into the sample, and 2-D
# single-column input can make the tests return arrays instead of scalars.
print('Normality Check:', scipy.stats.shapiro(group1.Sum))
print('Normality Check:', scipy.stats.shapiro(group2.Sum))
# scipy.stats.ranksums is the Wilcoxon rank-sum test for two independent samples.
print('Significance test (Wilcoxon):', scipy.stats.ranksums(group1.Sum, group2.Sum))

Mean Group 1: 1483.8095238095239
Mean Group 2: 1763.534090909091
Normality Check: (0.9216144680976868, 7.67596357036382e-05)
Normality Check: (0.6352300047874451, 1.84106243163e-13)
Significance test (Wilcoxon): RanksumsResult(statistic=-1.1502613669820627, pvalue=0.25003623757007165)


In [12]:
# Study 1: Does organizing security patterns focus architectural choices?
# Efficiency (original paper)
# Read in data; the CSV must provide the columns Group and Values
data = pd.read_csv('SecurityPatternsOnlyRelevantDataEfficiency.csv', sep=';', decimal='.')

# One frame per experimental group, Group label removed and rows renumbered from 0.
group1 = data[data.Group == 1].drop('Group', axis=1).reset_index(drop=True)
group2 = data[data.Group == 2].drop('Group', axis=1).reset_index(drop=True)

print('Mean Group 1:', statistics.mean(group1.Values))
print('Mean Group 2:', statistics.mean(group2.Values))

# Hand the 1-D Values column (not the whole frame) to the tests: any additional
# column in the CSV would otherwise be pooled into the sample, and 2-D
# single-column input can make the tests return arrays instead of scalars.
print('Normality Check:', scipy.stats.shapiro(group1.Values))
print('Normality Check:', scipy.stats.shapiro(group2.Values))
# scipy.stats.ranksums is the Wilcoxon rank-sum test for two independent samples.
print('Significance test (Wilcoxon):', scipy.stats.ranksums(group1.Values, group2.Values))

Mean Group 1: 0.14324798466666666
Mean Group 2: 0.21079198159770116
Normality Check: (0.8466210961341858, 1.6525716262094647e-07)
Normality Check: (0.8402353525161743, 2.9145654778517383e-08)
Significance test (Wilcoxon): RanksumsResult(statistic=-2.849337507878849, pvalue=0.0043810378189598626)


In [16]:
# Study 2: Do background colors improve program comprehension in the #ifdef hell?

# Read data; the CSV must provide the columns Group, S and M
colorsTime = pd.read_csv('backgroundcolorsTime.csv', sep=';', decimal='.')

# Rows of the two experimental groups.
colorRows = colorsTime[colorsTime.Group == 'color']
ifdefRows = colorsTime[colorsTime.Group == 'ifdef']

# Naming scheme: group<C|I><S|M>
#   C = group 'color' (background colors), I = group 'ifdef'
#   S = static tasks (column S),           M = maintenance tasks (column M)
# Each frame drops the Group label and the column of the other task type.
groupCS = colorRows.drop(['M', 'Group'], axis=1)
groupCM = colorRows.drop(['S', 'Group'], axis=1)
groupIS = ifdefRows.drop(['M', 'Group'], axis=1)
groupIM = ifdefRows.drop(['S', 'Group'], axis=1)

print('Mean Group idef, static:', statistics.mean(groupIS.S.values))
print('Mean Group color, static:', statistics.mean(groupCS.S.values))
print('Mean Group ifdef, maintenance:', statistics.mean(groupIM.M.values))
print('Mean Group color, maintenance:', statistics.mean(groupCM.M.values))

# shapiro is reached through scipy.stats: the bare name `shapiro` is never
# imported in this notebook and raises NameError on a fresh kernel.
# The tests receive the explicit 1-D column so that only the intended
# measurements enter the sample.
print('Normality Check:', scipy.stats.shapiro(groupIM.M))
print('Normality Check:', scipy.stats.shapiro(groupIS.S))
print('Normality Check:', scipy.stats.shapiro(groupCM.M))
print('Normality Check:', scipy.stats.shapiro(groupCS.S))


# Independent two-sample t tests (ifdef vs. color), once per task type.
# With 1-D input the statistic and p-value are scalars rather than
# one-element arrays.
print('Significance test (t test):', scipy.stats.ttest_ind(groupIS.S, groupCS.S))
print('Significance test (t test):', scipy.stats.ttest_ind(groupIM.M, groupCM.M))

Mean Group idef, static: 554.1190476190476
Mean Group color, static: 352.8636363636364
Mean Group ifdef, maintenance: 515.9880952380952
Mean Group color, maintenance: 657.125
Normality Check: (0.9590693712234497, 0.4975792467594147)
Normality Check: (0.9600026607513428, 0.5161784291267395)
Normality Check: (0.9510636329650879, 0.331510066986084)
Normality Check: (0.9457451105117798, 0.25947996973991394)
Significance test (t test): Ttest_indResult(statistic=array([3.93815022]), pvalue=array([0.00031169]))
Significance test (t test): Ttest_indResult(statistic=array([-2.49144762]), pvalue=array([0.01686067]))


In [17]:
# Study 2: Do background colors improve program comprehension in the #ifdef hell?
# Correctness

# 2x2 contingency tables taken from Figure 5 of the paper: the counts of
# correct and incorrect answers are summed over all static tasks and,
# separately, over all maintenance tasks.
static = np.array([
    [26, 19],
    [18, 23],
])
maintenance = np.array([
    [76, 62],
    [12, 22],
])

# One chi^2 test of independence per table.
static_result = scipy.stats.chi2_contingency(static)
maintenance_result = scipy.stats.chi2_contingency(maintenance)

print('Significance test(chi^2), static:', static_result)
print('Significance test(chi^2), maintenace:', maintenance_result)

Significance test(chi^2), static: (1.1443511772780082, 0.2847347757810025, 1, array([[23.02325581, 21.97674419],
       [20.97674419, 20.02325581]]))
Significance test(chi^2), maintenace: (3.5158688640146414, 0.060783777832451834, 1, array([[70.60465116, 67.39534884],
       [17.39534884, 16.60465116]]))


In [20]:
# Study 3: Do Developers Read Compiler Error Messages?

# Contingency table of correctness / incorrectness / timeout counts, taken
# from Table 2 of the original paper (five rows of three counts each;
# presumably one column per outcome -- confirm against the table).
correctness = np.array([
    [31, 98, 35],
    [66, 10, 34],
    [104, 25, 1],
    [50, 2, 25],
    [70, 28, 12],
])

# chi^2 test of independence over the full 5x3 table.
correctness_result = scipy.stats.chi2_contingency(correctness)
print('Significance test(chi^2)', correctness_result)

Significance test(chi^2) (198.16281320517598, 1.557871842431496e-38, 8, array([[89.07614213, 45.23181049, 29.69204738],
       [59.74619289, 30.33840948, 19.91539763],
       [70.60913706, 35.85448393, 23.53637902],
       [41.82233503, 21.23688663, 13.94077834],
       [59.74619289, 30.33840948, 19.91539763]]))


In [21]:
# Study 3: Do Developers Read Compiler Error Messages?

# One-way ANOVA across five error categories. Each category is described by
# three values (presumably the correct / incorrect / timeout shares derived
# from Table 2 of the original paper -- TODO confirm the derivation).
Semantic = np.array([70.66666667, 19.66666667, 9.333333333])
Dependency = np.array([76.5, 14.5, 8.5])
Typemismatch = np.array([66.5, 24, 10])
Syntax = np.array([65, 20, 15])
Other = np.array([73, 16, 11])

# NOTE(review): every triple adds up to roughly 100, so the five group means
# are almost identical; verify that an ANOVA over these triples is the
# intended analysis.
anova_result = scipy.stats.f_oneway(
    Semantic,
    Dependency,
    Typemismatch,
    Syntax,
    Other,
)
print('Signifance tests (ANOVA):', anova_result)

Signifance tests (ANOVA): F_onewayResult(statistic=4.624386369675644e-05, pvalue=0.9999999948680551)
