In [1]:
import pandas as pd
import math

import re
import matplotlib.pyplot as plt
from statsmodels.stats.proportion import proportions_ztest
import os.path
import seaborn as sns
from utils import *
from sklearn.metrics import cohen_kappa_score

In [2]:
# Let pandas display large frames and long cell contents without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)
# Path prefix for every figure saved by the plotting cells.
IMAGES_PATH='images-2022/'
# Named values (counts, percentages, kappas) collected throughout the notebook;
# presumably exported as LaTeX variables later -- TODO confirm.
latex_vars=dict()
# Label given to PRs with test_pairs > 0 in the 'with_test_pairs' column.
tp_pr_name='Test-Pairs PRs'


# Gather data

In [3]:
def extract_project(x):
    """Return path segments 3 and 4 of a ``/``-separated URL, joined by ``/``.

    For a URL such as ``https://github.com/owner/repo/pull/1`` this yields
    ``owner/repo``. Shorter strings yield whatever segments exist (possibly
    an empty string).
    """
    segments = x.split('/')
    owner_and_repo = segments[3:5]
    return '/'.join(owner_and_repo)

In [4]:
npr=pd.read_csv('new_pullreq.csv')

In [5]:
# TODO: how have we derived projects.csv?
projects=pd.read_csv('projects.csv')
allowed_projects=projects[projects.source=='new_pullreq.csv']['project']

In [6]:
%%time
# TODO: how have we derived all_prs_with_files?
all_prs_with_files=pd.read_csv('all_prs_with_files.csv')
# The per-PR summary is cached on disk; delete 'cached_all_prs.csv' to rebuild it.
if not os.path.isfile('cached_all_prs.csv'):
    all_prs=generate_all_prs(all_prs_with_files)
    # Classify every PR along two axes: has test pairs (test_pairs > 0) and
    # is small (fewer than 10 changed files).
    has_tpairs=all_prs['test_pairs'] > 0
    is_small=all_prs['changed_files'] < 10
    all_prs['selected']='Other'
    all_prs.loc[has_tpairs & is_small,'selected']='small_with_tpairs'
    all_prs.loc[~has_tpairs & is_small,'selected']='small_without_tpairs'
    all_prs.loc[has_tpairs & ~is_small,'selected']='large_with_tpairs'
    all_prs.loc[~has_tpairs & ~is_small,'selected']='large_without_tpairs'
    # churn = all added and deleted lines in production and test code
    all_prs['churn']=all_prs['prod_additions']+all_prs['prod_deletions']+all_prs['test_additions']+all_prs['test_deletions']
    # second element of is_testability_relevant(title) is stored as the title mask
    all_prs['title_mask']=all_prs.title.apply(lambda x: is_testability_relevant(x)[1])
    all_prs.to_csv('cached_all_prs.csv',index=False)
else:
    all_prs=pd.read_csv('cached_all_prs.csv')

CPU times: user 24 s, sys: 3.78 s, total: 27.8 s
Wall time: 28.5 s


In [7]:
len(all_prs)

842723

In [8]:
all_prs['project']=all_prs['url'].apply(extract_project)

In [9]:
# Keep a handle on the unfiltered frame, then restrict all_prs to the
# projects listed in allowed_projects.
all_prs_raw=all_prs
all_prs=all_prs[all_prs['project'].isin(allowed_projects)]

In [10]:
def calc_agreement_rates(m, key1='pr_group_primary',key2='pr_group_secondary'):
    """Print Cohen's kappa and a LaTeX cross-tabulation of two label columns.

    Missing labels in ``key1`` and ``key2`` are replaced by 'irrelevant'
    directly in ``m`` (the caller's frame is modified). The kappa score and
    the LaTeX rendering of the crosstab are printed.

    Returns the crosstab of ``m[key1]`` against ``m[key2]``, with margins.
    """
    for column in (key1, key2):
        unlabeled = m[column].isnull()
        m.loc[unlabeled, column] = 'irrelevant'
    kappa = cohen_kappa_score(m[key1], m[key2])
    print(kappa)
    agreement_table = pd.crosstab(m[key1], m[key2], margins=True)
    print(df_to_latex(agreement_table))
    return agreement_table

In [11]:
manually_reviewed_raw=pd.read_csv('reviewed.csv')
manually_reviewed=manually_reviewed_raw
primary_reviewed=pd.read_csv('primary_reviewed.csv')
secondary_reviewed=pd.read_csv('secondary_reviewed.csv')

In [12]:
# Derive the project (owner/repo) of every reviewed PR from its URL.
# manually_reviewed is still the same object as manually_reviewed_raw here,
# so the raw frame receives the 'project' column as well.
manually_reviewed['project']=manually_reviewed['url'].apply(extract_project)
primary_reviewed['project']=primary_reviewed['url'].apply(extract_project)
secondary_reviewed['project']=secondary_reviewed['url'].apply(extract_project)
# Keep only projects whose source is new_pullreq.csv (see allowed_projects).
manually_reviewed=manually_reviewed[manually_reviewed.project.isin(allowed_projects)]
primary_reviewed=primary_reviewed[primary_reviewed.project.isin(allowed_projects)]
secondary_reviewed=secondary_reviewed[secondary_reviewed.project.isin(allowed_projects)]


In [13]:
# Limit each of the masks listed in masks_to_downsample to at most 50 PRs.

In [14]:
len(set(manually_reviewed.url))

812

In [15]:
manually_reviewed.title_mask.value_counts()

test                 162
Other                152
testability_body     114
Refactor for test     91
Dependency            79
Concurrency           76
Inject                76
Network               62
Singleton             24
testability           21
Name: title_mask, dtype: int64

In [16]:
masks_to_downsample=['Dependency','Concurrency','Network','Inject']
downsample_limit=50
# Rows of masks that are not downsampled are kept unchanged.
kept_parts=[manually_reviewed[~manually_reviewed.title_mask.isin(masks_to_downsample)]]
for mask in masks_to_downsample:
    of_mask=manually_reviewed.title_mask==mask
    urls=manually_reviewed[of_mask]['url'].drop_duplicates()
    # URLs that also occur in secondary_reviewed are taken first.
    durls=manually_reviewed[of_mask & manually_reviewed.url.isin(secondary_reviewed.url)]['url'].drop_duplicates()
    resturls=urls[~urls.isin(durls)]
    if len(durls)>downsample_limit:
        durls=durls.sample(n=downsample_limit, random_state=42)
    # Top up with the remaining URLs, never asking for more than exist
    # (sample() raises when n exceeds the population).
    n=min(downsample_limit-len(durls), len(resturls))
    print(mask, n)
    kept_parts.append(manually_reviewed[manually_reviewed.url.isin(durls)])
    kept_parts.append(manually_reviewed[manually_reviewed.url.isin(resturls.sample(n=n, random_state=42))])
# One concat instead of repeated DataFrame.append, which was removed in pandas 2.0.
df=pd.concat(kept_parts)

Dependency 0
Concurrency 0
Network 5
Inject 0


In [17]:
df[['title_mask','url']].drop_duplicates().title_mask.value_counts()

Other                150
test                 150
testability_body     109
Refactor for test     73
Dependency            50
Concurrency           50
Network               50
Inject                50
Singleton             24
testability           18
Name: title_mask, dtype: int64

In [18]:
# Restrict all three review tables to the URLs that survived the downsampling (df).
manually_reviewed=manually_reviewed[manually_reviewed.url.isin(df.url)]
primary_reviewed=primary_reviewed[primary_reviewed.url.isin(df.url)]
secondary_reviewed=secondary_reviewed[secondary_reviewed.url.isin(df.url)]

In [20]:
# generate list of PRs to review
upsample_masks=['Other','test']
# NOTE(review): the per-mask sample size used to be computed as
# upsample_limit minus the number of already reviewed PRs, but that value was
# always overwritten by the fixed size below. The dead computation is removed;
# upsample_limit stays defined in case other cells reference it.
upsample_limit=100
left_to_sample=50
sampled_parts=[]
for mask in upsample_masks:
    # Candidates: not reviewed yet, fewer than 10 java files, with test pairs, matching the mask.
    sel=all_prs[~(all_prs.url.isin(manually_reviewed_raw.url)) &
        (all_prs.java_files_count < 10) & (all_prs.test_pairs > 0)
        & (all_prs.title_mask==mask)]
    # Skip URLs already listed in the earlier request file for this mask.
    already_asked=pd.read_csv('aug2022_'+mask+'.csv')
    sel=sel[~sel.url.isin(already_asked.url)]
    mask_sel=sel.sample(n=left_to_sample, random_state=42)
    mask_sel[['url','title_mask']].to_csv('sample_16aug2022_' + mask + '.csv', index=False)
    sampled_parts.append(mask_sel)
# One concat instead of repeated DataFrame.append, which was removed in pandas 2.0.
prs_to_sample=pd.concat(sampled_parts)


In [21]:
prs_to_sample.title_mask.value_counts()

Other    50
test     50
Name: title_mask, dtype: int64

In [None]:
testable_prs=pd.read_csv('testable_prs.csv')

In [None]:
# Pair the primary and the secondary verdict for every (url, period) that both
# reviewers labelled; the pr_group columns get _primary / _secondary suffixes.
m=primary_reviewed[['url','pr_group','period']].drop_duplicates().\
    merge(secondary_reviewed[['url','pr_group','period']].drop_duplicates(), on=['url','period'],suffixes=['_primary','_secondary'])
# Binary view: anything other than 'irrelevant' counts as relevant.
m['relevant_primary']=m['pr_group_primary']!='irrelevant'
m['relevant_secondary']=m['pr_group_secondary']!='irrelevant'
# Kappa on the binary relevant/irrelevant labels and on the full pr_group labels.
latex_vars['prgrouptwokappa']=round(cohen_kappa_score(m['relevant_primary'],m['relevant_secondary']),3)
latex_vars['prgroupthreekappa']=round(cohen_kappa_score(m['pr_group_primary'],m['pr_group_secondary']),3)

In [None]:
def calc_pattern_match(a,b):
    """Classify how two collections of patterns overlap.

    Returns 'equal matches' when both contain the same distinct values,
    'partial matches' when they share at least one value, and 'no matches'
    otherwise. Two empty collections count as equal.

    Each argument is turned into a set exactly once, so one-shot iterables
    (generators, iterators) are handled correctly as well.
    """
    first, second = set(a), set(b)
    if first == second:
        return 'equal matches'
    if first & second:
        return 'partial matches'
    return 'no matches'
# For every doubly reviewed URL, compare the ref_pattern values recorded by the
# primary reviewer with those recorded by the secondary reviewer.
m['match']=m['url'].apply(lambda u: \
    calc_pattern_match(primary_reviewed[primary_reviewed.url==u]['ref_pattern'], \
                       secondary_reviewed[secondary_reviewed.url==u]['ref_pattern']))

In [None]:
# table 5
# Number of doubly reviewed PRs per agreement level, plus a 'total' entry.
by_equality=m.match.value_counts()
by_equality['total']=by_equality.sum()
by_equality=pd.DataFrame(by_equality).reset_index()
by_equality['name']=by_equality.index
by_equality_s=by_equality[['index','match']].to_latex(index=False,header=['Agreement level','PR count'])
by_equality_s=by_equality_s.replace('toprule','hline').replace('bottomrule','hline')
by_equality_l=by_equality_s.split('\n')
# Insert a rule before the last four lines of the LaTeX output (presumably the
# 'total' row followed by the table footer). The backslash is escaped
# explicitly: '\h' is an invalid escape sequence.
by_equality_s='\n'.join(by_equality_l[:-4])+'\n\\hline\n'+'\n'.join(by_equality_l[-4:])
print(by_equality_s)
# Context manager so the file is flushed and closed deterministically.
with open('table_two_coders_agreement.tex','w') as table_file:
    table_file.write(by_equality_s)

In [None]:
# calculate kappa for PRs with only one pattern
# NOTE(review): `if True:` only serves as an on/off switch for this cell, and
# the two kappa calls at the bottom are commented out, so only `rpm` is built.
if True:
    # URLs that occur in exactly one row of manually_reviewed
    single_urls=manually_reviewed.url.value_counts()
    single_urls=single_urls[single_urls==1].index
    # Primary and secondary verdicts for those URLs, side by side.
    rpm=primary_reviewed[primary_reviewed.url.isin(single_urls)][['url','pr_group','ref_pattern']].\
        merge(secondary_reviewed[secondary_reviewed.url.isin(single_urls)][['url','pr_group','ref_pattern','comment','fauthor_comment','reviewer']],on='url',suffixes=['_primary','_secondary'])
#    calc_agreement_rates(rpm,'pr_group_primary','pr_group_secondary')
#    calc_agreement_rates(rpm,'ref_pattern_primary','ref_pattern_secondary')


# Prepare headers

In [None]:
#not so sure manually_reviewed.loc[manually_reviewed['title_mask']=='testability_body','title_mask']='testability'

In [None]:
# One row per distinct (url, pr_group, title_mask); rows without a title mask are dropped.
result_by_url=manually_reviewed[['url','pr_group','title_mask']].drop_duplicates()
result_by_url=result_by_url[result_by_url.title_mask.notnull()]
# True when the URL also occurs in secondary_reviewed.
result_by_url['doublechecked']=result_by_url['url'].isin(secondary_reviewed.url)
#result_by_url=result_by_url.merge(all_prs[['url','title_mask']].drop_duplicates())
# NOTE(review): uses get_project (from utils) whereas other cells use the local
# extract_project -- confirm that both derive the same project name.
result_by_url['project']=result_by_url['url'].apply(get_project)

# Distinct reviewed PRs, distinct PRs not labelled 'irrelevant', and the number
# of manually_reviewed rows not labelled 'irrelevant'.
latex_vars['manuallyreviewedprcount']=len(set(result_by_url['url']))
latex_vars['testabilityrelevantprcount']=len(set(result_by_url[result_by_url['pr_group']!='irrelevant']['url']))
latex_vars['testabilityrelevantoccurrencecount']=len(manually_reviewed[manually_reviewed['pr_group']!='irrelevant'])
print('We have manually reviewed %d pull requests from %d open source java projects and in %d of them we have identified refactoring patterns related to unit-tests' % \
      (latex_vars['manuallyreviewedprcount'], len(set(result_by_url['project'])), 
       latex_vars['testabilityrelevantprcount']))

In [None]:
manually_reviewed[manually_reviewed['pr_group']=='other']

In [None]:
latex_vars['nprcount']=len(npr)
latex_vars['nprcount']

In [None]:
latex_vars['allprscount']=len(all_prs)
latex_vars['allprsonejavacount']=len(all_prs[all_prs['java_files_count'] > 0])


In [None]:
latex_vars['tpprcount']=len(all_prs[(all_prs['test_pairs']>0) & (all_prs['java_files_count'] > 0)])
latex_vars['otherprcount']=len(all_prs[(all_prs['test_pairs']==0) & (all_prs['java_files_count'] > 0)])
(latex_vars['tpprcount'],latex_vars['otherprcount'])

In [None]:
latex_vars['tpprtencount']=len(all_prs.query('test_pairs > 0 & changed_files < 10'))
latex_vars['tpprtencount']

In [None]:
# Java PRs of new_pullreq.csv. The copy makes the column assignment below write
# to an independent frame instead of a slice of npr.
jp=npr[npr['language']=='Java'].copy()
# Vectorised "owner/repo" instead of a row-wise apply.
jp['project']=jp['ownername'] + "/" + jp['reponame']
# Per project: number of non-null merged_or_not entries among PRs with core_member == 0.
top_projects=jp[jp['core_member']==0][['project','merged_or_not']].groupby(by=['project']).count()
top_projects['cnt'] = top_projects['merged_or_not']
top_projects['project']=top_projects.index
# Keep the projects with more than 50 such PRs.
top_projects=top_projects[top_projects['cnt'] > 50]

In [None]:
latex_vars['javaprojcount']=len(set(jp['project']))
latex_vars['javaprojcount']

In [None]:
latex_vars['javaprojselectedcount']=len(set(top_projects['project']))
latex_vars['javaprojselectedcount']

In [None]:
latex_vars['allprsprojcount']=len(set(all_prs['project']))

In [None]:
latex_vars['javaprojcount']=len(set(jp['project']))

In [None]:
cached_30k_mined=pd.read_csv('cached_30k_mined.csv')

In [None]:
# vars-summary
# all-prs - summary of PRs, derived from .json

In [None]:
%%time
mined_30k=pd.read_csv('prs_to_sample2.csv')
# Attach 'selected', 'title_mask' and 'test_pairs' from all_prs. No join keys are
# given, so pandas joins on every column the two frames share (inner join).
mined_30k=mined_30k.merge(all_prs[['selected','url','title_mask','test_pairs']].drop_duplicates())
# mined random sample rq1
# take only allowed projects
mined_30k['project']=mined_30k['url'].apply(extract_project)
mined_30k=mined_30k[mined_30k['project'].isin(allowed_projects)]
len(mined_30k)


In [None]:
latex_vars['minedprojectscount']=len(set(mined_30k['url'].apply(get_project)))

In [None]:
latex_vars['minedprcount']=len(set(mined_30k['url']))

In [None]:
mined_30k.loc[mined_30k.url.isin(manually_reviewed[manually_reviewed.title_mask=='testability_body']['url']),\
              'title_mask']='testability_body'

In [None]:
def rows_to_latex(df, header=False,index=True,formatters=None):
    """Render ``df`` as bare LaTeX table rows with alternating grey row colouring.

    ``header``, ``index`` and ``formatters`` are forwarded to ``df.to_latex``.
    Lines starting with ``\\begin``, ``\\end``, ``\\toprule`` or ``\\bottomrule``
    are dropped, and every remaining line at an even position (0, 2, ...) is
    prefixed with ``\\rowcolor{palegrey}``. The lines are returned joined by
    newlines.
    """
    rendered = df.to_latex(header=header, index=index, formatters=formatters)
    wrapper = re.compile(r'\\begin|\\end|\\toprule|\\bottomrule')
    body = [row for row in rendered.split('\n') if wrapper.match(row) is None]
    striped = []
    for position, row in enumerate(body):
        if position % 2 == 0:
            striped.append('\\rowcolor{palegrey}' + row)
        else:
            striped.append(row)
    return '\n'.join(striped)

In [None]:
# Table 2: PR counts per title mask -- mined (RQ1), retrieved and manually reviewed (RQ2).
t2_mr=manually_reviewed[['url','title_mask']].drop_duplicates()
# retrieved: reviewed PRs together with all 'small_with_tpairs' PRs, each
# (url, mask) pair counted once.
t2_retrieved=pd.concat([t2_mr, all_prs[all_prs['selected']=='small_with_tpairs'][['url','title_mask']]]).drop_duplicates()['title_mask'].value_counts()
# sampled: t2_mr is already de-duplicated, so it is counted directly.
t2_sampled=t2_mr['title_mask'].value_counts()
t2_mined=mined_30k[mined_30k['test_pairs'] >0][['url','title_mask']].drop_duplicates()['title_mask'].value_counts()
# Combine the three counts into one frame keyed by mask name.
table2=pd.DataFrame(t2_retrieved).reset_index().rename(columns={'title_mask':'retrieved'})
table2=table2.merge(pd.DataFrame(t2_mined).reset_index().rename(columns={'title_mask':'mined'}), how='left')
table2=table2.merge(pd.DataFrame(t2_sampled).reset_index().rename(columns={'title_mask':'sampled'}))
table2=table2.rename(columns={'index':'title_mask'})
# Masks without mined PRs come out of the left merge as NaN.
table2['mined']=table2['mined'].fillna(0)
table2['retrieved_pct']=round(table2['retrieved']/sum(table2['retrieved'])*100,1)
table2['mined_pct']=round(table2['mined']/sum(table2['mined'])*100,1)
table2['sampled_pct']=round(table2['sampled']/sum(table2['sampled'])*100,1)
table2['mined']=table2['mined'].astype(int)
table2=table2[['title_mask','mined','mined_pct','retrieved','sampled','sampled_pct']]
table2.columns = pd.MultiIndex.from_tuples([('title_mask','title_mask'),
                                            ('RQ1', 'mined'),
                                            ('RQ1','mined_pct'),
                                            ('RQ2', 'retrieved'),
                                            ('RQ2','sampled'),
                                            ('RQ2','sampled_pct'),
                                           ])
# Display order of the masks in every table of the notebook.
title_masks=['testability_body','testability','Refactor for test','Dependency','Concurrency','Network','Singleton','Inject','test','Other']
# Index the rows by mask name so that they can be reordered and a total row added.
table2.index=[x[0] for x in table2['title_mask'].values.tolist()]
table2.loc['Total']=table2.drop(columns='title_mask').sum()
table2.loc['Total','title_mask']='Total'
latab=table2.reindex(title_masks+['Total']).reset_index(drop=True)
# The former Styler experiments (`latab.style.applymap(...)`) were dropped: their
# results were discarded, and the helper's non-raw '\rowcolor' literal started
# with a carriage return instead of a backslash.
latab[latab['title_mask']['title_mask']=='Total']=latab[latab['title_mask']['title_mask']=='Total'].round()
print(rows_to_latex(latab,index=False, formatters=\
                   {('RQ1','mined'):lambda s: '%d' % s,\
                   ('RQ2','retrieved'):lambda s: '%d' % s,\
                    ('RQ2','sampled'):lambda s: '%d' % s,\
                   }) )
latab

In [None]:
from collections import OrderedDict
# Mask name -> keywords sought for; the insertion order defines the ids M1..M10.
title_masks_words=OrderedDict({
    'testability_body':'testability OR testable',
    'testability':'testability OR testable',
    'Refactor for test':'refactor AND (test OR junit)',
    'Dependency':'depend',
    'Concurrency':'concurren OR thread OR sleep OR latch',
    'Network':'network OR socket OR connectivity OR connection',
    'Singleton':'singleton',
    'Inject':'inject OR wire OR wiring',
    'test':'test OR junit',
    'Other':'Anything else'
})
# One row per mask, indexed 'M1'..'Mn' in the order of title_masks_words.
title_masks_df=pd.DataFrame(title_masks_words.keys(),index=['M'+str(x+1) for x in range(len(title_masks_words))])
title_masks_df['ID']=title_masks_df.index
title_masks_df['Name']=title_masks_df[0]
title_masks_df['Keywords sought for']=title_masks_words.values()
# Join the mask descriptions with the per-mask counts held in latab.
latab['Name']=latab[('title_mask','title_mask')]
merged_table_title_masks=title_masks_df.merge(latab, left_on='Name', right_on='Name')
merged_table_title_masks=merged_table_title_masks.rename(columns=\
    {('RQ2','retrieved'):'Ret.',
     ('RQ2','sampled'):'Rev.',
     ('RQ2','sampled_pct'):'%'
    })[['ID','Name','Keywords sought for','Ret.','Rev.','%']]
merged_table_title_masks['Ret.']=merged_table_title_masks['Ret.'].astype(int)
merged_table_title_masks['Rev.']=merged_table_title_masks['Rev.'].astype(int)
# Add a 'Total' row with the rounded column sums, then cast the counts to int
# again because the row was added after the first cast.
merged_table_title_masks.loc['Total']=merged_table_title_masks[['Ret.','Rev.','%']].sum().round()
merged_table_title_masks['Ret.']=merged_table_title_masks['Ret.'].astype(int)
merged_table_title_masks['Rev.']=merged_table_title_masks['Rev.'].astype(int)

# The total row carries its label in the keywords column.
merged_table_title_masks.loc['Total','ID']=''
merged_table_title_masks.loc['Total','Name']=''
merged_table_title_masks.loc['Total','Keywords sought for']='Total'
merged_table_title_masks

In [None]:
# Number of reviewed PRs ('Rev.') for each of the masks M1, M2 and M3.
latex_vars['monecount']=merged_table_title_masks.loc[merged_table_title_masks.ID.isin(['M1']),'Rev.'].sum()
latex_vars['mtwocount']=merged_table_title_masks.loc[merged_table_title_masks.ID.isin(['M2']),'Rev.'].sum()
latex_vars['mthreecount']=merged_table_title_masks.loc[merged_table_title_masks.ID.isin(['M3']),'Rev.'].sum()


In [None]:
# Reviewed PRs ('Rev.') summed over masks M1-M3 and over masks M4-M10.
latex_vars['monetwothreecount']=merged_table_title_masks.loc[merged_table_title_masks.ID.isin(['M1','M2','M3']),'Rev.'].sum()
latex_vars['mfourtotencount']=merged_table_title_masks.loc[merged_table_title_masks.ID.\
    isin(['M4','M5','M6','M7','M8','M9','M10']),'Rev.'].sum()
latex_vars['mfourtotencount']


In [None]:
def rows_to_latex(df, header=False,index=True, grey_idx=0, formatters=None):
    """Render ``df`` as bare LaTeX table rows with alternating grey row colouring.

    This definition replaces the earlier ``rows_to_latex`` of the notebook. It
    accepts ``formatters`` too, so a cell written against the earlier
    definition still works when it is re-run after this one.

    Parameters
    ----------
    df : object with a pandas-style ``to_latex`` method
    header, index : forwarded to ``df.to_latex``
    grey_idx : int
        0 colours the lines at even positions, 1 those at odd positions.
    formatters : forwarded to ``df.to_latex`` (``None`` = default formatting)

    Returns
    -------
    str
        The remaining lines joined by newlines. Lines starting with
        ``\\begin``, ``\\end``, ``\\toprule``, ``\\midrule`` or ``\\bottomrule``
        are removed. The trailing empty line of the ``to_latex`` output is kept
        (and may be coloured), because callers insert rules at fixed positions.
    """
    lines=df.to_latex(header=header,index=index,formatters=formatters).split('\n')

    lines=[line for line in lines if not re.match(r'\\begin|\\end|\\toprule|\\midrule|\\bottomrule', line)]
    lines=['\\rowcolor{palegrey}' + line if position % 2 == grey_idx else line
           for position, line in enumerate(lines)]
    return '\n'.join(lines)

In [None]:
merged_table_title_masks_lines=rows_to_latex(merged_table_title_masks, index=False, header=True, grey_idx=1).split('\n')
# NOTE(review): these positions are computed from the mask ids but never used;
# the loop below inserts the rules at hard-coded positions instead.
merged_table_title_masks_hline_idx=[x*2+2 for x in merged_table_title_masks.index[merged_table_title_masks['ID'].isin(['M1','M4','M8','M10'])]]
for x in [5,12,15]:
    # Backslash escaped explicitly: '\h' is an invalid escape sequence.
    merged_table_title_masks_lines.insert(x, '\\hline')
# Show the numbered lines so the rule positions can be checked by eye.
['%d:%s' %p for p in zip(range(len(merged_table_title_masks_lines)),merged_table_title_masks_lines)]


In [None]:
merged_table_title_masks_s='\n'.join(merged_table_title_masks_lines)
merged_table_title_masks_s=merged_table_title_masks_s.replace('midrule','hline')

In [None]:
# Context manager so the file is flushed and closed deterministically.
with open('table_merged_table_title_masks.tex','w') as table_file:
    table_file.write(merged_table_title_masks_s)

In [None]:
# Distinct (url, reviewer) pairs over both review rounds, with the title mask of
# each URL attached. pd.concat replaces DataFrame.append (removed in pandas 2.0).
double_reviewed=pd.concat([primary_reviewed, secondary_reviewed], sort=True)[['url','reviewer']].drop_duplicates()
double_reviewed=double_reviewed.merge(manually_reviewed[['url','title_mask']].drop_duplicates())
double_reviewed.reviewer.value_counts()

In [None]:
print(len(set(manually_reviewed.url)))
manually_reviewed['reviewerN']=manually_reviewed['reviewer']
# Doubly reviewed PRs are attributed to the reviewer other than 'Reviewer1';
# PRs absent from double_reviewed keep their reviewer from manually_reviewed.
# pd.concat replaces DataFrame.append (removed in pandas 2.0).
sel=pd.concat([double_reviewed[double_reviewed.reviewer!='Reviewer1'],
               manually_reviewed[~manually_reviewed.url.isin(double_reviewed.url)]\
                           [['url','title_mask','reviewer']].drop_duplicates()], sort=True)
# Rows per title mask and reviewer, with margins.
reviewers=pd.crosstab(sel.title_mask,sel.reviewer,margins=True)
print(rows_to_latex(reviewers, header=True, index=True))
reviewers

In [None]:
s=df_to_latex(reviewers.reindex(title_masks+['All']),index=True)
print(s)

In [None]:
sel=manually_reviewed[['reviewerN','title_mask','url','pr_group']].drop_duplicates()
irr=sel[sel.pr_group=='irrelevant']
proportion_irrelevant=100.0*(pd.crosstab(irr.title_mask, irr.reviewerN)/pd.crosstab(sel.title_mask, sel.reviewerN)).round(3)


In [None]:
rf_mined=pd.read_csv('mined30k.csv')

In [None]:
rf_mined['project']=rf_mined['url'].apply(extract_project)
rf_mined=rf_mined[rf_mined['project'].isin(allowed_projects)]

In [None]:
rf_mined['with_test_pairs']=rf_mined['test_pairs'].apply(lambda x: x>0)
rf_mined[['url','with_test_pairs']].drop_duplicates()['with_test_pairs'].value_counts()
sel=rf_mined[rf_mined['with_test_pairs']==True]


In [None]:
sel=manually_reviewed[['pr_group','url']].drop_duplicates()
sel.pr_group.value_counts()

In [None]:
# Review rows where neither the PR group nor the refactoring pattern is 'irrelevant'.
rel_reviewed=manually_reviewed[(manually_reviewed.pr_group!='irrelevant') & (manually_reviewed.ref_pattern!='irrelevant')]
# Distinct (url, test_location, pattern) triples. An earlier two-column
# assignment to `patterns` was overwritten immediately and has been removed.
patterns=rel_reviewed.rename(columns={'ref_pattern':'pattern'})[['url','test_location','pattern']].drop_duplicates()


In [None]:
def summary_by_pattern(f, reviewed=None):
    """Count occurrences per refactoring pattern and express them as percentages.

    Parameters
    ----------
    f : callable
        Applied to every ``ref_pattern`` value before counting, e.g. the
        identity or a function collapsing pattern variants into one name.
        Values mapped to ``None``/NaN are dropped by the grouping.
    reviewed : pandas.DataFrame, optional
        Frame with ``url``, ``test_location`` and ``ref_pattern`` columns.
        Defaults to the notebook-level ``rel_reviewed``.

    Returns
    -------
    pandas.DataFrame
        Columns ``index``, ``pattern``, ``count``, ``percentage``. Rows are
        sorted by descending count, rows named 'Other' / 'other' are moved to
        the end, and a final 'Total' row holds the sums. ``index`` is a
        1-based running number as a string, blank for 'other' and 'Total'.
    """
    source=rel_reviewed if reviewed is None else reviewed
    # Each distinct (url, test_location, pattern) triple counts once.
    patterns=source.rename(columns={'ref_pattern':'pattern'})[['url','test_location','pattern']].drop_duplicates()
    patterns['pattern']=patterns['pattern'].apply(f)
    count_by_pattern=patterns.groupby('pattern').count().reset_index().rename(columns={'url':'count'})[['pattern','count']]
    count_by_pattern=count_by_pattern.groupby('pattern').sum().reset_index().sort_values('count',ascending=False)
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    count_by_pattern=pd.concat([count_by_pattern[count_by_pattern.pattern!='Other'],
                                count_by_pattern[count_by_pattern.pattern=='Other']])
    count_by_pattern['percentage']=round(100*count_by_pattern['count']/sum(count_by_pattern['count']),1)
    count_by_pattern=pd.concat([count_by_pattern[count_by_pattern.pattern!='other'],
                                count_by_pattern[count_by_pattern.pattern=='other']])
    # Column sums become the 'Total' row; the summed pattern text is replaced
    # by the label right away.
    count_by_pattern.loc['Total']=count_by_pattern.sum()
    count_by_pattern.loc['Total','pattern']='Total'
    count_by_pattern.loc['Total','percentage']=round(count_by_pattern.loc['Total','percentage'],0)
    count_by_pattern['index']=[str(x) for x in range(1,len(count_by_pattern)+1)]
    count_by_pattern.loc[count_by_pattern.pattern.isin(['Total','other']),'index']=''
    count_by_pattern['index']=count_by_pattern['index'].astype(str)
    return count_by_pattern[['index','pattern','count','percentage']]


In [None]:
#TABLE VII: Frequency of testability refactoring patterns
count_by_pattern=summary_by_pattern(lambda s: s)
#frequency-patterns
count_by_pattern_lines=rows_to_latex(count_by_pattern[['index','pattern','count','percentage']], \
    index=False).split('\n')

# Horizontal rules at fixed positions of the rendered lines. The backslash is
# escaped explicitly: '\h' is an invalid escape sequence.
count_by_pattern_lines.insert(10,'\n\\hline')
count_by_pattern_lines.insert(12,'\n\\hline')
count_by_pattern_s='\n'.join(count_by_pattern_lines)
# Context manager so the file is flushed and closed deterministically.
with open('table_frequency-patterns.tex','w') as table_file:
    table_file.write(count_by_pattern_s)
count_by_pattern

In [None]:
latex_vars['refpatternscount']=len(count_by_pattern[~count_by_pattern.pattern.isin(['other','Total'])])

In [None]:
# Review rows where neither the pattern nor the PR group is 'irrelevant'.
sel=manually_reviewed[(manually_reviewed.ref_pattern!='irrelevant') & (manually_reviewed.pr_group!='irrelevant')]
# NOTE(review): 'relevant' is not read again in this cell, and the assignment
# targets a filtered slice of manually_reviewed.
sel['relevant']=['testability' if x in ['testability','testability_body'] else 'other' for x in sel.title_mask]
# p: one row per title mask with its row count and a label '<mask> (N=<count>)'.
p=pd.DataFrame(sel['title_mask'].value_counts())
p['title']=p.index
p['ntitle']=p[['title','title_mask']].apply(lambda row: row[0] + ' (N=' + str(row[1]) +')',axis=1)
p['title_mask']=p.index
# NOTE(review): the next expression is evaluated and discarded (not the cell's last line).
p[['title_mask','ntitle']]
# Put the masks into the notebook-wide display order.
p=p.reindex(title_masks)
sel=sel.merge(p,on='title_mask')
# Share (in %) of each refactoring pattern within every mask column.
sam=pd.crosstab(sel.ref_pattern,sel.ntitle).apply(lambda r: 100.0*r/r.sum(),axis=0)
image=sns.heatmap(sam[p['ntitle']], annot=True, cbar=False, fmt=".0f", cmap="YlGn")
plt.ylabel('Refactoring patterns')
plt.xlabel('PR masks')
plt.xticks(rotation=45,ha='right')
image.figure.savefig(IMAGES_PATH+'refpattern-title_masks.eps', transparent=False, bbox_inches='tight')
image

In [None]:
count_by_base_pattern=summary_by_pattern(lambda s: re.sub(r"_for_.*", "",s) if isinstance(s,str) else None)

In [None]:
def calc_prop_by_pr_group(result_by_url):
    """Tabulate, per title mask, how the reviewed PRs split over the PR groups.

    ``result_by_url`` needs the columns ``title_mask``, ``pr_group`` and
    ``url``. The three groups 'irrelevant', 'only_ref_for_test' and
    'incl_ref_for_test' are rendered as ``"<count> (<pct>%)"``, where the
    percentage is the share within the row over those three columns. Rows
    follow the notebook-level ``title_masks`` order, and the margin row is
    labelled ``Total (N=<number of distinct urls>)``.

    Returns a frame with the columns ``title_mask``, ``irrelevant``,
    ``only_ref_for_test`` and ``incl_ref_for_test``.
    """
    m=pd.crosstab(result_by_url.title_mask, result_by_url.pr_group, margins=True).reset_index()
    m=m.rename(columns={'All':'manually_reviewed'})
    m['irrelevant_ratio']=m['irrelevant']/m['manually_reviewed']
    # Mask rows sorted by ratio, margin row last. pd.concat replaces
    # DataFrame.append, which was removed in pandas 2.0.
    m=pd.concat([m[m['title_mask'] != 'All'].sort_values('irrelevant_ratio'), m[m['title_mask']=='All']])
    def get_count_with_percent(xs):
        # Share of each count within its row; the row total is computed once.
        row_total=sum(xs)
        return pd.Series(["%d (%.1f%%)" % (x, x/row_total*100) for x in xs])
    result_types=['incl_ref_for_test','irrelevant','only_ref_for_test']
    m[result_types] = m[result_types].apply(get_count_with_percent,axis=1)
    m=m.drop(columns=['irrelevant_ratio','manually_reviewed'])
    m.index=m['title_mask']
    m=m.reindex(title_masks+['All'])
    m.loc[m.title_mask=='All','title_mask']='Total (N=' + str(len(set(result_by_url.url)))+')'
    m=m[['title_mask','irrelevant','only_ref_for_test','incl_ref_for_test']]
    return m
table_all_prs_by_mask=calc_prop_by_pr_group(result_by_url)
table_all_prs_by_mask

In [None]:
# Share (in %, two decimals) of result_by_url rows per PR group.
latex_vars['percentonlyreffortest'] = round(len(result_by_url[result_by_url.pr_group=='only_ref_for_test']) /\
    len(result_by_url) * 100,2)
latex_vars['percentinclreffortest'] = round(len(result_by_url[result_by_url.pr_group=='incl_ref_for_test']) /\
    len(result_by_url) * 100,2)
# Share of rows not labelled 'irrelevant' among the rows of the 'testability'
# mask and of the 'testability_body' mask, respectively.
latex_vars['percenttestabilitytitle'] = round(len(result_by_url[\
    (result_by_url.title_mask=='testability') & (result_by_url.pr_group!='irrelevant')]) /\
    len(result_by_url[result_by_url.title_mask=='testability']) * 100,2)
latex_vars['percenttestabilitybody'] = round(len(result_by_url[\
    (result_by_url.title_mask=='testability_body') & (result_by_url.pr_group!='irrelevant')]) /\
    len(result_by_url[result_by_url.title_mask=='testability_body']) * 100,2)

In [None]:
table_all_prs_by_mask_lines=rows_to_latex(table_all_prs_by_mask, index=False).split('\n')
# Rule two lines before the end of the rendered rows (presumably just above the
# total row -- TODO confirm). The backslash is escaped explicitly: '\h' is an
# invalid escape sequence.
table_all_prs_by_mask_lines.insert(len(table_all_prs_by_mask_lines)-2,'\\hline')
table_all_prs_by_mask_s='\n'.join(table_all_prs_by_mask_lines)
# Context manager so the file is flushed and closed deterministically.
with open('table_all_prs_by_mask.tex','w') as table_file:
    table_file.write(table_all_prs_by_mask_s)

In [None]:
def mark_reviewed_prs_with_test_pairs(mined, manually_reviewed):
    """Flag mined rows whose reviewed PR group is not 'irrelevant'.

    When ``mined`` has no ``pr_group`` column it is returned unchanged.
    Otherwise it is inner-joined with the distinct (url, pr_group) pairs of
    ``manually_reviewed`` on the columns both frames share, and a boolean
    ``with_test_pairs`` column is added that is True wherever ``pr_group``
    differs from 'irrelevant'.
    """
    if 'pr_group' not in mined:
        return mined
    reviewed_groups = manually_reviewed[['url','pr_group']].drop_duplicates()
    marked = mined.merge(reviewed_groups)
    marked['with_test_pairs'] = marked.pr_group != 'irrelevant'
    return marked

def exclude_too_large_commits(mined, all_prs_with_files):
    """Drop the rows of commits that are too large and likely to be merge commits.

    A commit is excluded when the number of distinct ``classesAfter`` values
    it has within a PR exceeds the number of ``.java`` entries recorded for
    that PR's url in ``all_prs_with_files``. Commits of urls without any
    ``.java`` entry are never excluded.
    """
    is_java = all_prs_with_files.changedFile.str.endswith('.java')
    java_file_counts = all_prs_with_files[is_java][['url','changedFile']].groupby('url').count()
    classes_per_commit = (
        mined.groupby(['url','commit'])
        .agg({'classesAfter':lambda x: len(set(x))})
        .sort_values('classesAfter')
        .reset_index()
    )
    commits = classes_per_commit.merge(java_file_counts.reset_index())
    oversized = commits.classesAfter > commits.changedFile
    irrelevant_commits = commits[oversized]['commit'].drop_duplicates()
    keep = ~mined.commit.isin(irrelevant_commits)
    return mined[keep]

def merge_refs_on_prod_file(mined, manually_reviewed):
    """Keep mined refactorings that touch a production class named in the review.

    Rows of ``mined`` whose ``pr_group`` is not 'irrelevant' are kept only if
    their simple class name, before or after the refactoring, equals the
    simple file name of a ``prod_file`` recorded for the same url in
    ``manually_reviewed``. Rows with ``pr_group == 'irrelevant'`` are all
    kept. Exact duplicate rows are dropped from the result.

    Side effects (unchanged): ``manually_reviewed`` gains the column
    ``prod_file_className``; ``mined`` gains ``classNameBefore`` and
    ``classNameAfter``.
    """
    manually_reviewed['prod_file_className']=manually_reviewed['prod_file'].apply(extract_simple_file_name)
    mined['classNameBefore']=mined['classesBefore'].apply(extract_simple_class_name)
    mined['classNameAfter']=mined['classesAfter'].apply(extract_simple_class_name)
    # Both sub-frames are needed for each of the two joins; build them once.
    reviewed_classes=manually_reviewed[['url','prod_file_className']].drop_duplicates()
    relevant=mined[mined.pr_group!='irrelevant']
    matched_before=relevant.merge(reviewed_classes, left_on=['url','classNameBefore'], right_on=['url','prod_file_className'])
    matched_after=relevant.merge(reviewed_classes, left_on=['url','classNameAfter'], right_on=['url','prod_file_className'])
    # pd.concat replaces the chained DataFrame.append (removed in pandas 2.0).
    m=pd.concat([matched_before, matched_after, mined[mined.pr_group=='irrelevant']])
    m=m.drop_duplicates()
    return m


In [None]:
%%time
import matplotlib.pyplot as plt
import numpy as np
import time
# Refactorings mined for the manually reviewed PRs, cached on disk; delete
# 'cached_mined_manually.csv' to rebuild.
if not os.path.isfile('cached_mined_manually.csv'):
    mined_manually=get_manually_mined(all_prs, manually_reviewed)
    mined_manually.to_csv('cached_mined_manually.csv',index=False)
else:
    mined_manually=pd.read_csv('cached_mined_manually.csv')
# Keep only refactorings from the allowed projects.
mined_manually['project']=mined_manually['url'].apply(extract_project)
mined_manually=mined_manually[mined_manually.project.isin(allowed_projects)]
mined=mined_manually
# The row count is printed after every filtering step to show how much each one removes.
print(len(mined))
mined=exclude_refactored_tests(mined)
print(len(mined))
mined=mark_reviewed_prs_with_test_pairs(mined, manually_reviewed)
print(len(mined))
mined=mined.drop_duplicates()
print(len(mined))
# NOTE(review): the large-commit filter is disabled here, so the next print
# repeats the previous count.
#mined=exclude_too_large_commits(mined, all_prs_with_files)
print(len(mined))
mined=merge_refs_on_prod_file(mined,manually_reviewed)
print(len(mined))
means=calc_means_and_counts_by_ref_type(mined,4)
def plot_ci_reviewed_means(mined, means, leg_title):
    """Point plot of refactoring frequency per PR, relevant vs. irrelevant PRs.

    Parameters
    ----------
    mined : frame passed to ``get_refs_per_url`` (presumably one row per mined
        refactoring -- TODO confirm)
    means : frame with ``refactoringType`` and ``ratio`` columns; it selects
        and orders the plotted refactoring types. A ``refactoringTitle``
        column is added to it in place.
    leg_title : currently unused; kept for interface compatibility

    Returns the matplotlib Axes produced by ``sns.pointplot``.
    """
    a1=get_refs_per_url(mined, True)
    a1['with_tpairs']=True
    a2=get_refs_per_url(mined, False)
    a2['with_tpairs']=False
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    a=pd.concat([a1, a2])
    # var_name is given explicitly: by default melt names the column after
    # a.columns.name when that is set, and 'variable' would then be missing.
    melted=pd.melt(a.reset_index(), id_vars=['url','with_tpairs'], var_name='variable')
    means['refactoringTitle']=means['refactoringType'].apply(as_title)
    melted['variableTitle']=melted['variable'].apply(as_title)
    # The legend labels carry the number of rows in each group.
    melted['hue']=melted['with_tpairs'].apply(lambda x: 'Testability relevant PRs (N=' + str(len(a1)) + ')' if x else 'Testability irrelevant PRs (N=' + str(len(a2)) + ')')
    plt.figure(figsize=(5, 8))
    ax = sns.pointplot(x="value", y="variableTitle", hue="hue",
                   data=melted[melted.variableTitle.isin(means.refactoringTitle)],dodge=0.2,
                   order=means.sort_values('ratio',ascending=False)['refactoringTitle'],
                   markers=["o", "x"],capsize=.2,
                   linestyles=[" ", " "])
    plt.legend(loc='best',bbox_to_anchor=(0.23, 0.85))
    plt.xticks(rotation=0)
    plt.xlabel('Frequency per PR')
    plt.ylabel('')
    return ax

#image=plot_ci_reviewed_means(mined,means,'Testability relevant PRs')
#plot_violins(mined)
# NOTE(review): with the call above commented out, `image` is whatever an
# earlier cell bound to that name, not a plot produced by this cell.
image

In [None]:
def plot_ci_generic_means(mined, means, pos_title, neg_title):
    """Point plot of refactoring frequency per PR for two groups of PRs.

    Parameters
    ----------
    mined : frame passed to ``get_refs_per_url`` (presumably one row per mined
        refactoring -- TODO confirm)
    means : frame with ``refactoringType``, ``significance`` and ``ratio``
        columns; it selects and orders the plotted refactoring types. The
        columns ``refactoringTitle`` and ``refactoringTitleSig`` are added to
        it in place.
    pos_title, neg_title : legend labels for the two groups; the number of
        rows in each group is appended as ``(N=...)``.

    Returns the matplotlib Axes produced by ``sns.pointplot``.
    """
    a1=get_refs_per_url(mined, True)
    a1['with_tpairs']=True
    a2=get_refs_per_url(mined, False)
    a2['with_tpairs']=False
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    a=pd.concat([a1, a2])
    # var_name is given explicitly so the column is called 'refactoringType'
    # whether or not a.columns.name is set.
    melted=pd.melt(a.reset_index(), id_vars=['url','with_tpairs'], var_name='refactoringType')
    means['refactoringTitle']=means['refactoringType'].apply(as_title)
    means['refactoringTitleSig']=means[['significance','refactoringType']].apply(as_title_sig, axis=1)

    melted['variableTitle']=melted['refactoringType'].apply(as_title)
    melted['hue']=melted['with_tpairs'].apply(lambda x: pos_title+' (N=' + str(len(a1)) + ')' if x else neg_title + ' (N=' + str(len(a2)) + ')')
    plt.figure(figsize=(5, 12))
    # Attach the significance-annotated title used on the y axis.
    melted=melted.merge(means[['refactoringTitle','refactoringTitleSig']].drop_duplicates(), left_on='variableTitle',right_on='refactoringTitle')

    ax = sns.pointplot(x="value", y="refactoringTitleSig", hue="hue",
                   data=melted[melted.variableTitle.isin(means.refactoringTitle)],dodge=0.2,
                   order=means.sort_values('ratio',ascending=False)['refactoringTitleSig'],
                   markers=["o", "x"],capsize=.2,
                   linestyles=[" ", " "])
    plt.legend(loc='best',bbox_to_anchor=(0.4, 0.75))
    plt.xticks(rotation=0)
    plt.xlabel('Frequency per PR')
    plt.ylabel('')
    return ax

In [None]:
image=plot_ci_generic_means(mined,means,'Testability relevant PRs', 'Testability irrelevant PRs')
image.figure.savefig(IMAGES_PATH+'reviewed-ci-means-sig.eps', transparent=False, bbox_inches='tight')


In [None]:
def add_to_latex_vars(percentage_by_pattern, target=None):
    """Store the percentage of every pattern as a LaTeX-ready string.

    For each row the key is ``'percent'`` followed by the pattern name with
    underscores removed and lower-cased; the value is the percentage followed
    by ``'\\% '``. Each pattern name is printed as it is processed.

    Parameters
    ----------
    percentage_by_pattern : pandas.DataFrame
        Needs the columns ``pattern`` and ``percentage``.
    target : dict, optional
        Dictionary to fill; defaults to the notebook-level ``latex_vars``.
    """
    target=latex_vars if target is None else target
    for _, row in percentage_by_pattern.iterrows():
        print(row['pattern'])
        key='percent'+row['pattern'].replace('_','').lower()
        target[key]=str(row['percentage']) + '\\% '
add_to_latex_vars(count_by_base_pattern)
add_to_latex_vars(count_by_pattern)

In [None]:
%%time
# NOTE(review): cmap is only used by the commented-out heatmap call below.
cmap = sns.cubehelix_palette(50, hue=0.05, rot=0, light=0.9, dark=0, as_cmap=True)

# Mined refactorings where neither the PR group nor the pattern is 'irrelevant'.
sel=mined[(mined.pr_group!='irrelevant')&(mined.ref_pattern!='irrelevant')][['refactoringType','ref_pattern']]
# Keep patterns with at least 5 rows.
freq_patterns=sel['ref_pattern'].value_counts()
freq_patterns=freq_patterns[freq_patterns>=5]
sel=sel[sel.ref_pattern.isin(freq_patterns.index)]
sel['refactoringTitle']=sel['refactoringType'].apply(as_title)
# Share (in %) of each refactoring type within every pattern column,
# with the columns ordered by pattern frequency.
pam=pd.crosstab(sel.refactoringTitle, sel.ref_pattern)
pam=pam.apply(lambda r: 100.0*r/r.sum(),axis=0)
pam=pam[[x for x in freq_patterns.index]]
fig=plt.figure(figsize = (7,15))
#image=sns.heatmap(pam, annot=True, cbar=False, fmt=".0f", cmap=cmap)
image=sns.heatmap(pam, annot=True, cbar=False, fmt=".0f", cmap="YlGn")
plt.ylabel('')
plt.xlabel('Testability refactoring pattern')
plt.xticks(rotation=45,ha='right')
image.figure.savefig(IMAGES_PATH+'refminer-patterns-colour2.eps', transparent=False, bbox_inches='tight')
image

In [None]:
# RQ 1

In [None]:
%%time

# Load the sampled PRs and the refactorings mined from them. The mined result
# is cached in 'cached_30k_mined.csv'; it is only recomputed via get_3k_mined
# when that file does not exist.
prs_to_mine=pd.read_csv('prs_to_sample2.csv')
if not os.path.isfile('cached_30k_mined.csv'):
    # `mined` and `mined_3k` are bound to the same DataFrame object here.
    mined=mined_3k=get_3k_mined(prs_to_mine)
    mined.to_csv('cached_30k_mined.csv',index=False)
else:
    mined=mined_3k=pd.read_csv('cached_30k_mined.csv')
# Each print shows the row count of `mined` after the preceding step, so the
# effect of every filter can be read off the cell output. Only `mined` is
# rebound by the steps below; `mined_3k` keeps pointing at the loaded frame
# (assuming the helpers return new objects rather than mutating in place —
# TODO confirm against their definitions, which are not visible here).
print(len(mined))
mined=exclude_refactored_tests(mined)
print(len(mined))
mined=mark_prs_with_test_pairs(mined)
print(len(mined))
mined=mined.drop_duplicates()
print(len(mined))
mined=exclude_too_large_commits(mined, all_prs_with_files)
print(len(mined))

def get_only_allowed_projects_by_url(df):
    """Return the rows of ``df`` whose project is listed in ``allowed_projects``.

    The project is derived from each row's ``url`` with ``extract_project`` and
    stored in a ``project`` column of the returned frame. The input frame is
    left untouched: the column is added to a new frame rather than assigned
    in place, which also avoids SettingWithCopyWarning when ``df`` is itself a
    filtered slice.

    Args:
        df: DataFrame with a ``url`` column.

    Returns:
        A new DataFrame with the extra ``project`` column, restricted to rows
        whose project is contained in the notebook-level ``allowed_projects``.
    """
    with_project = df.assign(project=df['url'].apply(extract_project))
    return with_project[with_project['project'].isin(allowed_projects)]
# Restrict all three frames to the allowed projects.
mined=get_only_allowed_projects_by_url(mined)
mined_3k=get_only_allowed_projects_by_url(mined_3k)
prs_to_mine=get_only_allowed_projects_by_url(prs_to_mine)
# limit to 10k per group: 10000 PRs without test pairs followed by 10000 PRs
# with test pairs, both drawn with a fixed seed so the sample is reproducible.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
prs_to_mine_resampled=pd.concat([
    prs_to_mine[prs_to_mine.test_pairs==0].sample(10000, random_state=42),
    prs_to_mine[prs_to_mine.test_pairs>0].sample(10000, random_state=42),
])
prs_to_mine=prs_to_mine_resampled

In [None]:
# Number of sampled PRs per group, exported for the paper.
# The conditions were previously swapped: the 'tppr…' (Test-Pairs PRs) key
# received the count of PRs WITHOUT test pairs and the 'otherpr…' key the
# count of PRs WITH test pairs. Each key now gets the group its name refers to.
latex_vars['tpprminedcount']=len(prs_to_mine[prs_to_mine['test_pairs']>0])
latex_vars['otherprminedcount']=len(prs_to_mine[prs_to_mine['test_pairs']==0])

In [None]:
# Label every mined refactoring row by PR group: tp_pr_name when the PR has at
# least one test pair, 'Other PRs' otherwise.
mined_3k['with_test_pairs']=mined_3k.test_pairs.apply(lambda x: tp_pr_name if x > 0 else 'Other PRs')
# Flag sampled PRs whose URL occurs in the mined refactorings.
prs_to_mine['mined_already']=prs_to_mine['url'].isin(set(mined_3k['url']))
# Last expression: displayed as the cell output (True/False counts).
prs_to_mine['mined_already'].value_counts()

In [None]:
%%time
# Build the per-group summary table (columns: Test-Pairs PRs / Other PRs).
# Rows are stacked with pd.concat because DataFrame.append was removed in
# pandas 2.0.
other_pr_name='Other PRs'

mined_3k['with_test_pairs']=mined_3k.test_pairs.apply(lambda x: tp_pr_name if x > 0 else other_pr_name)

# Row "PR count": sampled PRs per group. `sel` is an alias of prs_to_mine, so
# the group label column is added to prs_to_mine as well.
sel=prs_to_mine
sel['with_test_pairs']=sel.test_pairs.apply(lambda x: tp_pr_name if x > 0 else other_pr_name)
g=sel[['with_test_pairs','url']].groupby('with_test_pairs').count().transpose()
g.index=['PR count']
g.columns.name='Metric'
refactorings_summary=g

# Row "PRs with refactorings": distinct PR URLs that have mined refactorings.
sel=mined_3k[['url','with_test_pairs']].drop_duplicates()
g=sel.groupby('with_test_pairs').count().transpose()
g.index=['PRs with refactorings']
g.columns.name='Metric'
refactorings_summary=pd.concat([refactorings_summary,g])

# Row "Mined refactorings": one count per mined refactoring row.
sel=mined_3k[['url','with_test_pairs']]
g=sel.groupby('with_test_pairs').count().transpose()
g.index=['Mined refactorings']
g.columns.name='Metric'
refactorings_summary=pd.concat([refactorings_summary,g])

# Rows "Average ... per PR": LOC figures are summed per PR URL first and then
# averaged within each group.
sel=all_prs.merge(prs_to_mine[['url','mined_already']].drop_duplicates(),on='url')
sel['with_test_pairs']=sel.test_pairs.apply(lambda x: tp_pr_name if x > 0 else other_pr_name)
sel['Average LOC added per PR']=sel['prod_additions']+sel['test_additions']
sel['Average LOC deleted per PR']=sel['prod_deletions']+sel['test_deletions']
sel['Average churn LOC per PR']=sel['Average LOC deleted per PR']+sel['Average LOC added per PR']
xs=['Average LOC added per PR','Average LOC deleted per PR','Average churn LOC per PR']
g=sel.groupby(['url','with_test_pairs']).agg({x:'sum' for x in xs}).groupby('with_test_pairs').agg({x:'mean' for x in xs}).transpose()
refactorings_summary=pd.concat([refactorings_summary,g])

refactorings_summary.loc['Average mined refactorings per PR']=refactorings_summary.loc['Mined refactorings']/refactorings_summary.loc['PRs with refactorings']
# Fix the column order, then display the table as the cell output.
refactorings_summary=refactorings_summary[[tp_pr_name,other_pr_name]]
refactorings_summary


In [None]:
# Render the summary table as LaTeX rows and write them to
# 'table_mined_datasets.tex'. Rows whose label contains 'Average' are rounded
# to one decimal and rendered separately (with grey_idx=1); all other rows are
# cast to int.
average_idx=refactorings_summary.index.str.contains('Average')
table_mined_datasets_lines_a=rows_to_latex(refactorings_summary[~average_idx].astype(int)).split('\n')
table_mined_datasets_lines_b=rows_to_latex(refactorings_summary[average_idx].round(1), grey_idx=1).split('\n')
table_mined_datasets_lines=table_mined_datasets_lines_a+table_mined_datasets_lines_b
table_mined_datasets_s='\n'.join(table_mined_datasets_lines)
# The context manager closes (and flushes) the file deterministically; the
# previous bare open(...).write(...) left the handle open.
with open('table_mined_datasets.tex','w') as table_file:
    table_file.write(table_mined_datasets_s)

In [None]:
# Box plot of added/deleted production and test LOC, split by PR group.
# NOTE(review): result_by_url is not assigned in the visible part of the
# notebook — confirm it exists before this cell runs.
x=result_by_url.merge(all_prs,on='url')
# Long format: one row per (PR, LOC metric) so seaborn can group by metric.
xm=pd.melt(x[['pr_group','prod_additions','test_additions','prod_deletions','test_deletions']], id_vars=['pr_group'])
xm=xm.rename(columns={'pr_group':'group','value':'LOC'})
# showfliers=False hides outlier points in the plot.
image=sns.boxplot(x="variable", y="LOC",
            hue="group",
            palette='gray',
            width=0.6,
            data=xm, showfliers = False)
image.figure.savefig(IMAGES_PATH+'loc_by_pr_group.eps', transparent=False, bbox_inches='tight')

In [None]:
def calc_means_and_counts_by_ref_type(mined, min_count=5):
    """Compare refactoring-type frequencies between PRs with and without test pairs.

    The two ``agg_data`` results are merged on ``refactoringType``. For every
    refactoring type a two-proportion z-test p-value (rounded to 4 decimals)
    is stored in ``significance``; the sample sizes are the numbers of
    distinct ``prid`` values with ``test_pairs > 0`` and ``test_pairs == 0``.
    Only types whose count is strictly greater than ``min_count`` in both
    groups are kept, and ``ratio`` is ``with_test_pairs / without_test_pairs``.

    NOTE(review): the columns ``with_test_pairs``, ``with_test_pairs_cnt``,
    ``without_test_pairs`` and ``without_test_pairs_cnt`` are expected to come
    from ``agg_data``, whose definition is not visible here — confirm.

    Args:
        mined: DataFrame of mined refactorings with ``test_pairs`` and ``prid``
            columns (plus whatever ``agg_data`` reads).
        min_count: exclusive lower bound on the per-group counts.

    Returns:
        The merged frame, filtered and sorted ascending by ``ratio``.
    """
    w1=agg_data(mined, True)
    w2=agg_data(mined, False)
    w=w2.merge(w1,on='refactoringType')
    with_tests_cnt=len(set(mined[mined.test_pairs>0]['prid']))
    without_tests_cnt=len(set(mined[mined.test_pairs==0]['prid']))

    def p_value(row):
        # Access by label: positional row[0]/row[1] on a label-indexed Series
        # is deprecated in recent pandas.
        counts=[row['with_test_pairs_cnt'], row['without_test_pairs_cnt']]
        # proportions_ztest returns (statistic, p-value); keep the p-value.
        return round(proportions_ztest(counts, [with_tests_cnt,without_tests_cnt])[1],4)

    w['significance']=w[['with_test_pairs_cnt','without_test_pairs_cnt']].apply(p_value, axis=1)

    # .copy() makes the filtered frame independent, so adding `ratio` below
    # does not trigger SettingWithCopyWarning.
    w=w[(w.without_test_pairs_cnt>min_count) & (w.with_test_pairs_cnt > min_count)].copy()
    w['ratio']=w['with_test_pairs']/w['without_test_pairs']
    return w.sort_values('ratio')
means=calc_means_and_counts_by_ref_type(mined)

In [None]:
def calculate_refactorings_summary(mined_3k, pr_count=15000):
    """Build the per-group summary table for a frame of mined refactorings.

    Rows of the result: 'PR count', 'PRs with refactorings',
    'Mined refactorings', three 'Average ... LOC ... per PR' rows and
    'Average mined refactorings per PR'. Columns: ``tp_pr_name`` and
    'Other PRs'. Values are rounded to one decimal.

    The LOC averages are computed from the notebook-level ``all_prs`` joined
    with ``prs_to_mine`` on ``url`` (summed per URL, then averaged per group).

    Side effect: a ``with_test_pairs`` label column is written to ``mined_3k``.

    Args:
        mined_3k: DataFrame of mined refactorings with ``url`` and
            ``test_pairs`` columns.
        pr_count: value reported in the 'PR count' row for both groups.
            Defaults to the previously hard-coded 15000.

    Returns:
        DataFrame indexed by metric name with one column per PR group.
    """
    other_name='Other PRs'
    refactorings_summary=pd.DataFrame(
        {tp_pr_name: [pr_count], other_name: [pr_count]}, index=['PR count'])

    mined_3k['with_test_pairs']=mined_3k.test_pairs.apply(lambda x: tp_pr_name if x > 0 else other_name)

    # Distinct PR URLs with at least one mined refactoring, per group.
    sel=mined_3k[['url','with_test_pairs']].drop_duplicates()
    g=sel.groupby('with_test_pairs').count().transpose()
    g.index=['PRs with refactorings']
    g.columns.name='Metric'
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    refactorings_summary=pd.concat([refactorings_summary,g])

    # Number of mined refactoring rows, per group.
    sel=mined_3k[['url','with_test_pairs']]
    g=sel.groupby('with_test_pairs').count().transpose()
    g.index=['Mined refactorings']
    g.columns.name='Metric'
    refactorings_summary=pd.concat([refactorings_summary,g])

    # LOC figures: summed per PR URL, then averaged within each group.
    sel=all_prs.merge(prs_to_mine[['url','mined_already']].drop_duplicates(),on='url')
    sel['with_test_pairs']=sel.test_pairs.apply(lambda x: tp_pr_name if x > 0 else other_name)
    sel['Average LOC added per PR']=sel['prod_additions']+sel['test_additions']
    sel['Average LOC deleted per PR']=sel['prod_deletions']+sel['test_deletions']
    sel['Average churn LOC per PR']=sel['Average LOC deleted per PR']+sel['Average LOC added per PR']
    xs=['Average LOC added per PR','Average LOC deleted per PR','Average churn LOC per PR']
    g=sel.groupby(['url','with_test_pairs']).agg({x:'sum' for x in xs}).groupby('with_test_pairs').agg({x:'mean' for x in xs}).transpose()
    refactorings_summary=pd.concat([refactorings_summary,g])

    refactorings_summary.loc['Average mined refactorings per PR']=refactorings_summary.loc['Mined refactorings']/refactorings_summary.loc['PRs with refactorings']
    return refactorings_summary.round(1)[[tp_pr_name,other_name]]


In [None]:
# Whole-number percentages of reviewed PRs whose pr_group is / is not
# 'irrelevant', each rendered with a LaTeX-escaped percent sign.
latex_vars.update({
    'percentrelevantpr': str(round(
        len(result_by_url.loc[result_by_url['pr_group']!='irrelevant'])/len(result_by_url)*100
    )) + '\\% ',
    'percentirrelevantpr': str(round(
        len(result_by_url.loc[result_by_url['pr_group']=='irrelevant'])/len(result_by_url)*100
    )) + '\\% ',
})


In [None]:
# Count reviewed PRs per title mask (one row per distinct url/title_mask pair)
# and export each count as 'count<mask>' with spaces removed and lower-cased.
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
for e in manually_reviewed[['url','title_mask']].drop_duplicates().title_mask.value_counts().items():
    print(e)
    latex_vars['count'+e[0].replace(' ','').lower()]=e[1]

In [None]:
# Display how many manually reviewed rows each reviewer contributed.
manually_reviewed.reviewer.value_counts()

In [None]:
# Number of manually reviewed rows that came from one of the three hiwi sources.
hiwi_sampled=len(manually_reviewed.loc[manually_reviewed['source'].isin(['hiwi1','hiwi2','hiwi3'])])
# Distinct (reviewer, url) pairs in the secondary review, split by whether the
# reviewer is 'Reviewer1'.
latex_vars['hiwisamplereviewedcount']=len(
    secondary_reviewed.loc[secondary_reviewed['reviewer']!='Reviewer1', ['reviewer','url']].drop_duplicates()
)
latex_vars['hiwimasksreviewedcount']=len(
    secondary_reviewed.loc[secondary_reviewed['reviewer']=='Reviewer1', ['reviewer','url']].drop_duplicates()
)
# Distinct URLs among the double-reviewed rows.
latex_vars['doublereviewedcount']=len(set(double_reviewed['url']))


In [None]:
# Display the number of distinct PR URLs in rel_reviewed that use one of the
# listed patterns. The mask is now built from rel_reviewed itself; it was
# previously built from manually_reviewed, i.e. a boolean Series belonging to
# a different frame, which pandas has to re-align (with a warning) or rejects
# when the indexes do not match.
len(rel_reviewed[rel_reviewed.ref_pattern.isin(['extract_method_for_invocation',
                                            'extract_method_for_override',
                                            'extract_class_for_invocation',
                                            'extract_class_for_override',
                                            'widen_access_for_invocation',
                                            'widen_access_for_override',
                                            'create_constructor',
                                            'add_constructor_param'
                                           ])]['url'].drop_duplicates())

In [None]:
# Export the number of distinct PR URLs in rel_reviewed whose pattern is one of
# the listed patterns.
latex_vars['commonpatternsprcount']=len(set(
    rel_reviewed.loc[
        rel_reviewed['ref_pattern'].isin([
            'extract_method_for_invocation',
            'extract_method_for_override',
            'extract_class_for_invocation',
            'extract_class_for_override',
            'widen_access_for_invocation',
            'widen_access_for_override',
            'create_constructor',
            'add_constructor_param',
        ]),
        'url',
    ]
))

In [None]:
# Export the number of distinct PR URLs whose group is not 'irrelevant' but
# whose pattern was recorded as 'other'.
latex_vars['unobviouspatternsprcount']=len(set(
    manually_reviewed.loc[
        (manually_reviewed['pr_group']!='irrelevant') & (manually_reviewed['ref_pattern']=='other'),
        'url',
    ]
))

In [None]:
# Print every collected variable for inspection in the notebook output.
for k, v in latex_vars.items():
    print(k, v)

# Write one zero-argument \newcommand per variable to 'variables.tex'.
# Underscores are stripped from the command name. The context manager
# guarantees the file is closed even if a write fails (previously a manual
# open/close pair).
with open('variables.tex','w') as f:
    for k, v in latex_vars.items():
        f.write('\\newcommand{\\' + k.replace('_','') + '}[0]{' + str(v) + ' }\n')
