<h1>Processing for Failed Pins manual analysis</h1>

<h3>Load helper functions</h3>

In [1]:
# Execute the shared helper notebook inside this kernel.
# NOTE(review): `pd` is used below without a visible import, so it is
# presumably defined by this helper notebook — confirm.
%run ../../_utils.ipynb

<h3>Load raw <code>sample_failed_pins</code></h3>

In [66]:
# Column types for the raw sample: integer ids, a categorical pin status,
# and every free-text column kept as a plain object.
text_columns = ['url', 'title', 'html_url', 'body', 'comment_url', 'comment_body']
dtypes = {
    'id': 'int64',
    'comment_issue_id': 'int64',
    'pin_status': 'category',
    **dict.fromkeys(text_columns, 'object'),
}

file_path = './sample_failed_pins.csv'
sample_failed_pins = pd.read_csv(file_path, dtype=dtypes)

<h3>Clean <code>sample_failed_pins</code> and write to csv for excel sheet</h3>

In [67]:
# Keep only the columns needed for manual labelling. Selecting them directly
# is sufficient: the previous `sample_failed_pins.drop(...)` call discarded
# its result and therefore never changed the frame.
for_csv = sample_failed_pins[['id', 'url', 'title', 'comment_body', 'html_url']].copy()

# Blank columns written out with the export; `build_fail_reason` is read back
# later from the labelled spreadsheet.
for_csv['notes'] = ''
for_csv['build_fail_reason'] = ''

for_csv.to_csv('./ma_sample_failed_pins.csv', index=False)

<h3>Check classified samples - first classification</h3>

In [None]:
# Load the labelled spreadsheet; its first column becomes the index.
categorized_failed_pins = pd.read_excel('ma_sample_failed_pins.xlsx', index_col=0)

# Rows that already carry a first-pass failure reason.
has_reason = categorized_failed_pins['build_fail_reason'].notna()
are_classified = categorized_failed_pins[has_reason]

# Leave out rows labelled 'Build not available'; copy so later cells can
# modify the result without touching the loaded frame.
has_build = are_classified['build_fail_reason'].ne('Build not available')
useful_classified = are_classified[has_build].copy()

In [3]:
# Broader category -> fine-grained first-pass labels that belong to it.
# Labels that appear in none of the lists keep their original text.
error_maps = {
    'Linter/Project Guideline Error': [
        'Linter error',
        'Bundle Size Error',
        'Test coverage error',
    ],
    'Incompatible Node/npm/dependency error': [
        'Incompatible Node error',
        'Actual npm error',
        'npm install error',
        'Incompatible dependency error',
        'Missing dependency',
    ],
    'Timeout/Network Error': [
        'Build timeout',
        'Connection timeout',
        'Test timeout',
        'Test timeout error',
        'Network error',
        'Network error/404 file not found',
    ],
    'Security Error': [
        'Audit error',
        'Security error',
        'Authentication error',
    ],
    'Clone Error/Missing File': [
        'Clone error / Missing file',
        'Clone error',
    ],
    'Unrelated Test Failure': [
        'Unrelated test failure',
        'Browser testing error',
    ],
    'Lockfile Error': [
        'Lockfile error',
    ],
    'Syntax/Build Error': [
        'Syntax error',
        'Unrelated CI error',
        'Unrelated CI failure',
        'Build failed to start',
        'Master branch was broken',
        'Docker error',
        'Aborted due to warnings',
        'Cancelled Build',
    ],
}

# Invert to raw label -> category so every row is rewritten exactly once.
# Re-assigning inside a loop over `error_maps` is order-dependent: a row would
# be mapped a second time if a category name also appeared as a raw label
# under a later category.
label_to_category = {
    raw_label: category
    for category, raw_labels in error_maps.items()
    for raw_label in raw_labels
}
useful_classified['build_fail_reason'] = useful_classified['build_fail_reason'].map(
    lambda label: label_to_category.get(label, label)
)

reason_counts = useful_classified['build_fail_reason'].value_counts()

print(f'{len(are_classified)} are classified')
print(reason_counts)
print(f'{len(useful_classified)} are classified and useful')

# Share of each category among the useful rows, rounded to three decimals.
proportions = (
    (reason_counts / useful_classified['build_fail_reason'].count())
    .round(3)
    .to_frame(name='proportion')
    .rename_axis('build_fail_reason')
    .reset_index()
)
proportions.head(20)

613 are classified
Syntax/Build Error                        83
Incompatible Node/npm/dependency error    67
Unrelated Test Failure                    56
Linter/Project Guideline Error            54
Clone Error/Missing File                  41
Lockfile Error                            37
Timeout/Network Error                     34
Security Error                             9
Name: build_fail_reason, dtype: int64
381 are classified and useful


Unnamed: 0,build_fail_reason,proportion
0,Syntax/Build Error,0.218
1,Incompatible Node/npm/dependency error,0.176
2,Unrelated Test Failure,0.147
3,Linter/Project Guideline Error,0.142
4,Clone Error/Missing File,0.108
5,Lockfile Error,0.097
6,Timeout/Network Error,0.089
7,Security Error,0.024


Write out samples for second author

In [146]:
# Draw 58 useful rows with a fixed seed and export only the two link columns.
to_write = (
    useful_classified
    .sample(n=58, random_state=7)
    .loc[:, ['url', 'html_url']]
)
to_write.to_excel('sample_failed_pins_second_author.xlsx')

In [5]:
# Export the 58-row sample (seed 7) with all of its columns so it can be
# compared with Filipe's labels.
to_write = useful_classified.sample(random_state=7, n=58)
to_write.to_excel(excel_writer='sample_failed_pins_first_author.xlsx')

### Check classified samples - Second classification

In [27]:
# Reload the labelled spreadsheet; its first column becomes the index.
categorized_failed_pins = pd.read_excel('ma_sample_failed_pins.xlsx', index_col=0)

# Rows that carry a second-pass label.
has_second_label = categorized_failed_pins['second_categorization'].notna()
are_classified = categorized_failed_pins[has_second_label]

# Leave out rows labelled 'Build not available'; copy so later cells can
# modify the result without touching the loaded frame.
has_build = are_classified['second_categorization'].ne('Build not available')
useful_classified = are_classified[has_build].copy()

  warn(msg)
  warn(msg)


In [28]:
# Frequency of each second-pass label, followed by the number of useful rows.
second_pass_counts = useful_classified['second_categorization'].value_counts()
print(second_pass_counts)
print(f'{len(useful_classified)} are classified and useful')

Linter Error                52
Client test case failure    52
Missing module              43
Lockfile Error              41
Tests failed to run         38
Node error                  29
Build/Linker Error          29
Timeout/Network Error       20
Dependency version error    17
Syntax Error                15
Dependency error            15
Security Error              10
Credentials Error            9
npm install error            6
npm error                    5
Name: second_categorization, dtype: int64
381 are classified and useful


In [29]:
# Broader category -> second-pass labels that belong to it. Labels that appear
# in none of the lists (for example 'Credentials Error') keep their original
# text.
error_maps = {
    'Syntax/Linter/Project Guideline Error': [
        'Linter Error',
        'Syntax Error',
    ],
    'Client Test Case Failure': [
        'Client test case failure',
    ],
    'Client Tests Failing to Run Successfully': [
        'Tests failed to run',
    ],
    'Incompatible Node/NPM/Dependency Version Error': [
        'Node error',
        'npm error',
        'Dependency version error',
    ],
    'Dependency Error': [
        'npm install error',
        'Dependency error',
        'Build/Linker Error',
    ],
    'Timeout/Network Error': [
        'Timeout/Network Error',
    ],
    'Security Error': [
        'Security Error',
    ],
    'Missing File/Module': [
        'Missing module',
    ],
    'Lockfile Error': [
        'Lockfile Error',
    ],
}

# Invert to raw label -> category so every row is rewritten exactly once.
# Re-assigning inside a loop over `error_maps` is order-dependent: a row would
# be mapped a second time if a category name also appeared as a raw label
# under a later category. The result is stored in `second_categorization`
# itself, which is the column the later cells read; the empty
# `overarching_error` column that used to be created here was never filled.
label_to_category = {
    raw_label: category
    for category, raw_labels in error_maps.items()
    for raw_label in raw_labels
}
useful_classified['second_categorization'] = useful_classified['second_categorization'].map(
    lambda label: label_to_category.get(label, label)
)
print(useful_classified['second_categorization'].value_counts())


Syntax/Linter/Project Guideline Error             67
Client Test Case Failure                          52
Incompatible Node/NPM/Dependency Version Error    51
Dependency Error                                  50
Missing File/Module                               43
Lockfile Error                                    41
Client Tests Failing to Run Successfully          38
Timeout/Network Error                             20
Security Error                                    10
Credentials Error                                  9
Name: second_categorization, dtype: int64


In [30]:
# Share of each second-pass category among the labelled rows, rounded to
# three decimals and laid out as a two-column frame.
category_counts = useful_classified['second_categorization'].value_counts()
labelled_total = useful_classified['second_categorization'].count()

proportions = (
    (category_counts / labelled_total)
    .round(3)
    .to_frame(name='proportion')
    .rename_axis('second_categorization')
    .reset_index()
)
proportions.head(20)

Unnamed: 0,second_categorization,proportion
0,Syntax/Linter/Project Guideline Error,0.176
1,Client Test Case Failure,0.136
2,Incompatible Node/NPM/Dependency Version Error,0.134
3,Dependency Error,0.131
4,Missing File/Module,0.113
5,Lockfile Error,0.108
6,Client Tests Failing to Run Successfully,0.1
7,Timeout/Network Error,0.052
8,Security Error,0.026
9,Credentials Error,0.024


In [32]:
# Categories counted towards the combined share computed below.
client_pipeline_categories = [
    'Syntax/Linter/Project Guideline Error',
    'Client Test Case Failure',
    'Incompatible Node/NPM/Dependency Version Error',
    'Missing File/Module',
    'Lockfile Error',
    'Credentials Error',
]

# Add up the (already rounded) proportions of the rows in those categories.
in_client_pipeline = proportions['second_categorization'].isin(client_pipeline_categories)
proportions.loc[in_client_pipeline, 'proportion'].sum()

0.6910000000000001

Calculating Kappa Value

Red (Different)
Yellow (Roughly the same - different granularity)
Green (The same)
Grey (not classified)

In [31]:
# Tallies per colour code from the comparison of the two sets of labels.
red, yellow, green, grey = 3, 15, 33, 7

# Grey is left out of the total; yellow and green both count as agreement.
count = sum((red, yellow, green))
agreement_count = sum((yellow, green))

agreement = agreement_count / count
agreement

0.9411764705882353

In [33]:
# Show the total (red + yellow + green) used as the denominator of `agreement`.
count

51

In [73]:
from sklearn.metrics import cohen_kappa_score

# Both authors' initial labels for the shared sample, plus the recorded
# comparison status of each row.
first_and_second_authors = (
    pd.read_excel('sample_failed_pins_first_author.xlsx')
    [['id', 'build_fail_reason', 'Filipe Category', 'Status']]
    .rename(
        columns={
            'build_fail_reason': 'ben_init_category',
            'Filipe Category': 'filipe_init_category',
        }
    )
)

# Final category per id, taken from the second classification pass.
final_categories = (
    useful_classified
    .reset_index()[['id', 'second_categorization']]
    .rename(columns={'second_categorization': 'final_categorization'})
)

merged = pd.merge(
    left=first_and_second_authors,
    right=final_categories,
    on='id',
)


def _labels_for_kappa(frame, initial_column):
    """Return the label one rater contributes to the kappa computation.

    Rows whose Status is 'Same' or 'Granularity' use the final category,
    rows whose Status is 'Different' use the rater's own initial label from
    `initial_column`, and every other row (including 'Not Classified' and any
    missing or unexpected status) is left missing so it is excluded below.
    """
    agreed = frame['Status'].isin(['Same', 'Granularity'])
    differed = frame['Status'].eq('Different')
    return frame['final_categorization'].where(
        agreed,
        frame[initial_column].where(differed),
    )


merged['ben_category'] = _labels_for_kappa(merged, 'ben_init_category')
merged['filipe_category'] = _labels_for_kappa(merged, 'filipe_init_category')

# Only rows with a label from both raters enter the kappa computation.
for_kappa = merged.loc[
    merged['ben_category'].notna() &
    merged['filipe_category'].notna()
]

cohen_kappa_score(for_kappa['ben_category'], for_kappa['filipe_category'])

0.9315693430656934