In [24]:
import pandas as pd
import numpy as npa
import math
import matplotlib.pyplot as plt
import warnings
from sklearn.metrics import cohen_kappa_score
# Suppress UserWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=UserWarning)
from IPython.display import display, HTML
# Render DataFrames without truncating cell text or the number of rows,
# so the full rater comments are visible in the conflict tables.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows',None)

In [25]:
# Read each rater's labelling sheet and keep it as a list of per-row dicts
# (one dict per CSV row, keyed by column name).
david_frame = pd.read_csv('130_results_david.csv')
david_results = david_frame.to_dict(orient='records')
michel_frame = pd.read_csv('130_results_michel.csv')
michel_results = michel_frame.to_dict(orient='records')

In [26]:
# Index every rater's rows by commit hash for direct lookup.
# If a hash occurs more than once in a sheet, the last row wins.
david_results_map = dict((row['commit_hash'], row) for row in david_results)
michel_results_map = dict((row['commit_hash'], row) for row in michel_results)

In [27]:
def sameResult(r1,r2):
    """Tell whether two labels count as the same answer.

    Labels match when they compare equal, or when one is 'none' and the
    other is 'unknown' (in either order). Returns True or False.
    """
    if r1 == r2:
        return True
    none_vs_unknown = (r1 == 'none' and r2 == 'unknown')
    unknown_vs_none = (r1 == 'unknown' and r2 == 'none')
    return bool(none_vs_unknown or unknown_vs_none)

In [28]:
# Integer code assigned to each safety-related bug category label;
# the codes follow the order in which the labels are listed.
categories = {
    label: code
    for code, label in enumerate(
        ("Timing and execution", "Exchange of Information", "Memory", "unknown")
    )
}

In [30]:
def _encode_safety_label(label):
    """Encode an 'is_safety_related' label as an integer class for Cohen's kappa.

    'true' -> 1, 'false' -> 0, any other value -> 2.
    """
    if label == 'true':
        return 1
    if label == 'false':
        return 0
    return 2


def _format_percentage(matches, total):
    """Format ``matches`` as a percentage of ``total``, e.g. "(88.46)".

    Returns "(n/a)" when ``total`` is 0 so that an empty subset does not
    abort the cell with ZeroDivisionError.
    """
    if total == 0:
        return "(n/a)"
    return "(%.2f)" % (matches * 100 / total)


# --- Question 1: is the commit a bug-fixing commit? (all commits) ---
same_is_bug_fixing_commit = 0
is_bug_fixing_commit_conflict = []
both_is_bug_fixing_commit = 0

# --- Question 2: is the bug safety-related? (commits both raters call bug-fixing) ---
same_is_safety_related = 0
is_safety_related_conflict = []
both_is_safety_related = 0

# --- Question 3: which safety category? (commits both raters call safety-related) ---
same_type_of_safety_related = 0
type_of_safety_related_conflict = []

# Parallel label lists per question, one entry per compared commit, in the
# same order for both raters; these feed cohen_kappa_score.
kappa_cohen_results = {
    'is_bug_fixing_commit': { 'Michel': [], 'david': [] },
    'is_safety_related': { 'Michel': [], 'david': [] },
    'type_of_safety_related': { 'Michel': [], 'david': [] }
}

for m_result in michel_results:
    # Pair Michel's row with David's row for the same commit.
    # A commit missing from David's sheet raises KeyError here.
    d_result = david_results_map[m_result['commit_hash']]

    kappa_cohen_results['is_bug_fixing_commit']['Michel'].append(1 if m_result['is_bug_fixing_commit'] else 0)
    kappa_cohen_results['is_bug_fixing_commit']['david'].append(1 if d_result['is_bug_fixing_commit'] else 0)

    # IS BUG-FIXING COMMIT
    if d_result['is_bug_fixing_commit'] == m_result['is_bug_fixing_commit']:
        same_is_bug_fixing_commit += 1
    else:
        is_bug_fixing_commit_conflict.append(
            (d_result['is_bug_fixing_commit'], d_result['comment'], m_result['is_bug_fixing_commit'], m_result['comment'])
        )

    # The remaining questions only apply when BOTH raters say "bug-fixing".
    if not (d_result['is_bug_fixing_commit'] and m_result['is_bug_fixing_commit']):
        continue

    both_is_bug_fixing_commit += 1

    kappa_cohen_results['is_safety_related']['Michel'].append(_encode_safety_label(m_result['is_safety_related']))
    kappa_cohen_results['is_safety_related']['david'].append(_encode_safety_label(d_result['is_safety_related']))

    # IS SAFETY-RELATED BUG
    # NOTE(review): agreement is counted with strict equality, while the kappa
    # encoding above folds every label other than 'true'/'false' into class 2.
    # Two different non-true/false labels are therefore a conflict here but an
    # agreement for kappa.
    if d_result['is_safety_related'] != m_result['is_safety_related']:
        is_safety_related_conflict.append(
            (d_result['is_safety_related'], d_result['comment'], m_result['is_safety_related'], m_result['comment'])
        )
        continue

    same_is_safety_related += 1

    # The category question only applies when BOTH raters say 'true'.
    if d_result['is_safety_related'] == 'true' and m_result['is_safety_related'] == 'true':
        both_is_safety_related += 1

        # A label that is not a key of `categories` raises KeyError here.
        kappa_cohen_results['type_of_safety_related']['Michel'].append(categories[m_result['type_of_safety_related']])
        kappa_cohen_results['type_of_safety_related']['david'].append(categories[d_result['type_of_safety_related']])

        # SAFETY-RELATED BUG CATEGORY
        if d_result['type_of_safety_related'] == m_result['type_of_safety_related']:
            same_type_of_safety_related += 1
        else:
            type_of_safety_related_conflict.append(
                (d_result['type_of_safety_related'], d_result['comment'], m_result['type_of_safety_related'], m_result['comment'])
            )

# Raw agreement per question: matches / size of the subset the question applies to.
print("same_is_bug_fixing_commit: ", same_is_bug_fixing_commit, "/", len(michel_results), _format_percentage(same_is_bug_fixing_commit, len(michel_results)))
print("same_is_safety_related: ", same_is_safety_related, "/", both_is_bug_fixing_commit, _format_percentage(same_is_safety_related, both_is_bug_fixing_commit))
print("same_type_of_safety_related: ", same_type_of_safety_related, "/", both_is_safety_related, _format_percentage(same_type_of_safety_related, both_is_safety_related))

same_is_bug_fixing_commit:  115 / 130 (88.46)
same_is_safety_related:  23 / 41 (56.10)
same_type_of_safety_related:  10 / 11 (90.91)


In [31]:
# Cohen's kappa for "is a bug-fixing commit": one 1/0 label per commit
# in michel_results, for each rater.
cohen_kappa_score(
    y1=kappa_cohen_results['is_bug_fixing_commit']['Michel'],
    y2=kappa_cohen_results['is_bug_fixing_commit']['david'],
)

0.7540983606557377

In [32]:
# Cohen's kappa for "is safety-related", restricted to commits that both
# raters marked as bug-fixing (labels: 'true' = 1, 'false' = 0, other = 2).
cohen_kappa_score(
    y1=kappa_cohen_results['is_safety_related']['Michel'],
    y2=kappa_cohen_results['is_safety_related']['david'],
)

0.3004739336492891

In [33]:
# Cohen's kappa for the safety-related category, restricted to commits that
# both raters marked as safety-related 'true' (labels are `categories` codes).
cohen_kappa_score(
    y1=kappa_cohen_results['type_of_safety_related']['Michel'],
    y2=kappa_cohen_results['type_of_safety_related']['david'],
)

0.8405797101449275

### Disagreements on "Is a bug-fixing commit": each rater's reasoning

In [34]:
# One row per commit where the raters disagree on "is a bug-fixing commit",
# showing each rater's answer next to their comment.
pd.DataFrame(
    data=is_bug_fixing_commit_conflict,
    columns=['david result', 'david comment', 'Michel result', 'Michel comment'],
)

Unnamed: 0,david result,david comment,Michel result,Michel comment
0,True,Initialization of a variable to 0; due to a bug,False,It seems to simply change how files/functions are displayed
1,True,Fixing a warning; possible future bug.,False,Considered a fix but really just avoids a warning.
2,True,It explains the behaviour of the bug and possible future consequences; and also it fixes it.,False,A condition is extended to cover only the necessary cases (and a section of code is not executed if it is not necessary).
3,False,It seems that they are refactoring more than fixing.,True,"Indicated as a ""Correct"" but i didnt understand the change"
4,True,I would say that this commit is fixing a bug about a GPU error when initialization; maybe is considered as safety related.,False,It appears to fix a change introduced earlier; in this case to supplement a case not contemplated. I am not sure to what extent it is a fix
5,True,They are fixing with this pull a livepatch bug; could be considered as safety related commit because it fixes an init fail of Livepatch.,False,Does not look like a fix
6,True,They are fixing with this pull a livepatch bug; could be considered as safety related commit because it fixes an init fail of Livepatch.,False,It only adds a description to a parameter (comment)
7,True,It is fixing a bug; but not related to safety.,False,Fixed a bug in the comprehension of some components
8,False,It is a perfomance feature; in my opinion (I think).,True,Seems to fix a performance problem
9,True,It is fixing a bug realted to the type of an offset; but I'm not sure about the type.,False,I'm not sure; it doesn't look like a bug-fix.


### Disagreements on "Is a safety-related bug": each rater's reasoning

In [35]:
# One row per commit where the raters disagree on "is safety-related",
# showing each rater's answer next to their comment.
pd.DataFrame(
    data=is_safety_related_conflict,
    columns=['david result', 'david comment', 'Michel result', 'Michel comment'],
)

Unnamed: 0,david result,david comment,Michel result,Michel comment
0,unknown,Initialization of a variable to 0; due to a bug,false,Check that is a BFC after checking mail list. A variable is not initialize but returned in some cases
1,unknown,It is fixing a bug; but I don't know if it safety related.,true,Looks like a fix on bus ¿maybe a safety-relelated?
2,true,In this commit; they talk about fixing; and inside the message they specify some changes related to memory.,unknown,There are too many changes in this merge. It seems to include some fixes but each one may be completely different.
3,true,Fixing a bug realted to pointers.,unknown,A fix to avoid NPE
4,true,Fixing a bug related to the clock sequence; causing a reset on the board.,unknown,Looks like a fix; but I don't udestand the the func
5,true,It is a bug fixing commit about a pointer that was not pointing to the right clock.,unknown,Fix a clock
6,unknown,It clearly fixes a bug; but not sure about the type.,false,It is a change that tries to include some cases not contemplated so far.
7,false,Fixes a bug; for me is not a safety bug and is not obvious either.,unknown,Looks like a fix; but I don't understand exactly the code
8,unknown,It fixes two bugs in a build process; not sure if its related to safety.,false,This merge include 2 changes that looks like a fix. Add a condition to check that CONFIG_PPC_64S_HASH_MMU is set so don't trigger a build failure. I think that a build failure is not a Safety-Related Bug
9,true,It seems a safety bug related to keyctl (a command for working with keys from userspace); so for me is an exchange of information bug.,false,A parameter validation is fixed


### Disagreements on "Safety-related bug category": each rater's reasoning

In [36]:
# One row per commit where the raters disagree on the safety-related category,
# showing each rater's answer next to their comment.
pd.DataFrame(
    data=type_of_safety_related_conflict,
    columns=['david result', 'david comment', 'Michel result', 'Michel comment'],
)

Unnamed: 0,david result,david comment,Michel result,Michel comment
0,unknown,Fixing a bug when about MPC that hang the system.,Timing and execution,Reversed a change that caused the program to hang
