# Get BFCs from the list of annotated commits
They will be stored in `BFCs['estimated']`

In [107]:
import os
import pandas as pd

annotations_dir = "comparison-3"
annotations_dir = "commit-annotator"
reviews_dir = "comparison-3"

def get_BFCs(results_df):
    df = results_df[['hash', 'bfc']]
    return df

def read_csv_from_dir(dirname, filename):
    results = pd.read_csv(os.path.join('..', dirname, filename))
    return results
    
results_A = read_csv_from_dir(annotations_dir, 'annotations_Michel.csv')
results_B = read_csv_from_dir(annotations_dir, 'annotations_Abhishek.csv')
results_C = read_csv_from_dir(annotations_dir, 'annotations_David.csv')

BFCs_A = get_BFCs(results_A)
BFCs_B = get_BFCs(results_B)
BFCs_C = get_BFCs(results_C)

BFCs = pd.merge(BFCs_A, BFCs_B, on='hash', how='inner', suffixes=('A', 'B'))

BFCs = pd.merge(BFCs, BFCs_C, on='hash', how='inner', suffixes=('', 'C'))
BFCs = BFCs[['hash', 'bfcA', 'bfcB', 'bfc']]
BFCs = BFCs.rename(columns={'bfc': 'bfcC'})

In [108]:
BFCs_R = read_csv_from_dir(reviews_dir, 'review_jesus.csv')
BFCs = pd.merge(BFCs, BFCs_R, on='hash', how='left')
BFCs['bfcR'] = BFCs['bfc']
BFCs = BFCs[['hash', 'bfcA', 'bfcB', 'bfcC', 'bfcR']]

In [109]:
import math

def estimation(A, B, C, R):
    if math.isnan(R):
        return round((A + B + C) / 3)
    else:
        return R
BFCs['estimation'] = BFCs.apply(lambda row: estimation(row['bfcA'], row['bfcB'], row['bfcC'], row['bfcR']), axis=1) 

In [110]:
BFCs['estimation'].value_counts()

estimation
0.0    697
4.0    184
3.0     11
2.0     10
1.0      9
Name: count, dtype: int64

# Get commits in LTS branches

In [111]:
LTS_commits = pd.read_csv('upstream_commits.csv', converters={'branch': str})
LTS_commits['hash'] = LTS_commits['commit'].str[:10]
print(f"Upstream commits found: {len(LTS_commits)}")
print(f"Unique upstream commits found: {len(LTS_commits['hash'].unique())}")

Upstream commits found: 51748
Unique upstream commits found: 26047


# Merge and analyze annotated commits found in LTS branches

In [117]:
LTS_annotated = pd.merge(LTS_commits, BFCs, on='hash', how='left')
#LTS_annotated['estimation'].value_counts()
LTS_annotated.groupby('branch')['estimation'].value_counts()

branch  estimation
4.19    4.0           13
        0.0            7
        1.0            2
        2.0            1
5.10    4.0           24
        0.0           12
        1.0            4
        2.0            2
5.15    4.0           32
        0.0           13
        1.0            4
        2.0            2
5.4     4.0           16
        0.0            9
        1.0            2
        2.0            2
6.1     0.0            4
        4.0            3
        1.0            1
        2.0            1
Name: count, dtype: int64

In [113]:
commit_url = "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id={hash}"

LTS_annotated = LTS_annotated[LTS_annotated['estimation'].notna()]
LTS_annotated['link'] = LTS_annotated['commit'].apply(lambda hash: "<a href='" + commit_url.format(hash=hash) + "'>Link</a>")
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
#LTS_annotated

In [115]:
from IPython.display import HTML

HTML(LTS_annotated.to_html(escape=False))
#HTML(LTS_annotated.loc[LTS_annotated['estimation'] < 4, :].to_html(escape=False))

Unnamed: 0,branch,commit,hash,bfcA,bfcB,bfcC,bfcR,estimation,link
2941,6.1,f0052d7a1edb3d8921b4e154aa8c46c4845b3714,f0052d7a1e,0.0,0.0,0.0,,0.0,Link
3298,6.1,65ea840afd508194b0ee903256162aa87e46ec30,65ea840afd,1.0,4.0,0.0,,2.0,Link
3489,6.1,994b7ac1697b4581b7726d2ac64321e3c840229b,994b7ac169,0.0,0.0,0.0,,0.0,Link
5215,6.1,f9cdf4130671d767071607d0a7568c9bd36a68d0,f9cdf41306,4.0,0.0,0.0,,1.0,Link
5256,6.1,8b3a9ad86239f80ed569e23c3954a311f66481d6,8b3a9ad862,4.0,4.0,4.0,,4.0,Link
5270,6.1,fd3d91ab1c6ab0628fe642dd570b56302c30a792,fd3d91ab1c,4.0,4.0,4.0,,4.0,Link
5586,6.1,99387b016022c29234c4ebf9abd34358c6e56532,99387b0160,0.0,0.0,0.0,,0.0,Link
7148,6.1,ad75bd85b1db69c97eefea07b375567821f6ef58,ad75bd85b1,4.0,4.0,4.0,,4.0,Link
7960,6.1,91592b5c0c2f076ff9d8cc0c14aa563448ac9fc4,91592b5c0c,0.0,0.0,0.0,,0.0,Link
9380,5.15,550842cc60987b269e31b222283ade3e1b6c7fc8,550842cc60,4.0,4.0,4.0,,4.0,Link
