In [6]:
import pandas as pd
from collections import Counter, defaultdict
from krippendorff_alpha import krippendorff_alpha
from nltk.metrics.distance import masi_distance

In [7]:
code2theme = {
    "source:no-longer-maintained": "source:deprecation",
    "source:outdated": "source:deprecation",
    "source:vulnerability": "source:vulnerability",
    "source:issue": "source:issue",
    "source:other": "other",
    "target:feature": "target:feature",
    "target:ease-of-use": "target:usability",
    "target:performance": "target:performance",
    "target:flexibility": "target:usability",
    "target:activity": "source:deprecation",
    "target:size": "target:size",
    "target:stability": "target:stability",
    "target:popularity": "target:popularity",
    "target:other": "other",
    "project:compatibility:license": "project:license",
    "project:compatibility:other-library": "project:integration",
    "project:compatibility:environment": "project:integration",
    "project:consistency:upstream": "project:integration",
    "project:consistency:downstream": "project:integration",
    "project:consistency:within-project": "project:simplification",
    "project:organizational": "project:organizational",
    "project:other": "other",
    "_": "other"
}

coding = pd.read_excel("data/coding.xlsx").fillna("")

# Check for trivial labeling errors
for idx, row in coding[coding.reasonText != ""].iterrows():
    for code in row.reasonCode1.split(","):
        if code not in code2theme:
            print(idx, row.reasonCode1)
    for code in row.reasonCode2.split(","):
        if code not in code2theme:
            print(idx, row.reasonCode2)
            
print(Counter(coding.type))
print(Counter(coding[coding.reasonText != ""].type))

Counter({'commit': 3340, 'pull request': 2775, 'issue': 385})
Counter({'commit': 351, 'pull request': 233, 'issue': 112})


In [8]:
def get_agreement(data1, data2):
    data = coding[coding.reasonText != ""]
    agreed_items, total_items = 0, 0
    rater1_count, rater2_count = Counter(), Counter()
    for item1, item2 in zip(data1, data2):
        item1, item2 = set(item1.split(",")), set(item2.split(","))
        agreed_items += len(item1 & item2)
        total_items += (len(item1) + len(item2)) / 2
        rater1_count.update(item1)
        rater2_count.update(item2)
    agreement = agreed_items / total_items
    acc_agreement = sum(rater1_count[x] * rater2_count[x] for x in rater1_count) / total_items**2
    cohen_kappa = 1 - (1 - agreement) / (1 - acc_agreement)
    return agreement, cohen_kappa
data = coding[coding.reasonText != ""]
agreement, cohen_kappa = get_agreement(data.reasonCode1, data.reasonCode2)
print(f"Observed Agreement: {agreement:.3f}")
print(f"Cohen's Kappa: {cohen_kappa:.3f}")

Observed Agreement: 0.761
Cohen's Kappa: 0.744


In [9]:
theme1, theme2 = [], []
for idx, row in coding.iterrows():
    if row.reasonText != "":
        theme1.append(",".join(set(map(lambda x: code2theme[x], row.reasonCode1.split(",")))))
        theme2.append(",".join(set(map(lambda x: code2theme[x], row.reasonCode2.split(",")))))
    else:
        theme1.append("")
        theme2.append("")
coding["theme1"] = theme1
coding["theme2"] = theme2

final_themes = []
for idx, row in coding.iterrows():
    if row.reasonText != "":
        if set(row.theme1.split(",")) == set(row.theme2.split(",")):
            final_themes.append(row.theme1)
        else:
            final_themes.append("undecided")
    else:
        final_themes.append("")
# coding["finalTheme"] = final_themes
data = coding[coding.reasonText != ""]
agreement, cohen_kappa = get_agreement(data.theme1, data.theme2)
print(f"Observed Agreement: {agreement:.3f}")
print(f"Cohen's Kappa: {cohen_kappa:.3f}")

Observed Agreement: 0.839
Cohen's Kappa: 0.818


References
1. Passonneau, Rebecca. "Measuring agreement on set-valued items (MASI) for semantic and pragmatic annotation." (2006).
2. Krippendorff, Klaus. "Computing Krippendorff's alpha-reliability." (2011).
3. Krippendorff, Klaus. Content analysis: An introduction to its methodology. Sage publications, 2018.

In [10]:
krippendorff_alpha((data.reasonCode1, data.reasonCode2), metric=masi_distance, convert_items=lambda x: set(x.split(",")))

0.7312805955919376

In [11]:
krippendorff_alpha((data.theme1, data.theme2), metric=masi_distance, convert_items=lambda x: set(x.split(",")))

0.8022281178355536

In [7]:
# coding.to_excel("tmp.xlsx", index=False)

In [23]:
rule2type = pd.read_csv("data/rules_typed.csv")
rule2type = {(row.fromLib, row.toLib): row.type for idx, row in rule2type.iterrows()}
theme2repos = defaultdict(set)
theme2rules = defaultdict(set)
for link, from_lib, to_lib, themes in zip(coding.link, coding.fromLib, coding.toLib, coding.finalTheme):
    if themes == "":
        continue
    for theme in themes.split(","):
        theme2repos[theme].add(link.split("/")[3] + "/" + link.split("/")[4])
    for i, l1 in enumerate(from_lib.split("\n")):
        theme2rules[theme].add((l1, to_lib.split("\n")[i]))
    for theme in themes.split(","):
        theme2repos[theme.split(":")[0]].add(link.split("/")[3] + "/" + link.split("/")[4])
    for i, l1 in enumerate(from_lib.split("\n")):
        theme2rules[theme.split(":")[0]].add((l1, to_lib.split("\n")[i]))
all_repos = set.union(*theme2repos.values())
for theme in sorted(theme2repos.keys()):
    rename_rules = [x for x in theme2rules[theme] if x in rule2type and "rename" in rule2type[x]]
    # print(rename_rules)
    print(f"{theme:25} {len(theme2repos[theme]):3}/{len(all_repos):3} ({len(theme2repos[theme]) / len(all_repos) * 100:.2f}%)" +
         f"   {len(rename_rules)}/{len(theme2rules[theme])} ({len(rename_rules)/len(theme2rules[theme]) * 100:.2f}%)")
    

other                      21/420 (5.00%)   24/42 (57.14%)
project                   188/420 (44.76%)   102/277 (36.82%)
project:integration       125/420 (29.76%)   90/207 (43.48%)
project:license            22/420 (5.24%)   4/20 (20.00%)
project:organizational      5/420 (1.19%)   9/12 (75.00%)
project:simplification     53/420 (12.62%)   21/81 (25.93%)
source                    135/420 (32.14%)   68/135 (50.37%)
source:deprecation         77/420 (18.33%)   25/54 (46.30%)
source:issue               41/420 (9.76%)   35/71 (49.30%)
source:vulnerability       26/420 (6.19%)   27/40 (67.50%)
target                    166/420 (39.52%)   77/247 (31.17%)
target:activity             6/420 (1.43%)   0/5 (0.00%)
target:feature             57/420 (13.57%)   53/125 (42.40%)
target:performance         28/420 (6.67%)   7/32 (21.88%)
target:popularity           9/420 (2.14%)   11/17 (64.71%)
target:size                10/420 (2.38%)   0/10 (0.00%)
target:stability            8/420 (1.90%)   3/8 (37