In [1]:
import pandas as pd
from collections import Counter, defaultdict
from krippendorff_alpha import krippendorff_alpha
from nltk.metrics.distance import masi_distance

In [2]:
code2theme = {
    "source:no-longer-maintained": "source:deprecation",
    "source:outdated": "source:deprecation",
    "source:vulnerability": "source:vulnerability",
    "source:issue": "source:issue",
    "source:other": "other",
    "target:feature": "target:feature",
    "target:ease-of-use": "target:usability",
    "target:performance": "target:performance",
    "target:flexibility": "target:usability",
    "target:activity": "source:deprecation",
    "target:size": "target:size",
    "target:stability": "target:stability",
    "target:popularity": "target:popularity",
    "target:other": "other",
    "project:compatibility:license": "project:license",
    "project:compatibility:other-library": "project:integration",
    "project:compatibility:environment": "project:integration",
    "project:consistency:upstream": "project:integration",
    "project:consistency:downstream": "project:integration",
    "project:consistency:within-project": "project:simplification",
    "project:organizational": "project:organizational",
    "project:other": "other",
    "_": "other"
}

coding = pd.read_excel("data/coding.xlsx").fillna("")

# Check for trivial labeling errors
for idx, row in coding[coding.reasonText != ""].iterrows():
    for code in row.reasonCode1.split(","):
        if code not in code2theme:
            print(idx, row.reasonCode1)
    for code in row.reasonCode2.split(","):
        if code not in code2theme:
            print(idx, row.reasonCode2)
            
print(Counter(coding.type))
print(Counter(coding[coding.reasonText != ""].type))

Counter({'commit': 3340, 'pull request': 2775, 'issue': 385})
Counter({'commit': 351, 'pull request': 233, 'issue': 112})


In [3]:
def get_agreement(data1, data2):
    data = coding[coding.reasonText != ""]
    agreed_items, total_items = 0, 0
    rater1_count, rater2_count = Counter(), Counter()
    for item1, item2 in zip(data1, data2):
        item1, item2 = set(item1.split(",")), set(item2.split(","))
        agreed_items += len(item1 & item2)
        total_items += (len(item1) + len(item2)) / 2
        rater1_count.update(item1)
        rater2_count.update(item2)
    agreement = agreed_items / total_items
    acc_agreement = sum(rater1_count[x] * rater2_count[x] for x in rater1_count) / total_items**2
    cohen_kappa = 1 - (1 - agreement) / (1 - acc_agreement)
    return agreement, cohen_kappa
data = coding[coding.reasonText != ""]
agreement, cohen_kappa = get_agreement(data.reasonCode1, data.reasonCode2)
print(f"Observed Agreement: {agreement:.3f}")
print(f"Cohen's Kappa: {cohen_kappa:.3f}")

Observed Agreement: 0.761
Cohen's Kappa: 0.744


In [4]:
theme1, theme2 = [], []
for idx, row in coding.iterrows():
    if row.reasonText != "":
        theme1.append(",".join(set(map(lambda x: code2theme[x], row.reasonCode1.split(",")))))
        theme2.append(",".join(set(map(lambda x: code2theme[x], row.reasonCode2.split(",")))))
    else:
        theme1.append("")
        theme2.append("")
coding["theme1"] = theme1
coding["theme2"] = theme2

final_themes = []
for idx, row in coding.iterrows():
    if row.reasonText != "":
        if set(row.theme1.split(",")) == set(row.theme2.split(",")):
            final_themes.append(row.theme1)
        else:
            final_themes.append("undecided")
    else:
        final_themes.append("")
# coding["finalTheme"] = final_themes
data = coding[coding.reasonText != ""]
agreement, cohen_kappa = get_agreement(data.theme1, data.theme2)
print(f"Observed Agreement: {agreement:.3f}")
print(f"Cohen's Kappa: {cohen_kappa:.3f}")

Observed Agreement: 0.839
Cohen's Kappa: 0.818


References
1. Passonneau, Rebecca. "Measuring agreement on set-valued items (MASI) for semantic and pragmatic annotation." (2006).
2. Krippendorff, Klaus. "Computing Krippendorff's alpha-reliability." (2011).
3. Krippendorff, Klaus. Content analysis: An introduction to its methodology. Sage publications, 2018.

In [12]:
krippendorff_alpha((data.reasonCode1, data.reasonCode2), metric=masi_distance, convert_items=lambda x: set(x.split(",")))

0.7312805955919376

In [6]:
krippendorff_alpha((data.theme1, data.theme2), metric=masi_distance, convert_items=lambda x: set(x.split(",")))

0.8022281178355536

In [7]:
# coding.to_excel("tmp.xlsx", index=False)

In [29]:
rule2type = pd.read_csv("data/rules_typed.csv")
rule2type = {(row.fromLib, row.toLib): row.type for idx, row in rule2type.iterrows()}
cls2 = pd.read_excel("data/cls2.xlsx")
lib2category = dict(zip(cls2.package, cls2.adjusted_category))
theme2repos = defaultdict(set)
theme2rules = defaultdict(Counter)
theme2cats = defaultdict(lambda: defaultdict(set))
for link, from_lib, to_lib, themes in zip(coding.link, coding.fromLib, coding.toLib, coding.finalTheme):
    if themes == "":
        continue
    repo = link.split("/")[3] + "/" + link.split("/")[4]
    for theme in themes.split(","):
        theme2repos[theme].add(repo)
    for i, l1 in enumerate(from_lib.split("\n")):
        theme2rules[theme][(l1, to_lib.split("\n")[i])] += 1
        theme2cats[theme][lib2category[l1]].add(repo)
        if ":" in theme:
            theme2cats[theme.split(":")[0]][lib2category[l1]].add(repo)
    for theme in themes.split(","):
        theme2repos[theme.split(":")[0]].add(repo)
    for i, l1 in enumerate(from_lib.split("\n")):
        theme2rules[theme.split(":")[0]][(l1, to_lib.split("\n")[i])] += 1
    for l2 in to_lib.split("\n"):
        theme2cats[theme][lib2category[l2]].add(repo)
        if ":" in theme:
            theme2cats[theme.split(":")[0]][lib2category[l2]].add(repo)
all_repos = set.union(*theme2repos.values())
for theme in sorted(theme2repos.keys()):
    rename_rules = [x for x in theme2rules[theme] if x in rule2type and "rename" in rule2type[x]]
    # print(rename_rules)
    print(f"{theme:25} {len(theme2repos[theme]):3}/{len(all_repos):3} ({len(theme2repos[theme]) / len(all_repos) * 100:.2f}%)" +
         f"   {len(rename_rules)}/{len(theme2rules[theme])} ({len(rename_rules)/len(theme2rules[theme]) * 100:.2f}%)")
    print(sorted([(x, len(y)) for x, y in theme2cats[theme].items()], key=lambda x: -x[1])[0:5])

other                      21/420 (5.00%)   24/42 (57.14%)
[('JSON', 4), ('Logging', 4), ('Webservice', 4), ('I/O', 1), ('Database', 1)]
project                   188/420 (44.76%)   102/277 (36.82%)
[('Logging', 42), ('JSON', 24), ('Testing', 24), ('Webservice', 22), ('XML', 11)]
project:integration       125/420 (29.76%)   90/207 (43.48%)
[('Logging', 24), ('Webservice', 17), ('Testing', 16), ('JSON', 12), ('Persistence', 10)]
project:license            22/420 (5.24%)   4/20 (20.00%)
[('Code Analyzers', 6), ('JSON', 6), ('PDF', 3), ('Math', 2), ('Concurrency', 2)]
project:organizational      5/420 (1.19%)   9/12 (75.00%)
[('Command Line', 2), ('JSON', 1), ('Build', 1), ('XML', 1)]
project:simplification     53/420 (12.62%)   21/81 (25.93%)
[('Logging', 21), ('Testing', 9), ('JSON', 5), ('Webservice', 5), ('Core Utilities', 3)]
source                    135/420 (32.14%)   68/135 (50.37%)
[('Logging', 23), ('HTTP Clients', 19), ('Testing', 16), ('JSON', 15), ('XML', 11)]
source:deprecat