In [1]:
import pandas as pd
import numpy as np
import multiprocessing

In [2]:
rec = pd.read_csv("recommend-output-all-repo-top-500.csv")

# Number of rows sharing each target / source library; the larger of the two
# is the denominator of the confTeyton score computed below.
to_lib2cnt = {lib: len(group) for lib, group in rec.groupby("toLib")}
from_lib2cnt = {lib: len(group) for lib, group in rec.groupby("fromLib")}
to_lib_rows = rec["toLib"].apply(lambda lib: to_lib2cnt[lib])
from_lib_rows = rec["fromLib"].apply(lambda lib: from_lib2cnt[lib])
# confTeyton = ruleCountSameCommit / max(rows with same toLib, rows with same fromLib);
# any NaN produced by the division is replaced by 0.
rec["confTeyton"] = (rec["ruleCountSameCommit"] / np.maximum(to_lib_rows, from_lib_rows)).fillna(0)

# Manually annotated migrations; `rules` is every distinct (fromLib, toLib) pair in that sheet.
migrations = pd.read_excel("manual/extended-migrations-annotated.xlsx")
rules = {(from_lib, to_lib) for from_lib, to_lib in zip(migrations["fromLib"], migrations["toLib"])}
len(rec), len(rules), len(migrations)

(383218, 4415, 12565)

In [3]:
# Keep the annotated rows whose isTrue flag is set, and collect the distinct
# (fromLib, toLib) pairs they cover.
is_confirmed = migrations["isTrue"]
migrations_true = migrations[is_confirmed]
rules_confirmed = {
    (from_lib, to_lib)
    for from_lib, to_lib in zip(migrations_true["fromLib"], migrations_true["toLib"])
}
len(migrations_true), len(rules_confirmed)

(4273, 611)

In [4]:
# Distinct repositories: in confirmed migrations vs. in all annotated migrations.
repos_confirmed = set(migrations_true["repoName"])
repos_annotated = set(migrations["repoName"])
len(repos_confirmed), len(repos_annotated)

(785, 1313)

In [5]:
# Distinct commits that appear as the start or the end of a confirmed migration.
migration_commits = set(migrations_true["startCommit"]).union(migrations_true["endCommit"])
len(migration_commits)

1233

In [6]:
# Distinct source libraries among the confirmed migrations.
confirmed_from_libs = set(migrations_true["fromLib"])
len(confirmed_from_libs)

231

In [7]:
# Keep only recommendations whose source library appears in a confirmed migration.
rec = rec[rec["fromLib"].isin(migrations_true["fromLib"])]
# rec = rec[rec["toLib"].apply(lambda x: "infinispan" not in x)]
# Of those, keep the rows whose (fromLib, toLib) pair was manually annotated.
# The mask is built from the two columns directly instead of a row-wise
# apply with positional x[0]/x[1] lookups: positional access on a
# label-indexed row is deprecated in pandas 2.x, and this form also yields a
# correctly shaped (empty) mask when `rec` has no rows.
pair_is_annotated = np.fromiter(
    ((from_lib, to_lib) in rules for from_lib, to_lib in zip(rec["fromLib"], rec["toLib"])),
    dtype=bool,
    count=len(rec),
)
rec_filtered = rec.loc[pair_is_annotated].copy()
len(rec_filtered)

2617

In [20]:
def evaluate(method, rules, possible_rules, confirmed_rules, top_k=20):
    """Score a ranked rule table against ground-truth (fromLib, toLib) rules.

    ``rules`` is grouped by ``fromLib``; inside each group the row order is
    taken as the ranking of the ``toLib`` candidates. A group is evaluated
    only if its ``fromLib`` is the source of at least one confirmed rule, and
    only its first ``top_k`` candidates are scored.

    Parameters
    ----------
    method : object
        Label stored under ``"Name"`` in the result.
    rules : pandas.DataFrame
        Must provide ``fromLib`` and ``toLib`` columns.
    possible_rules, confirmed_rules : collection of (fromLib, toLib) tuples
        Ground truth for the "-P" and "-C" metrics respectively. Membership
        is tested with ``in`` and size with ``len``.
    top_k : int, default 20
        Number of ranks to report.

    Returns
    -------
    dict
        ``"Name"``, ``"FromLibCount"``, ``"MRR-C"``, ``"MRR-P"`` and, as lists
        of length ``top_k`` (index ``k`` holds the value at rank ``k + 1``):
        ``"Precision-C@k"``, ``"Precision-P@k"``, ``"Recall-C@k"``,
        ``"Recall-P@k"``, ``"NDCG-C@k"``, ``"NDCG-P@k"``.

        Precision and recall are pooled over all evaluated queries; recall is
        divided by the size of the whole ground-truth collection. NDCG is
        averaged over queries, with the ideal DCG based on the number of
        relevant rules present among that query's candidates. Metrics are 0
        when there is nothing to divide by (no evaluated query, or an empty
        ground-truth collection).
    """
    from_lib_set = set(from_lib for from_lib, _ in confirmed_rules)

    # Per-rank tallies pooled over all evaluated queries (made cumulative below).
    candidates_at_rank = [0] * top_k
    confirmed_hits_at_rank = [0] * top_k
    possible_hits_at_rank = [0] * top_k
    ndcg_possible_at_k = [[] for _ in range(top_k)]
    ndcg_confirmed_at_k = [[] for _ in range(top_k)]
    reciprocal_ranks_confirmed = {}
    reciprocal_ranks_possible = {}

    for from_lib, candidates in rules.groupby(by="fromLib"):
        if from_lib not in from_lib_set:
            continue
        this_rules = [(from_lib, to_lib) for to_lib in candidates["toLib"]]
        # Relevant rules among ALL candidates of this query bound the ideal DCG.
        n_possible = sum(1 for rule in this_rules if rule in possible_rules)
        n_confirmed = sum(1 for rule in this_rules if rule in confirmed_rules)

        # 0 means "no hit within top_k"; overwritten at the first hit.
        reciprocal_ranks_possible[from_lib] = 0
        reciprocal_ranks_confirmed[from_lib] = 0

        # Running sums: terms are added in rank order, so the values match a
        # from-scratch sum at every rank without the quadratic recomputation.
        dcg_p = idcg_p = dcg_c = idcg_c = 0.0
        ndcg_p = ndcg_c = 0
        ranked = this_rules[:top_k]
        for k, rule in enumerate(ranked):
            discount = 1 / np.log2(k + 2)
            candidates_at_rank[k] += 1

            if rule in possible_rules:
                possible_hits_at_rank[k] += 1
                dcg_p += discount
                if reciprocal_ranks_possible[from_lib] == 0:
                    reciprocal_ranks_possible[from_lib] = 1 / (k + 1)
            if rule in confirmed_rules:
                confirmed_hits_at_rank[k] += 1
                dcg_c += discount
                if reciprocal_ranks_confirmed[from_lib] == 0:
                    reciprocal_ranks_confirmed[from_lib] = 1 / (k + 1)

            if k < n_possible:
                idcg_p += discount
            if k < n_confirmed:
                idcg_c += discount

            ndcg_p = dcg_p / idcg_p if idcg_p != 0 else 0
            ndcg_c = dcg_c / idcg_c if idcg_c != 0 else 0
            ndcg_possible_at_k[k].append(ndcg_p)
            ndcg_confirmed_at_k[k].append(ndcg_c)

        # A query with fewer than top_k candidates keeps its last NDCG for the
        # remaining ranks. Both variants are padded so that every rank is
        # averaged over the same set of queries.
        for k in range(len(ranked), top_k):
            ndcg_possible_at_k[k].append(ndcg_p)
            ndcg_confirmed_at_k[k].append(ndcg_c)

    has_queries = len(reciprocal_ranks_confirmed) > 0
    result = {
        "Name": method,
        "FromLibCount": len(from_lib_set & set(rules["fromLib"])),
        "MRR-C": np.mean(list(reciprocal_ranks_confirmed.values())) if has_queries else 0.0,
        "MRR-P": np.mean(list(reciprocal_ranks_possible.values())) if has_queries else 0.0,
        "Precision-C@k": [],
        "Precision-P@k": [],
        "Recall-C@k": [],
        "Recall-P@k": [],
        "NDCG-C@k": [],
        "NDCG-P@k": [],
    }

    n_confirmed_total = len(confirmed_rules)
    n_possible_total = len(possible_rules)
    seen = hits_c = hits_p = 0
    for k in range(top_k):
        # Cumulative counts over ranks 0..k across all evaluated queries.
        seen += candidates_at_rank[k]
        hits_c += confirmed_hits_at_rank[k]
        hits_p += possible_hits_at_rank[k]
        result["Precision-C@k"].append(hits_c / seen if seen else 0.0)
        result["Precision-P@k"].append(hits_p / seen if seen else 0.0)
        result["Recall-C@k"].append(hits_c / n_confirmed_total if n_confirmed_total else 0.0)
        result["Recall-P@k"].append(hits_p / n_possible_total if n_possible_total else 0.0)
        if len(ndcg_confirmed_at_k[k]) > 0:
            result["NDCG-C@k"].append(np.mean(ndcg_confirmed_at_k[k]))
        else:
            result["NDCG-C@k"].append(0)
        if len(ndcg_possible_at_k[k]) > 0:
            result["NDCG-P@k"].append(np.mean(ndcg_possible_at_k[k]))
        else:
            result["NDCG-P@k"].append(0)
    return result
def print_evaluation_result(result):
    """Print a multi-line report of the "-C" metrics in ``result``.

    Prints a header with the method name and query count, the two MRR values,
    then one line per rank: every rank up to 10, and beyond that only ranks
    that are multiples of 10.
    """
    header = "Result of {} on {} Library Queries:"
    print(header.format(result["Name"], result["FromLibCount"]))
    mrr_line = "MRR-C/P = {}/{}"
    print(mrr_line.format(result["MRR-C"], result["MRR-P"]))
    row = "Top {:3}: Precision = {:0.4f}, Recall = {:0.4f}, NDCG = {:0.4f}"
    rank_count = len(result["Precision-C@k"])
    for rank in range(1, rank_count + 1):
        if rank <= 10 or rank % 10 == 0:
            idx = rank - 1
            print(row.format(
                rank,
                result["Precision-C@k"][idx],
                result["Recall-C@k"][idx],
                result["NDCG-C@k"][idx],
            ))
def print_one_line_evaluation_result(result):
    """Print a one-line summary of the "-C" metrics in ``result``.

    Reports Precision@1, MRR, Recall@5/10/20 and NDCG@10, so the per-rank
    lists in ``result`` need at least 20 entries for recall and 10 for NDCG.
    """
    template = (
        "{:30}: Precision@1 = {:0.4f}, MRR = {:0.4f}, Recall@5 = {:0.4f}, Recall@10 = {:0.4f}, "
        "Recall@20 = {:0.4f}, NDCG@10 = {:0.4f}"
    )
    name = result["Name"]
    precision_at_1 = result["Precision-C@k"][0]
    mrr = result["MRR-C"]
    recall_at_5 = result["Recall-C@k"][4]
    recall_at_10 = result["Recall-C@k"][9]
    recall_at_20 = result["Recall-C@k"][19]
    ndcg_at_10 = result["NDCG-C@k"][9]
    print(template.format(
        name, precision_at_1, mrr, recall_at_5, recall_at_10, recall_at_20, ndcg_at_10
    ))

In [21]:
# Evaluate `rec` in its current row order; the confirmed rules serve as both
# the "possible" and the "confirmed" ground truth.
result = evaluate(
    method="Our Method",
    rules=rec,
    possible_rules=rules_confirmed,
    confirmed_rules=rules_confirmed,
)
print_evaluation_result(result)

Result of Our Method on 230 Library Queries:
MRR-C/P = 0.7880833013249893/0.7880833013249893
Top   1: Precision-C/P = 0.6783/0.6783, Recall-C/P = 0.2553/0.2553, NDCG-C/P = 0.6783/0.6783
Top   2: Precision-C/P = 0.5413/0.5413, Recall-C/P = 0.4075/0.4075, NDCG-C/P = 0.6856/0.6856
Top   3: Precision-C/P = 0.4507/0.4507, Recall-C/P = 0.5090/0.5090, NDCG-C/P = 0.6962/0.6962
Top   4: Precision-C/P = 0.3891/0.3891, Recall-C/P = 0.5859/0.5859, NDCG-C/P = 0.7133/0.7133
Top   5: Precision-C/P = 0.3391/0.3391, Recall-C/P = 0.6383/0.6383, NDCG-C/P = 0.7235/0.7235
Top   6: Precision-C/P = 0.3080/0.3080, Recall-C/P = 0.6956/0.6956, NDCG-C/P = 0.7372/0.7372
Top   7: Precision-C/P = 0.2789/0.2789, Recall-C/P = 0.7349/0.7349, NDCG-C/P = 0.7467/0.7467
Top   8: Precision-C/P = 0.2565/0.2565, Recall-C/P = 0.7725/0.7725, NDCG-C/P = 0.7583/0.7583
Top   9: Precision-C/P = 0.2382/0.2382, Recall-C/P = 0.8069/0.8069, NDCG-C/P = 0.7675/0.7675
Top  10: Precision-C/P = 0.2191/0.2191, Recall-C/P = 0.8249/0.8249, ND

In [22]:
# Same evaluation, restricted to the manually annotated (fromLib, toLib) pairs.
result = evaluate(
    method="Our Method",
    rules=rec_filtered,
    possible_rules=rules_confirmed,
    confirmed_rules=rules_confirmed,
)
print_evaluation_result(result)

Result of Our Method on 230 Library Queries:
MRR-C/P = 0.8426552795031056/0.8426552795031056
Top   1: Precision-C/P = 0.7565/0.7565, Recall-C/P = 0.2848/0.2848, NDCG-C/P = 0.7565/0.7565
Top   2: Precision-C/P = 0.5996/0.5996, Recall-C/P = 0.4435/0.4435, NDCG-C/P = 0.7516/0.7427
Top   3: Precision-C/P = 0.5052/0.5052, Recall-C/P = 0.5548/0.5548, NDCG-C/P = 0.7628/0.7526
Top   4: Precision-C/P = 0.4381/0.4381, Recall-C/P = 0.6318/0.6318, NDCG-C/P = 0.7748/0.7604
Top   5: Precision-C/P = 0.3942/0.3942, Recall-C/P = 0.6956/0.6956, NDCG-C/P = 0.7875/0.7642
Top   6: Precision-C/P = 0.3610/0.3610, Recall-C/P = 0.7480/0.7480, NDCG-C/P = 0.8000/0.7769
Top   7: Precision-C/P = 0.3338/0.3338, Recall-C/P = 0.7856/0.7856, NDCG-C/P = 0.8085/0.7773
Top   8: Precision-C/P = 0.3132/0.3132, Recall-C/P = 0.8216/0.8216, NDCG-C/P = 0.8170/0.7846
Top   9: Precision-C/P = 0.2978/0.2978, Recall-C/P = 0.8543/0.8543, NDCG-C/P = 0.8251/0.7962
Top  10: Precision-C/P = 0.2831/0.2831, Recall-C/P = 0.8756/0.8756, ND

In [24]:
def teyton_2013(rules, t):
    """Rank rules by ``confTeyton``, keeping only rows with ``confTeyton >= t``.

    Rows are shuffled before sorting, so the order of rows with equal sort
    keys is not tied to the input order. The result is sorted by ``fromLib``
    ascending, then ``confTeyton`` descending.
    """
    above_threshold = rules[rules["confTeyton"] >= t]
    shuffled = above_threshold.sample(frac=1)
    return shuffled.sort_values(by=["fromLib", "confTeyton"], ascending=[True, False])
def method(rules, a, b, c, d):
    """Rank rules by a weighted product of four signals.

    ``confidence`` is the product of ``ruleFreqSameCommit ** a``,
    ``max(0.1, apiSupport) ** b``, ``commitDistance ** c`` and
    ``log2(possibleCommitCount + 1) ** d``. The column is written into
    ``rules`` itself (the input frame is modified). Returns a shuffled copy
    sorted by ``fromLib`` ascending, then ``confidence`` descending.
    """
    freq_term = rules["ruleFreqSameCommit"] ** a
    api_term = np.maximum(0.1, rules["apiSupport"]) ** b
    distance_term = rules["commitDistance"] ** c
    commit_term = np.log2(rules["possibleCommitCount"] + 1) ** d
    rules["confidence"] = freq_term * api_term * distance_term * commit_term
    shuffled = rules.sample(frac=1)
    return shuffled.sort_values(by=["fromLib", "confidence"], ascending=[True, False])
def our_method(rules):
    """Rank rules by the unweighted product of four signals.

    ``confidence`` is ``ruleFreqSameCommit * max(0.1, apiSupport) *
    commitDistance * log2(possibleCommitCount + 1)``. The column is written
    into ``rules`` itself (the input frame is modified). Returns a shuffled
    copy sorted by ``fromLib`` ascending, then ``confidence`` descending.
    """
    freq_term = rules["ruleFreqSameCommit"]
    api_term = np.maximum(0.1, rules["apiSupport"])
    distance_term = rules["commitDistance"]
    commit_term = np.log2(rules["possibleCommitCount"] + 1)
    rules["confidence"] = freq_term * api_term * distance_term * commit_term
    shuffled = rules.sample(frac=1)
    return shuffled.sort_values(by=["fromLib", "confidence"], ascending=[True, False])
def parallel_worker(name, func, params):
    """Rank the module-level ``rec`` with ``func(rec, *params)`` and evaluate it.

    The module-level ``rules_confirmed`` is used as both the possible and the
    confirmed ground truth; ``name`` is passed to ``evaluate`` as the label.
    """
    ranked_rules = func(rec, *params)
    return evaluate(name, ranked_rules, rules_confirmed, rules_confirmed)
# Each entry is (label, ranking function, extra positional args for that function).
# NOTE(review): alrubaye_2019 is defined in a cell that appears AFTER this one
# in the file (it was executed earlier, as In [23]). On a top-to-bottom run that
# cell must be executed first, otherwise building this list raises NameError.
methods = [
    ("Teyton et al. 2013", teyton_2013, (0,)),
    ("Teyton et al. 2013'", teyton_2013, (0.002,)),
    ("Teyton et al. 2013''", teyton_2013, (0.015,)),
    ("Alrubaye et al. 2019", alrubaye_2019, ()),
    ("Our Approach", our_method, ())
]
# One worker per method is enough; never spawn more than 12.
pool = multiprocessing.Pool(min(12, len(methods)))
try:
    results = pool.starmap(parallel_worker, methods)
finally:
    # Always release the worker processes, even if an evaluation raises.
    pool.close()
    pool.join()
for result in results:
    print_one_line_evaluation_result(result)

Teyton et al. 2013            : Precision@1 = 0.6000, MRR = 0.6997, Recall@5 = 0.5254, Recall@10 = 0.6759, Recall@20 = 0.8527, NDCG@10 = 0.6446
Teyton et al. 2013'           : Precision@1 = 0.6035, MRR = 0.7008, Recall@5 = 0.5188, Recall@10 = 0.6661, Recall@20 = 0.8020, NDCG@10 = 0.6693
Teyton et al. 2013''          : Precision@1 = 0.8148, MRR = 0.8410, Recall@5 = 0.2209, Recall@10 = 0.2226, Recall@20 = 0.2226, NDCG@10 = 0.8482
Alrubaye et al. 2019          : Precision@1 = 0.9143, MRR = 0.9143, Recall@5 = 0.0540, Recall@10 = 0.0540, Recall@20 = 0.0540, NDCG@10 = 0.9143
Our Approach                  : Precision@1 = 0.6826, MRR = 0.7899, Recall@5 = 0.6514, Recall@10 = 0.8314, Recall@20 = 0.9902, NDCG@10 = 0.7760


In [23]:
def alrubaye_2019(rules):
    """Rank rules by ``ruleFreqSameCommit`` after a two-condition filter.

    Keeps rows with ``methodChangeCount > 0`` and ``ruleFreqSameCommit >= 0.6``,
    shuffles them, then sorts by ``fromLib`` ascending and
    ``ruleFreqSameCommit`` descending.
    """
    has_method_change = rules["methodChangeCount"] > 0
    is_frequent = rules["ruleFreqSameCommit"] >= 0.6
    selected = rules[has_method_change & is_frequent]
    shuffled = selected.sample(frac=1)
    return shuffled.sort_values(by=["fromLib", "ruleFreqSameCommit"], ascending=[True, False])
# Evaluate the Alrubaye et al. 2019 ranking of `rec` on its own.
alrubaye_ranked = alrubaye_2019(rec)
result = evaluate(
    method="Alrubaye et al. 2019",
    rules=alrubaye_ranked,
    possible_rules=rules_confirmed,
    confirmed_rules=rules_confirmed,
)
print_evaluation_result(result)

Result of Alrubaye et al. 2019 on 35 Library Queries:
MRR-C/P = 0.9142857142857143/0.9142857142857143
Top   1: Precision-C/P = 0.9143/0.9143, Recall-C/P = 0.0524/0.0524, NDCG-C/P = 0.9143/0.9143
Top   2: Precision-C/P = 0.8919/0.8919, Recall-C/P = 0.0540/0.0540, NDCG-C/P = 0.9143/0.5000
Top   3: Precision-C/P = 0.8919/0.8919, Recall-C/P = 0.0540/0.0540, NDCG-C/P = 0.9143/0.0000
Top   4: Precision-C/P = 0.8919/0.8919, Recall-C/P = 0.0540/0.0540, NDCG-C/P = 0.9143/0.0000
Top   5: Precision-C/P = 0.8919/0.8919, Recall-C/P = 0.0540/0.0540, NDCG-C/P = 0.9143/0.0000
Top   6: Precision-C/P = 0.8919/0.8919, Recall-C/P = 0.0540/0.0540, NDCG-C/P = 0.9143/0.0000
Top   7: Precision-C/P = 0.8919/0.8919, Recall-C/P = 0.0540/0.0540, NDCG-C/P = 0.9143/0.0000
Top   8: Precision-C/P = 0.8919/0.8919, Recall-C/P = 0.0540/0.0540, NDCG-C/P = 0.9143/0.0000
Top   9: Precision-C/P = 0.8919/0.8919, Recall-C/P = 0.0540/0.0540, NDCG-C/P = 0.9143/0.0000
Top  10: Precision-C/P = 0.8919/0.8919, Recall-C/P = 0.0540/0