In [1]:
import pandas as pd
import numpy as np
from sklearn import linear_model
import multiprocessing

In [2]:
# Load the rule table that every later cell ranks and evaluates.
rules = pd.read_csv("test-recommend-output-wocDepSeq3-all.csv")

# Largest same-commit frequency seen for each target library. pandas' max() skips NaN,
# unlike the built-in max(), whose result depends on row order when NaN is present.
max_same_commit_freq = rules.groupby(by="toGroupArtifact")["ruleFreqSameCommit"].max()
rules["ruleFreqToLibSameCommit"] = (
    rules["ruleFreqSameCommit"] / rules["toGroupArtifact"].map(max_same_commit_freq)
)
# confTeyton is the smaller of the two same-commit frequencies; NaN (for instance from a
# 0/0 division) is replaced by 0.
rules["confTeyton"] = np.minimum(rules["ruleFreqSameCommit"], rules["ruleFreqToLibSameCommit"]).fillna(0)

# Same normalisation for the overall rule frequency (no NaN replacement here).
max_rule_freq = rules.groupby(by="toGroupArtifact")["ruleFreq"].max()
rules["ruleFreqToLib"] = rules["ruleFreq"] / rules["toGroupArtifact"].map(max_rule_freq)
# Kept under its original name (target library -> max ruleFreq) in case other cells read it.
to_lib2max_freq = max_rule_freq.to_dict()

rules["popularityRegularization"] = rules["ruleFreq"] * rules["concurrenceAdjustment"]

In [3]:
# Collect the candidate source libraries, one per line, with surrounding whitespace removed.
with open("possible-from-lib-2014.txt", "r") as lib_file:
    from_libs = {line.strip() for line in lib_file}
# Displayed value: (number of candidates, number of candidates that never occur as a rule source).
len(from_libs), len(from_libs.difference(rules["fromGroupArtifact"]))

(304, 40)

In [4]:
# Read the embedding file: a header line with two integers, then one row per token
# made of the token followed by its vector components.
import2vecs = dict()
with open("../embedding/java_w2v_dim100.txt", "r") as embedding_file:
    # split() (rather than split(" ")) tolerates trailing spaces and the newline.
    num_lib, num_dim = (int(field) for field in embedding_file.readline().split())
    for line in embedding_file:
        vals = line.split()
        if not vals:
            # Blank line: nothing to store.
            continue
        import2vecs[vals[0]] = [float(x) for x in vals[1:]]

# Convert and measure each vector once, only for libraries that actually occur in `rules`,
# instead of recomputing both norms for every rule row.
libs_in_rules = set(rules["fromGroupArtifact"]) | set(rules["toGroupArtifact"])
lib2vec = {lib: np.asarray(import2vecs[lib]) for lib in libs_in_rules if lib in import2vecs}
lib2norm = {lib: np.linalg.norm(vec) for lib, vec in lib2vec.items()}

# Cosine similarity between source and target library; 0 when either has no embedding.
similarities = []
for from_lib, to_lib in zip(rules["fromGroupArtifact"], rules["toGroupArtifact"]):
    if from_lib in lib2vec and to_lib in lib2vec:
        similarities.append(
            np.dot(lib2vec[from_lib], lib2vec[to_lib])
            / (lib2norm[from_lib] * lib2norm[to_lib]))
    else:
        similarities.append(0)
rules["similarity"] = similarities
# Number of rules with a non-zero similarity.
print(len(rules[rules["similarity"] != 0]))

119154


In [5]:
# Ground-truth labels, restricted to (from, to) pairs that actually appear in `rules`.
ground_truth = pd.read_csv("ground-truth.csv")
rule_pairs = list(zip(rules["fromGroupArtifact"], rules["toGroupArtifact"]))
rules_existed = set(rule_pairs)

confirmed_truth = ground_truth[ground_truth["isConfirmed"]]
confirmed_rule_set = set(zip(confirmed_truth["fromLib"], confirmed_truth["toLib"])) & rules_existed

possible_truth = ground_truth[ground_truth["ruleCount"] > 0]
possible_rule_set = set(zip(possible_truth["fromLib"], possible_truth["toLib"])) & rules_existed

# Flag every rule row with its ground-truth status and keep the three sub-frames around.
rules["isPossible"] = [pair in possible_rule_set for pair in rule_pairs]
rules["isConfirmed"] = [pair in confirmed_rule_set for pair in rule_pairs]
possible_rules = rules[rules["isPossible"]]
confirmed_rules = rules[rules["isConfirmed"]]
other_rules = rules[~(rules["isConfirmed"] | rules["isPossible"])]

summary = "# rules = {}, # confirmed = {}, # possible = {}"
print(summary.format(len(rules), len(confirmed_rule_set), len(possible_rule_set)))

# rules = 243581, # confirmed = 763, # possible = 1530


In [6]:
def evaluate(method, rules, possible_rules, confirmed_rules, top_k=100):
    """Compute ranking metrics for a rule table against the ground truth.

    Args:
        method: Label stored under ``"Name"`` in the returned dict.
        rules: DataFrame with ``fromGroupArtifact`` and ``toGroupArtifact`` columns.
            Within each ``fromGroupArtifact`` group the row order is taken as the
            ranking (first row = rank 1).
        possible_rules: Collection of ``(from_lib, to_lib)`` tuples counted as "possible".
        confirmed_rules: Collection of ``(from_lib, to_lib)`` tuples counted as
            "confirmed". Only source libraries that occur in this collection are scored.
        top_k: Number of ranks to evaluate (default 100, the previously hard-coded value).

    Returns:
        dict with ``Name``, ``FromLibCount``, ``MRR-C``, ``MRR-P`` and six lists of
        length ``top_k``: ``Precision-C@k``, ``Precision-P@k``, ``Recall-C@k``,
        ``Recall-P@k``, ``NDCG-C@k``, ``NDCG-P@k`` (index ``k`` holds the value at
        rank ``k + 1``). Reciprocal ranks only consider the first ``top_k`` candidates.
        A ratio whose denominator is empty is reported as 0.0; an NDCG entry for a rank
        that no query reaches is NaN.
    """
    def _ratio(numerator, denominator):
        # An empty denominator (no rules, no ground truth, no ideal gain) scores 0.0
        # instead of raising ZeroDivisionError or producing NaN.
        return numerator / denominator if denominator else 0.0

    def _mean(values):
        # NaN for "no data", without the RuntimeWarning np.mean emits on an empty list.
        return np.mean(values) if values else float("nan")

    # discounts[i] is the DCG weight of rank i + 1; ideal_dcg[n] is the best achievable
    # DCG with n relevant items, i.e. the sum of the first n discounts.
    discounts = [1 / np.log2(rank + 2) for rank in range(top_k)]
    ideal_dcg = [0.0]
    for discount in discounts:
        ideal_dcg.append(ideal_dcg[-1] + discount)

    # Per-rank counters across all queries; turned into cumulative top-k counts below.
    rules_at_rank = [0] * top_k
    possible_hits_at_rank = [0] * top_k
    confirmed_hits_at_rank = [0] * top_k
    ndcg_possible_at_k = [list() for _ in range(top_k)]
    ndcg_confirmed_at_k = [list() for _ in range(top_k)]
    from_lib_set = set(from_lib for from_lib, _ in confirmed_rules)
    reciprocal_ranks_confirmed = {}
    reciprocal_ranks_possible = {}

    for from_lib, candidates in rules.groupby(by="fromGroupArtifact"):
        if from_lib not in from_lib_set:
            continue
        to_libs = list(candidates["toGroupArtifact"])
        possible_flags = [(from_lib, to_lib) in possible_rules for to_lib in to_libs]
        confirmed_flags = [(from_lib, to_lib) in confirmed_rules for to_lib in to_libs]
        total_possible = sum(possible_flags)
        total_confirmed = sum(confirmed_flags)
        # Reciprocal rank stays 0 when no hit shows up within the first top_k candidates.
        reciprocal_ranks_possible[from_lib] = 0
        reciprocal_ranks_confirmed[from_lib] = 0
        # Running DCG: adding one discount per hit replaces re-summing the prefix at every k.
        dcg_possible = 0.0
        dcg_confirmed = 0.0
        for k in range(min(top_k, len(to_libs))):
            rules_at_rank[k] += 1
            if possible_flags[k]:
                possible_hits_at_rank[k] += 1
                dcg_possible += discounts[k]
                if not reciprocal_ranks_possible[from_lib]:
                    reciprocal_ranks_possible[from_lib] = 1 / (k + 1)
            if confirmed_flags[k]:
                confirmed_hits_at_rank[k] += 1
                dcg_confirmed += discounts[k]
                if not reciprocal_ranks_confirmed[from_lib]:
                    reciprocal_ranks_confirmed[from_lib] = 1 / (k + 1)
            ndcg_possible_at_k[k].append(
                _ratio(dcg_possible, ideal_dcg[min(k + 1, total_possible)]))
            ndcg_confirmed_at_k[k].append(
                _ratio(dcg_confirmed, ideal_dcg[min(k + 1, total_confirmed)]))

    result = {
        "Name": method,
        "FromLibCount": len(from_lib_set & set(rules["fromGroupArtifact"])),
        "MRR-C": _mean(list(reciprocal_ranks_confirmed.values())),
        "MRR-P": _mean(list(reciprocal_ranks_possible.values())),
        "Precision-C@k": [],
        "Precision-P@k": [],
        "Recall-C@k": [],
        "Recall-P@k": [],
        "NDCG-C@k": [],
        "NDCG-P@k": [],
    }
    # Precision/recall at k pool the top-(k+1) candidates of every scored query.
    seen = 0
    confirmed_hits = 0
    possible_hits = 0
    for k in range(top_k):
        seen += rules_at_rank[k]
        confirmed_hits += confirmed_hits_at_rank[k]
        possible_hits += possible_hits_at_rank[k]
        result["Precision-C@k"].append(_ratio(confirmed_hits, seen))
        result["Precision-P@k"].append(_ratio(possible_hits, seen))
        result["Recall-C@k"].append(_ratio(confirmed_hits, len(confirmed_rules)))
        result["Recall-P@k"].append(_ratio(possible_hits, len(possible_rules)))
        result["NDCG-C@k"].append(_mean(ndcg_confirmed_at_k[k]))
        result["NDCG-P@k"].append(_mean(ndcg_possible_at_k[k]))
    return result
def print_evaluation_result(result):
    """Print the full report for one evaluation result.

    Emits a header with the method name and query count, the two MRR values, and
    one metrics row for ranks 1-10 and for every later multiple of 10.
    """
    header = "Result of {} on {} Library Queries:"
    print(header.format(result["Name"], result["FromLibCount"]))
    print("MRR-C/P = {}/{}".format(result["MRR-C"], result["MRR-P"]))
    row = "Top {:3}: Precision-C/P = {:0.3f}/{:0.3f}, Recall-C/P = {:0.3f}/{:0.3f}, NDCG-C/P = {:0.3f}/{:0.3f}"
    columns = ("Precision-C@k", "Precision-P@k", "Recall-C@k", "Recall-P@k", "NDCG-C@k", "NDCG-P@k")
    for idx in range(len(result["Precision-C@k"])):
        rank = idx + 1
        if rank <= 10 or rank % 10 == 0:
            print(row.format(rank, *[result[column][idx] for column in columns]))
def print_one_line_evaluation_result(result):
    """Print a one-line summary of the confirmed-rule metrics of ``result``.

    Shows Precision@1, MRR, Recall@5/10/20 and NDCG@10, so the per-rank lists in
    ``result`` must hold at least 20 entries.
    """
    template = (
        "{:25}: Precision@1 = {:0.3f}, MRR = {:0.3f}, Recall@5 = {:0.3f}, Recall@10 = {:0.3f}, "
        "Recall@20 = {:0.3f}, NDCG@10 = {:0.3f}"
    )
    fields = [
        result["Name"],
        result["Precision-C@k"][0],
        result["MRR-C"],
        result["Recall-C@k"][4],
        result["Recall-C@k"][9],
        result["Recall-C@k"][19],
        result["NDCG-C@k"][9],
    ]
    print(template.format(*fields))

In [7]:
def our_method(rules):
    """Rank candidates by the product of five factors.

    Writes the score into ``rules["confidence"]`` (the input frame is modified) and
    returns a copy sorted by source library, then by descending confidence.

    Alternatives that were tried and are currently disabled: capping or log-scaling
    ``confTeyton``, ``ruleCount / concurrence``, ``concurrenceAdjustment``,
    ``min(ruleFreq, ruleFreqToLib)``, ``positionSupport`` and a capped ``similarity``.
    """
    # apiSupport is floored at 0.1 so that a zero value does not wipe out the product.
    api_support = np.maximum(0.1, rules["apiSupport"])
    commit_weight = np.log2(rules["possibleCommitCount"] + 1)
    rules["confidence"] = (
        rules["confTeyton"] * rules["ruleFreq"] * api_support * rules["commitDistance"] * commit_weight
    )
    sort_keys = ["fromGroupArtifact", "confidence"]
    return rules.sort_values(by=sort_keys, ascending=[True, False])
# Rank once and reuse the frame for both the evaluation and the CSV export
# (the ranking was previously computed and sorted twice).
our_method_rules = our_method(rules)
result = evaluate("Our Method", our_method_rules, possible_rule_set, confirmed_rule_set)
print_evaluation_result(result)
our_method_rules.to_csv("recommend-output.csv", index=False)

Result of Our Method on 180 Library Queries:
MRR-C/P = 0.8552836334358074/0.8552836334358074
Top   1: Precision-C/P = 0.794/0.794, Recall-C/P = 0.187/0.093, NDCG-C/P = 0.794/0.794
Top   2: Precision-C/P = 0.630/0.638, Recall-C/P = 0.296/0.150, NDCG-C/P = 0.748/0.710
Top   3: Precision-C/P = 0.558/0.563, Recall-C/P = 0.393/0.198, NDCG-C/P = 0.748/0.675
Top   4: Precision-C/P = 0.499/0.505, Recall-C/P = 0.469/0.237, NDCG-C/P = 0.749/0.650
Top   5: Precision-C/P = 0.449/0.458, Recall-C/P = 0.527/0.268, NDCG-C/P = 0.746/0.631
Top   6: Precision-C/P = 0.406/0.413, Recall-C/P = 0.571/0.290, NDCG-C/P = 0.747/0.612
Top   7: Precision-C/P = 0.371/0.378, Recall-C/P = 0.609/0.310, NDCG-C/P = 0.747/0.596
Top   8: Precision-C/P = 0.343/0.352, Recall-C/P = 0.644/0.329, NDCG-C/P = 0.749/0.589
Top   9: Precision-C/P = 0.318/0.327, Recall-C/P = 0.671/0.344, NDCG-C/P = 0.751/0.582
Top  10: Precision-C/P = 0.301/0.311, Recall-C/P = 0.705/0.363, NDCG-C/P = 0.760/0.583
Top  20: Precision-C/P = 0.185/0.192,

In [8]:
def teyton_2013(rules):
    """Baseline ranking: per source library, candidates ordered by descending ``confTeyton``."""
    sort_keys = ["fromGroupArtifact", "confTeyton"]
    directions = [True, False]
    return rules.sort_values(by=sort_keys, ascending=directions)
# Evaluate the confTeyton-only baseline against both ground-truth sets and print the full report.
result = evaluate("Teyton et al. 2013", teyton_2013(rules), possible_rule_set, confirmed_rule_set)
print_evaluation_result(result)

Result of Teyton et al. 2013 on 180 Library Queries:
MRR-C/P = 0.7487575586716304/0.7523562658751796
Top   1: Precision-C/P = 0.678/0.683, Recall-C/P = 0.160/0.080, NDCG-C/P = 0.678/0.683
Top   2: Precision-C/P = 0.504/0.524, Recall-C/P = 0.237/0.123, NDCG-C/P = 0.608/0.584
Top   3: Precision-C/P = 0.448/0.468, Recall-C/P = 0.316/0.165, NDCG-C/P = 0.609/0.563
Top   4: Precision-C/P = 0.391/0.410, Recall-C/P = 0.367/0.192, NDCG-C/P = 0.598/0.535
Top   5: Precision-C/P = 0.354/0.374, Recall-C/P = 0.415/0.219, NDCG-C/P = 0.596/0.519
Top   6: Precision-C/P = 0.337/0.357, Recall-C/P = 0.474/0.251, NDCG-C/P = 0.607/0.516
Top   7: Precision-C/P = 0.322/0.340, Recall-C/P = 0.528/0.278, NDCG-C/P = 0.619/0.513
Top   8: Precision-C/P = 0.298/0.317, Recall-C/P = 0.560/0.296, NDCG-C/P = 0.621/0.507
Top   9: Precision-C/P = 0.279/0.298, Recall-C/P = 0.588/0.314, NDCG-C/P = 0.624/0.504
Top  10: Precision-C/P = 0.260/0.279, Recall-C/P = 0.609/0.325, NDCG-C/P = 0.626/0.498
Top  20: Precision-C/P = 0.16

In [9]:
def teyton_2014(rules):
    """Baseline ranking: per source library, candidates ordered by descending ``ruleFreq``."""
    sort_keys = ["fromGroupArtifact", "ruleFreq"]
    directions = [True, False]
    return rules.sort_values(by=sort_keys, ascending=directions)
# Evaluate the ruleFreq-only baseline against both ground-truth sets and print the full report.
result = evaluate("Teyton et al. 2014", teyton_2014(rules), possible_rule_set, confirmed_rule_set)
print_evaluation_result(result)

Result of Teyton et al. 2014 on 180 Library Queries:
MRR-C/P = 0.543908456784227/0.5658621795696003
Top   1: Precision-C/P = 0.450/0.467, Recall-C/P = 0.106/0.055, NDCG-C/P = 0.450/0.467
Top   2: Precision-C/P = 0.331/0.348, Recall-C/P = 0.156/0.082, NDCG-C/P = 0.403/0.396
Top   3: Precision-C/P = 0.270/0.290, Recall-C/P = 0.190/0.102, NDCG-C/P = 0.386/0.367
Top   4: Precision-C/P = 0.222/0.248, Recall-C/P = 0.208/0.116, NDCG-C/P = 0.369/0.344
Top   5: Precision-C/P = 0.200/0.222, Recall-C/P = 0.235/0.130, NDCG-C/P = 0.368/0.332
Top   6: Precision-C/P = 0.177/0.197, Recall-C/P = 0.249/0.139, NDCG-C/P = 0.364/0.319
Top   7: Precision-C/P = 0.159/0.179, Recall-C/P = 0.261/0.146, NDCG-C/P = 0.360/0.306
Top   8: Precision-C/P = 0.150/0.170, Recall-C/P = 0.280/0.159, NDCG-C/P = 0.363/0.305
Top   9: Precision-C/P = 0.145/0.165, Recall-C/P = 0.305/0.173, NDCG-C/P = 0.369/0.307
Top  10: Precision-C/P = 0.137/0.158, Recall-C/P = 0.321/0.185, NDCG-C/P = 0.373/0.307
Top  20: Precision-C/P = 0.094

In [10]:
def regression(rules):
    """Rank candidates with a generalized linear model fitted on the labelled rules.

    Adds ``apiSupportMin`` and ``confidence`` columns to ``rules`` (the input frame is
    modified), prints the fitted coefficient of every feature, and returns a copy
    sorted by source library, then by descending predicted confidence.

    NOTE(review): the model is fitted and then used to predict on the same rows, and
    its target is built from ``isConfirmed`` / ``isPossible``, so scores obtained from
    this ranking are in-sample.
    """
    feat_names = ["confTeyton", "apiSupportMin", "ruleFreq", "commitDistance", "possibleCommitCount"]
    # Same 0.1 floor the hand-written scoring functions apply to apiSupport.
    rules["apiSupportMin"] = rules["apiSupport"].apply(lambda x: max(0.1, x))
    # One log2(x + 1) column per feature, stacked into an (n_rules, n_features) matrix
    # in a single vectorised step instead of one .iloc lookup per cell.
    X = np.column_stack([np.log2(rules[name] + 1).to_numpy() for name in feat_names])
    is_confirmed = rules["isConfirmed"].astype(int)
    is_possible = rules["isPossible"].astype(int)
    # Target: 0 = unlabelled, 1 = exactly one flag set, 2 = both flags set.
    y = (is_confirmed + is_possible).to_numpy()
    # Labelled rows are up-weighted; every row keeps a base weight of 0.08.
    sample_weight = (is_confirmed * 5 + is_possible * 1 + 0.08).to_numpy()
    reg = linear_model.TweedieRegressor(power=1, verbose=1)
    reg.fit(X, y, sample_weight)
    for idx, coef in enumerate(reg.coef_):
        print("Coef {}: {}".format(feat_names[idx], coef))
    rules["confidence"] = reg.predict(X)
    return rules.sort_values(by=["fromGroupArtifact", "confidence"], ascending=[True, False])
# Fit the regression ranking on `rules`, evaluate it against both ground-truth sets and print the full report.
result = evaluate("Generalized Linear Model", regression(rules), possible_rule_set, confirmed_rule_set)
print_evaluation_result(result)

Coef confTeyton: 0.04420272779975144
Coef apiSupportMin: 0.0174904253542674
Coef ruleFreq: 0.05212389981908839
Coef commitDistance: 0.13866919522612015
Coef possibleCommitCount: 0.2946686913978037
Result of Generalized Linear Model on 180 Library Queries:
MRR-C/P = 0.8247356636448312/0.8248543720968727
Top   1: Precision-C/P = 0.756/0.756, Recall-C/P = 0.178/0.089, NDCG-C/P = 0.756/0.756
Top   2: Precision-C/P = 0.627/0.630, Recall-C/P = 0.295/0.148, NDCG-C/P = 0.731/0.693
Top   3: Precision-C/P = 0.522/0.530, Recall-C/P = 0.368/0.186, NDCG-C/P = 0.705/0.643
Top   4: Precision-C/P = 0.462/0.469, Recall-C/P = 0.434/0.220, NDCG-C/P = 0.701/0.614
Top   5: Precision-C/P = 0.412/0.421, Recall-C/P = 0.484/0.246, NDCG-C/P = 0.696/0.593
Top   6: Precision-C/P = 0.376/0.386, Recall-C/P = 0.529/0.271, NDCG-C/P = 0.698/0.581
Top   7: Precision-C/P = 0.348/0.357, Recall-C/P = 0.571/0.292, NDCG-C/P = 0.702/0.570
Top   8: Precision-C/P = 0.324/0.331, Recall-C/P = 0.607/0.310, NDCG-C/P = 0.705/0.562


In [11]:
def method(rules, a, b, c, d, e):
    """Parameterised scoring: each of the five factors is raised to its own exponent.

    ``a``..``e`` are the exponents of confTeyton, ruleFreq, floored apiSupport,
    commitDistance and log2(possibleCommitCount + 1); an exponent of 0 switches the
    factor off. Writes ``rules["confidence"]`` (the input frame is modified) and
    returns a copy sorted by source library, then by descending confidence.
    """
    teyton_term = rules["confTeyton"] ** a
    freq_term = rules["ruleFreq"] ** b
    api_term = np.maximum(0.1, rules["apiSupport"]) ** c
    distance_term = rules["commitDistance"] ** d
    commit_term = np.log2(rules["possibleCommitCount"] + 1) ** e
    rules["confidence"] = teyton_term * freq_term * api_term * distance_term * commit_term
    sort_keys = ["fromGroupArtifact", "confidence"]
    return rules.sort_values(by=sort_keys, ascending=[True, False])
def parallel_worker(name, func, params):
    """Rank the notebook-level ``rules`` with ``func(rules, *params)`` and evaluate it under ``name``."""
    ranked_rules = func(rules, *params)
    return evaluate(name, ranked_rules, possible_rule_set, confirmed_rule_set)
# Ablation study: each entry is (label, ranking function, extra positional arguments).
# For `method` the arguments are the exponents of
# (confTeyton, ruleFreq, apiSupport, commitDistance, possibleCommitCount).
methods = [
    # This entry runs teyton_2013, so it is labelled 2013; ranking by ruleFreq alone
    # (the 2014 baseline function) is the "RuleFreq" entry below.
    ("Teyton et al. 2013", teyton_2013, ()),
    ("RuleFreq", method, (0, 1, 0, 0, 0)),
    ("APISupport", method, (0, 0, 1, 0, 0)), 
    ("CommitDistance", method, (0, 0, 0, 1, 0)), 
    ("PossibleCommitCount", method, (0, 0, 0, 0, 1)),
    ("No ConfTeyton", method, (0, 1, 1, 1, 1)),
    ("No RuleFreq", method, (1, 0, 1, 1, 1)),
    ("No APISupport", method, (1, 1, 0, 1, 1)), 
    ("No CommitDistance", method, (1, 1, 1, 0, 1)), 
    ("No PossibleCommitCount", method, (1, 1, 1, 1, 0)),
    # ("Generalized Linear Model", regression, ()),
    ("Our Method", our_method, ())
]
# NOTE(review): the workers call notebook-defined functions and read the global `rules`;
# presumably this relies on the fork start method - confirm before running elsewhere.
pool = multiprocessing.Pool(12)
try:
    results = pool.starmap(parallel_worker, methods)
finally:
    # Release the worker processes even when an evaluation fails.
    pool.close()
    pool.join()
for result in results:
    print_one_line_evaluation_result(result)
# The regression model is evaluated in the main process rather than in the pool.
print_one_line_evaluation_result(parallel_worker("Generalized Linear Model", regression, ()))

Teyton et al. 2014       : Precision@1 = 0.678, MRR = 0.749, Recall@5 = 0.415, Recall@10 = 0.609, Recall@20 = 0.747, NDCG@10 = 0.626
RuleFreq                 : Precision@1 = 0.450, MRR = 0.544, Recall@5 = 0.235, Recall@10 = 0.321, Recall@20 = 0.439, NDCG@10 = 0.373
APISupport               : Precision@1 = 0.200, MRR = 0.353, Recall@5 = 0.170, Recall@10 = 0.278, Recall@20 = 0.467, NDCG@10 = 0.287
CommitDistance           : Precision@1 = 0.417, MRR = 0.510, Recall@5 = 0.227, Recall@10 = 0.294, Recall@20 = 0.342, NDCG@10 = 0.388
PossibleCommitCount      : Precision@1 = 0.728, MRR = 0.803, Recall@5 = 0.490, Recall@10 = 0.666, Recall@20 = 0.820, NDCG@10 = 0.706
No Conf Teyton           : Precision@1 = 0.783, MRR = 0.850, Recall@5 = 0.529, Recall@10 = 0.708, Recall@20 = 0.853, NDCG@10 = 0.759
No RuleFreq              : Precision@1 = 0.789, MRR = 0.858, Recall@5 = 0.535, Recall@10 = 0.710, Recall@20 = 0.899, NDCG@10 = 0.767
No APISupport            : Precision@1 = 0.778, MRR = 0.841, Recall@5

In [12]:
# Sensitivity sweep: vary one exponent of `method` at a time over `vals`
# while the other four stay at 1.
vals = [0.2, 0.4, 0.6, 0.8, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0]
params = []
for loc in range(0, 5):
    for value in vals:
        param = [1, 1, 1, 1, 1]
        param[loc] = value
        # The stringified exponent list doubles as the label of the run.
        params.append((str(param), method, param))
pool = multiprocessing.Pool(12)
try:
    results = pool.starmap(parallel_worker, params)
finally:
    # Close and join (as the ablation cell does) so no worker processes are left behind.
    pool.close()
    pool.join()
for result in results:
    print_one_line_evaluation_result(result)

[0.2, 1, 1, 1, 1]        : Precision@1 = 0.794, MRR = 0.856, Recall@5 = 0.528, Recall@10 = 0.710, Recall@20 = 0.857, NDCG@10 = 0.762
[0.4, 1, 1, 1, 1]        : Precision@1 = 0.794, MRR = 0.856, Recall@5 = 0.528, Recall@10 = 0.708, Recall@20 = 0.858, NDCG@10 = 0.762
[0.6, 1, 1, 1, 1]        : Precision@1 = 0.800, MRR = 0.859, Recall@5 = 0.528, Recall@10 = 0.706, Recall@20 = 0.861, NDCG@10 = 0.763
[0.8, 1, 1, 1, 1]        : Precision@1 = 0.794, MRR = 0.856, Recall@5 = 0.529, Recall@10 = 0.706, Recall@20 = 0.861, NDCG@10 = 0.761
[1.0, 1, 1, 1, 1]        : Precision@1 = 0.794, MRR = 0.855, Recall@5 = 0.527, Recall@10 = 0.705, Recall@20 = 0.861, NDCG@10 = 0.760
[1.5, 1, 1, 1, 1]        : Precision@1 = 0.789, MRR = 0.852, Recall@5 = 0.526, Recall@10 = 0.704, Recall@20 = 0.861, NDCG@10 = 0.758
[2.0, 1, 1, 1, 1]        : Precision@1 = 0.794, MRR = 0.854, Recall@5 = 0.527, Recall@10 = 0.705, Recall@20 = 0.858, NDCG@10 = 0.760
[2.5, 1, 1, 1, 1]        : Precision@1 = 0.789, MRR = 0.850, Recall@5