In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the recommended rules and the ground-truth table.
rules = pd.read_csv("test-recommend-output-wocDepSeq-all.csv")
ground_truth = pd.read_csv("possible-ground-truth-2014.csv")

# Ground-truth (from, to) pairs: rows flagged "dataConfirmed", and rows whose
# "inDepSeq" value is positive.
confirmed_truth = ground_truth[ground_truth["dataConfirmed"]]
confirmed_rule_set = set(zip(
    confirmed_truth["fromGroupArtifact"],
    confirmed_truth["toGroupArtifact"],
))
possible_truth = ground_truth[ground_truth["inDepSeq"] > 0]
possible_rule_set = set(zip(
    possible_truth["fromGroupArtifact"],
    possible_truth["toGroupArtifact"],
))

# Flag every recommended rule by membership in either ground-truth set.
rule_pairs = list(zip(rules["fromGroupArtifact"], rules["toGroupArtifact"]))
rules["isPossible"] = [pair in possible_rule_set for pair in rule_pairs]
rules["isConfirmed"] = [pair in confirmed_rule_set for pair in rule_pairs]

possible_rules = rules[rules["isPossible"]]
confirmed_rules = rules[rules["isConfirmed"]]
# Rules that are in neither ground-truth set.
other_rules = rules[~(rules["isConfirmed"] | rules["isPossible"])]

print("# rules = {}, # confirmed = {}, # possible = {}".format(
    len(rules), len(confirmed_rule_set), len(possible_rule_set)))

# rules = 316328, # confirmed = 289, # possible = 1588


In [5]:
def _mean_or_nan(values):
    """Return the arithmetic mean of ``values``, or NaN when ``values`` is empty."""
    return np.mean(values) if len(values) > 0 else float("nan")


def _ratio_or_nan(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


def evaluate(method, rules, possible_rules, confirmed_rules, top_k=100):
    """Print MRR, precision, recall and NDCG of a ranked rule list.

    Parameters
    ----------
    method : str
        Label printed in the report header.
    rules : pandas.DataFrame
        Needs the columns "fromGroupArtifact" and "toGroupArtifact". Rows are
        grouped by "fromGroupArtifact"; the row order inside each group is
        taken as the ranking (first row = rank 1), so the frame must already
        be sorted accordingly.
    possible_rules, confirmed_rules : collection of (from, to) tuples
        Ground-truth pairs. Must support ``in``, ``len`` and iteration.
    top_k : int, default 100
        Deepest rank that is evaluated. Ranks 1-10 and every multiple of 10
        up to ``top_k`` are printed.

    Notes
    -----
    * Only source libraries that occur as the "from" side of at least one
      pair in ``confirmed_rules`` are evaluated.
    * Precision and recall at k are pooled over all evaluated libraries;
      the recall denominators are the sizes of the full ground-truth sets.
    * A library's reciprocal rank is 0 when it has no hit within ``top_k``.
    * NDCG at k is 0 for a library that has no relevant rule among its
      candidates (its ideal DCG is 0); this avoids a 0/0 that would turn the
      averaged NDCG into NaN.
    * Metrics that are undefined (nothing to average, zero denominator) are
      reported as NaN instead of raising.

    Raises
    ------
    ValueError
        If ``top_k`` is negative.
    """
    if top_k < 0:
        raise ValueError("top_k must be non-negative")

    # ideal_dcg[n] is the DCG of n relevant rules placed at ranks 1..n.
    ideal_dcg = [0]
    for i in range(top_k):
        ideal_dcg.append(ideal_dcg[-1] + 1 / np.log2(i + 2))

    # Per-rank tallies pooled over all evaluated source libraries.
    rules_at_rank = [0] * top_k
    confirmed_at_rank = [0] * top_k
    possible_at_rank = [0] * top_k
    ndcg_possible_at_k = [[] for _ in range(top_k)]
    ndcg_confirmed_at_k = [[] for _ in range(top_k)]

    evaluated_from_libs = set(from_lib for from_lib, _ in confirmed_rules)
    reciprocal_ranks_confirmed = {}
    reciprocal_ranks_possible = {}

    for from_lib, candidates in rules.groupby(by="fromGroupArtifact"):
        if from_lib not in evaluated_from_libs:
            continue
        to_libs = list(candidates["toGroupArtifact"])
        # Relevant candidates are counted over the whole group, not just the
        # first top_k rows; they cap the ideal DCG below.
        relevant_possible = sum((from_lib, to_lib) in possible_rules for to_lib in to_libs)
        relevant_confirmed = sum((from_lib, to_lib) in confirmed_rules for to_lib in to_libs)

        dcg_possible = 0
        dcg_confirmed = 0
        for k, to_lib in enumerate(to_libs[:top_k]):
            rule = (from_lib, to_lib)
            is_possible = rule in possible_rules
            is_confirmed = rule in confirmed_rules

            rules_at_rank[k] += 1
            possible_at_rank[k] += int(is_possible)
            confirmed_at_rank[k] += int(is_confirmed)

            # Only the first hit of each library defines its reciprocal rank.
            if is_possible and from_lib not in reciprocal_ranks_possible:
                reciprocal_ranks_possible[from_lib] = 1 / (k + 1)
            if is_confirmed and from_lib not in reciprocal_ranks_confirmed:
                reciprocal_ranks_confirmed[from_lib] = 1 / (k + 1)

            # Running DCG: add this rank's discounted binary gain.
            discount = np.log2(k + 2)
            dcg_possible += int(is_possible) / discount
            dcg_confirmed += int(is_confirmed) / discount

            idcg_possible = ideal_dcg[min(k + 1, relevant_possible)]
            idcg_confirmed = ideal_dcg[min(k + 1, relevant_confirmed)]
            ndcg_possible_at_k[k].append(
                dcg_possible / idcg_possible if idcg_possible > 0 else 0.0)
            ndcg_confirmed_at_k[k].append(
                dcg_confirmed / idcg_confirmed if idcg_confirmed > 0 else 0.0)

        reciprocal_ranks_possible.setdefault(from_lib, 0)
        reciprocal_ranks_confirmed.setdefault(from_lib, 0)

    print("Result of {}:".format(method))
    print("MRR-C/P = {}/{}".format(
        _mean_or_nan(list(reciprocal_ranks_confirmed.values())),
        _mean_or_nan(list(reciprocal_ranks_possible.values()))
    ))

    # Cumulative tallies over ranks 1..k+1.
    total_rules = 0
    total_confirmed = 0
    total_possible = 0
    for k in range(top_k):
        total_rules += rules_at_rank[k]
        total_confirmed += confirmed_at_rank[k]
        total_possible += possible_at_rank[k]
        # Report ranks 1..10 individually, then every tenth rank.
        if k + 1 > 10 and (k + 1) % 10 != 0:
            continue
        precision = _ratio_or_nan(total_confirmed, total_rules)
        recall = _ratio_or_nan(total_confirmed, len(confirmed_rules))
        precision_possible = _ratio_or_nan(total_possible, total_rules)
        recall_possible = _ratio_or_nan(total_possible, len(possible_rules))
        print("Top {:3}: Precision-C/P = {:0.3f}/{:0.3f}, Recall-C/P = {:0.3f}/{:0.3f}, NDCG-C/P = {:0.3f}/{:0.3f}"
              .format(k + 1, precision, precision_possible, recall, recall_possible,
                      _mean_or_nan(ndcg_confirmed_at_k[k]), _mean_or_nan(ndcg_possible_at_k[k])))
def filter_and_sort1(rules):
    """Keep rules with ``ruleFreq`` above 25, ordered by ``fromLib`` then ``ruleFreq``.

    The result is sorted by "fromLib" ascending and, within equal "fromLib",
    by "ruleFreq" descending.

    NOTE(review): the other ranking functions in this file sort by
    "fromGroupArtifact"; confirm that the input really has a "fromLib" column.
    """
    frequent_mask = rules["ruleFreq"] > 25
    frequent_rules = rules.loc[frequent_mask]
    sort_columns = ["fromLib", "ruleFreq"]
    sort_ascending = [True, False]
    return frequent_rules.sort_values(by=sort_columns, ascending=sort_ascending)
def teyton2(rules):
    """Keep rules with ``relativeRuleFrequency`` above 0.07, ordered by ``fromLib`` then ``ruleFreq``.

    The result is sorted by "fromLib" ascending and, within equal "fromLib",
    by "ruleFreq" descending.

    NOTE(review): other functions in this file read "relativeRuleFreq" and
    "fromGroupArtifact"; confirm that "relativeRuleFrequency" and "fromLib"
    are real columns of the input and not stale names.
    """
    above_threshold = rules["relativeRuleFrequency"] > 0.07
    kept_rules = rules.loc[above_threshold]
    sort_columns = ["fromLib", "ruleFreq"]
    sort_ascending = [True, False]
    return kept_rules.sort_values(by=sort_columns, ascending=sort_ascending)
def teyton(rules):
    """Rank rules per source library by relative rule frequency.

    Returns a new frame sorted by "fromGroupArtifact" ascending and, within
    each source library, by "relativeRuleFreq" descending.
    """
    sort_columns = ["fromGroupArtifact", "relativeRuleFreq"]
    sort_ascending = [True, False]
    ranked = rules.sort_values(by=sort_columns, ascending=sort_ascending)
    return ranked
def our_method(rules):
    """Rank rules per source library by a combined confidence score.

    The score is::

        relativeRuleFreq
        * concurrenceAdjustment ** 0.25
        * max(0.1, apiSupport) ** 0.5
        * commitDistance ** 2

    Parameters
    ----------
    rules : pandas.DataFrame
        Needs the columns "fromGroupArtifact", "relativeRuleFreq",
        "concurrenceAdjustment", "apiSupport" and "commitDistance".

    Returns
    -------
    pandas.DataFrame
        A new frame with an added (or replaced) "confidence" column, sorted
        by "fromGroupArtifact" ascending and "confidence" descending. The
        input frame is left untouched, so repeated calls and re-run cells do
        not depend on earlier mutations.
    """
    # Floor apiSupport at 0.1; `where` keeps values above 0.1 and substitutes
    # 0.1 everywhere else, matching the element-wise max(0.1, x).
    api_support = rules["apiSupport"].where(rules["apiSupport"] > 0.1, 0.1)
    confidence = (rules["relativeRuleFreq"]
                  * rules["concurrenceAdjustment"] ** 0.25
                  * api_support ** 0.5
                  * rules["commitDistance"] ** 2)
    scored = rules.assign(confidence=confidence)
    return scored.sort_values(by=["fromGroupArtifact", "confidence"], ascending=[True, False])
# Score the rules with the combined confidence ranking and print its metrics.
evaluate(
    method="Our Method",
    rules=our_method(rules),
    possible_rules=possible_rule_set,
    confirmed_rules=confirmed_rule_set,
)

Result of Our Method:
MRR-C/P = 0.5412051372771891/0.5770438120889152
Top   1: Precision-C/P = 0.447/0.468, Recall-C/P = 0.145/0.028, NDCG-C/P = 0.447/0.468
Top   2: Precision-C/P = 0.353/0.396, Recall-C/P = 0.228/0.047, NDCG-C/P = 0.415/0.419
Top   3: Precision-C/P = 0.286/0.321, Recall-C/P = 0.277/0.057, NDCG-C/P = 0.402/0.375
Top   4: Precision-C/P = 0.244/0.282, Recall-C/P = 0.315/0.066, NDCG-C/P = 0.396/0.350
Top   5: Precision-C/P = 0.232/0.275, Recall-C/P = 0.374/0.081, NDCG-C/P = 0.403/0.347
Top   6: Precision-C/P = 0.211/0.256, Recall-C/P = 0.408/0.090, NDCG-C/P = 0.402/0.336
Top   7: Precision-C/P = 0.198/0.241, Recall-C/P = 0.446/0.099, NDCG-C/P = 0.411/0.329
Top   8: Precision-C/P = 0.196/0.242, Recall-C/P = 0.505/0.113, NDCG-C/P = 0.429/0.332
Top   9: Precision-C/P = 0.180/0.222, Recall-C/P = 0.522/0.117, NDCG-C/P = 0.431/0.325
Top  10: Precision-C/P = 0.168/0.206, Recall-C/P = 0.540/0.121, NDCG-C/P = 0.435/0.320
Top  20: Precision-C/P = 0.106/0.139, Recall-C/P = 0.682/0.1

In [None]:
# Write the confidence-ranked rules to disk without the index column.
our_method(rules).to_csv(path_or_buf="test.csv", index=False)

In [6]:
# Baseline: rank by relative rule frequency only and print its metrics.
evaluate(
    method="Teyton et al 2014.",
    rules=teyton(rules),
    possible_rules=possible_rule_set,
    confirmed_rules=confirmed_rule_set,
)

Result of Teyton et al 2014.:
MRR-C/P = 0.4299921017245659/0.4587639961038828
Top   1: Precision-C/P = 0.319/0.330, Recall-C/P = 0.104/0.020, NDCG-C/P = 0.319/0.330
Top   2: Precision-C/P = 0.230/0.246, Recall-C/P = 0.149/0.029, NDCG-C/P = 0.275/0.264
Top   3: Precision-C/P = 0.214/0.246, Recall-C/P = 0.208/0.043, NDCG-C/P = 0.295/0.269
Top   4: Precision-C/P = 0.196/0.231, Recall-C/P = 0.253/0.054, NDCG-C/P = 0.304/0.265
Top   5: Precision-C/P = 0.178/0.217, Recall-C/P = 0.287/0.064, NDCG-C/P = 0.305/0.258
Top   6: Precision-C/P = 0.161/0.193, Recall-C/P = 0.311/0.068, NDCG-C/P = 0.306/0.244
Top   7: Precision-C/P = 0.149/0.178, Recall-C/P = 0.336/0.073, NDCG-C/P = 0.310/0.235
Top   8: Precision-C/P = 0.137/0.166, Recall-C/P = 0.353/0.078, NDCG-C/P = 0.314/0.230
Top   9: Precision-C/P = 0.123/0.150, Recall-C/P = 0.356/0.079, NDCG-C/P = 0.313/0.224
Top  10: Precision-C/P = 0.119/0.145, Recall-C/P = 0.384/0.085, NDCG-C/P = 0.323/0.224
Top  20: Precision-C/P = 0.085/0.117, Recall-C/P = 0