In [1]:
import pandas as pd
import numpy as np
from sklearn import linear_model

In [10]:
# Load the mined rule candidates; each row pairs a source library
# ("fromGroupArtifact") with a target library ("toGroupArtifact").
rules = pd.read_csv("test-recommend-output-wocDepSeq-all.csv")

# Normalise the same-commit frequency of every rule by the largest same-commit
# frequency observed for the same target library. The per-target maxima are
# computed with groupby().max() (vectorised, skips NaN) instead of the builtin
# max() over each group, and looked up with Series.map instead of a per-row lambda.
to_lib2max_same_commit_freq = rules.groupby(by="toGroupArtifact")["ruleFreqSameCommit"].max().to_dict()
rules["ruleFreqToLibSameCommit"] = (
    rules["ruleFreqSameCommit"] / rules["toGroupArtifact"].map(to_lib2max_same_commit_freq)
)
# fillna(0) turns undefined values (e.g. 0 / 0 from the division above) into a zero score.
rules["confTeyton"] = np.minimum(rules["ruleFreqSameCommit"], rules["ruleFreqToLibSameCommit"]).fillna(0)

# Same normalisation for the plain rule frequency. `to_lib2max_freq` keeps the
# value it ended up with in the original cell (the per-target max of "ruleFreq").
to_lib2max_freq = rules.groupby(by="toGroupArtifact")["ruleFreq"].max().to_dict()
rules["ruleFreqToLib"] = rules["ruleFreq"] / rules["toGroupArtifact"].map(to_lib2max_freq)

# Ground truth: "confirmed" pairs are rows flagged by dataConfirmed, "possible"
# pairs are rows with inDepSeq > 0. Each subset is filtered once and reused.
ground_truth = pd.read_csv("possible-ground-truth-2014.csv")
confirmed_truth = ground_truth[ground_truth["dataConfirmed"]]
confirmed_rule_set = set(zip(confirmed_truth["fromGroupArtifact"], confirmed_truth["toGroupArtifact"]))
possible_truth = ground_truth[ground_truth["inDepSeq"] > 0]
possible_rule_set = set(zip(possible_truth["fromGroupArtifact"], possible_truth["toGroupArtifact"]))

# Label every candidate rule with its membership in the two ground-truth sets.
rule_pairs = list(zip(rules["fromGroupArtifact"], rules["toGroupArtifact"]))
rules["isPossible"] = [pair in possible_rule_set for pair in rule_pairs]
rules["isConfirmed"] = [pair in confirmed_rule_set for pair in rule_pairs]
possible_rules = rules[rules["isPossible"]]
confirmed_rules = rules[rules["isConfirmed"]]
other_rules = rules[~rules["isConfirmed"] & ~rules["isPossible"]]
# Note: the two ground-truth counts printed here are the sizes of the
# ground-truth sets, not the number of labelled rows in `rules`.
print("# rules = {}, # confirmed = {}, # possible = {}".format(
    len(rules), len(confirmed_rule_set), len(possible_rule_set)))

# rules = 475022, # confirmed = 289, # possible = 1588


In [3]:
def _mean_or_nan(values):
    """Return ``np.mean(values)``, or NaN (without a NumPy warning) if ``values`` is empty."""
    return np.mean(values) if values else float("nan")


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when ``denominator`` is zero."""
    return numerator / denominator if denominator else 0.0


def evaluate(method, rules, possible_rules, confirmed_rules, top_k=100):
    """Print MRR, precision, recall and NDCG of a ranked rule table.

    Only source libraries that occur as the first element of some pair in
    ``confirmed_rules`` are evaluated. Within each such library the row order
    of ``rules`` is taken as the ranking, so ``rules`` must already be sorted
    by descending score inside every ``fromGroupArtifact`` group.

    Parameters
    ----------
    method : str
        Display name printed in the report header.
    rules : pandas.DataFrame
        Ranked candidates with columns ``fromGroupArtifact`` and
        ``toGroupArtifact``.
    possible_rules, confirmed_rules : set of (from_lib, to_lib) tuples
        Ground-truth rule sets used for the "P" and "C" metric variants.
    top_k : int, optional
        Deepest rank that is evaluated (default 100, the previously
        hard-coded value).

    Returns
    -------
    None
        The report is written to stdout.

    Notes
    -----
    * Precision@k is pooled: hits among all ranks <= k of all evaluated
      libraries divided by the number of rules at those ranks. Recall@k
      divides the same hit count by the size of the whole ground-truth set.
    * NDCG@k is averaged only over libraries that have more than k candidates.
    * When a library has no relevant candidate the ideal DCG is zero; its NDCG
      is reported as 0.0 instead of the NaN produced by dividing by zero, so a
      single such library no longer turns the averaged NDCG into NaN.
    * Empty denominators yield 0.0 (precision/recall) or NaN (means over no
      data) instead of raising ``ZeroDivisionError``.
    """
    # Per-rank counters; accumulated into "ranks <= k" totals when reporting.
    rules_at_rank = [0] * top_k
    confirmed_at_rank = [0] * top_k
    possible_at_rank = [0] * top_k
    ndcg_possible_at_k = [[] for _ in range(top_k)]
    ndcg_confirmed_at_k = [[] for _ in range(top_k)]
    from_lib_set = {from_lib for from_lib, _ in confirmed_rules}
    reciprocal_ranks_confirmed = {}
    reciprocal_ranks_possible = {}

    for from_lib, candidates in rules.groupby(by="fromGroupArtifact"):
        if from_lib not in from_lib_set:
            continue
        ranked = [(from_lib, to_lib) for to_lib in candidates["toGroupArtifact"]]
        possible_flags = [rule in possible_rules for rule in ranked]
        confirmed_flags = [rule in confirmed_rules for rule in ranked]
        # The ideal ranking places every relevant candidate first, so the ideal
        # DCG gains one discount term per rank until the relevant ones run out.
        n_possible = sum(possible_flags)
        n_confirmed = sum(confirmed_flags)

        # Running sums add the terms in the same order as a from-scratch sum
        # over ranks 0..k, so the values are identical without the O(k^2) cost.
        dcg_p = idcg_p = dcg_c = idcg_c = 0.0
        for k in range(min(top_k, len(ranked))):
            discount = 1 / np.log2(k + 2)
            rules_at_rank[k] += 1
            if possible_flags[k]:
                possible_at_rank[k] += 1
                dcg_p += discount
                # setdefault keeps the rank of the FIRST hit only.
                reciprocal_ranks_possible.setdefault(from_lib, 1 / (k + 1))
            if confirmed_flags[k]:
                confirmed_at_rank[k] += 1
                dcg_c += discount
                reciprocal_ranks_confirmed.setdefault(from_lib, 1 / (k + 1))
            if k < n_possible:
                idcg_p += discount
            if k < n_confirmed:
                idcg_c += discount
            ndcg_possible_at_k[k].append(dcg_p / idcg_p if idcg_p > 0 else 0.0)
            ndcg_confirmed_at_k[k].append(dcg_c / idcg_c if idcg_c > 0 else 0.0)

        # Libraries without any hit inside the top_k contribute a reciprocal rank of 0.
        reciprocal_ranks_possible.setdefault(from_lib, 0)
        reciprocal_ranks_confirmed.setdefault(from_lib, 0)

    print("Result of {}:".format(method))
    print("MRR-C/P = {}/{}".format(
        _mean_or_nan(list(reciprocal_ranks_confirmed.values())),
        _mean_or_nan(list(reciprocal_ranks_possible.values()))
    ))

    n_rules = n_confirmed_hits = n_possible_hits = 0
    for k in range(top_k):
        n_rules += rules_at_rank[k]
        n_confirmed_hits += confirmed_at_rank[k]
        n_possible_hits += possible_at_rank[k]
        # Report every k up to 10, then only multiples of 10.
        if k + 1 > 10 and (k + 1) % 10 != 0:
            continue
        precision = _safe_ratio(n_confirmed_hits, n_rules)
        recall = _safe_ratio(n_confirmed_hits, len(confirmed_rules))
        precision_possible = _safe_ratio(n_possible_hits, n_rules)
        recall_possible = _safe_ratio(n_possible_hits, len(possible_rules))
        print("Top {:3}: Precision-C/P = {:0.3f}/{:0.3f}, Recall-C/P = {:0.3f}/{:0.3f}, NDCG-C/P = {:0.3f}/{:0.3f}"
              .format(k + 1, precision, precision_possible, recall, recall_possible,
                      _mean_or_nan(ndcg_confirmed_at_k[k]), _mean_or_nan(ndcg_possible_at_k[k])))

In [16]:
def our_method(rules):
    """Score every rule with the combined confidence and rank per source library.

    The confidence is ``confTeyton ** 2 * sqrt(max(0.1, apiSupport)) * commitDistance``.
    It is written into ``rules["confidence"]`` in place (the caller's frame
    gains or overwrites that column); the returned frame is a copy sorted by
    ``fromGroupArtifact`` ascending and ``confidence`` descending.
    """
    # Two further factors (based on ruleFreq / concurrenceAdjustment and on
    # min(ruleFreq, ruleFreqToLib)) were commented out in the original formula
    # and remain disabled here.
    teyton_term = rules["confTeyton"] ** 2
    # API support is floored at 0.1 before taking the square root.
    api_term = rules["apiSupport"].apply(lambda support: max(0.1, support)) ** 0.5
    rules["confidence"] = teyton_term * api_term * rules["commitDistance"]
    sort_columns = ["fromGroupArtifact", "confidence"]
    return rules.sort_values(by=sort_columns, ascending=[True, False])
# Rank the candidates with our_method and report its metrics against the
# possible and confirmed ground-truth rule sets.
evaluate("Our Method", our_method(rules), possible_rule_set, confirmed_rule_set)

Result of Our Method:
MRR-C/P = 0.5639573285870546/0.6060196293341326
Top   1: Precision-C/P = 0.468/0.500, Recall-C/P = 0.152/0.030, NDCG-C/P = 0.468/0.500
Top   2: Precision-C/P = 0.380/0.422, Recall-C/P = 0.246/0.050, NDCG-C/P = 0.438/0.443
Top   3: Precision-C/P = 0.343/0.382, Recall-C/P = 0.332/0.067, NDCG-C/P = 0.451/0.418
Top   4: Precision-C/P = 0.316/0.357, Recall-C/P = 0.408/0.084, NDCG-C/P = 0.464/0.411
Top   5: Precision-C/P = 0.281/0.337, Recall-C/P = 0.453/0.099, NDCG-C/P = 0.464/0.406
Top   6: Precision-C/P = 0.259/0.309, Recall-C/P = 0.502/0.109, NDCG-C/P = 0.469/0.391
Top   7: Precision-C/P = 0.239/0.287, Recall-C/P = 0.540/0.118, NDCG-C/P = 0.473/0.382
Top   8: Precision-C/P = 0.224/0.267, Recall-C/P = 0.578/0.125, NDCG-C/P = 0.480/0.372
Top   9: Precision-C/P = 0.208/0.251, Recall-C/P = 0.602/0.132, NDCG-C/P = 0.487/0.370
Top  10: Precision-C/P = 0.193/0.236, Recall-C/P = 0.623/0.139, NDCG-C/P = 0.493/0.367
Top  20: Precision-C/P = 0.111/0.140, Recall-C/P = 0.716/0.1

In [5]:
def teyton_2013(rules):
    """Rank the candidates of each source library by descending ``confTeyton``.

    Returns a sorted copy of ``rules`` (``fromGroupArtifact`` ascending, then
    ``confTeyton`` descending); the input frame is not modified.
    """
    # Column -> "sort ascending?" in priority order.
    ordering = {"fromGroupArtifact": True, "confTeyton": False}
    return rules.sort_values(by=list(ordering.keys()), ascending=list(ordering.values()))
# Report the metrics of the confTeyton-only ranking against the same
# possible and confirmed ground-truth rule sets.
evaluate("Teyton et al. 2013", teyton_2013(rules), possible_rule_set, confirmed_rule_set)

Result of Teyton et al. 2013:
MRR-C/P = 0.5342681364642328/0.5749173454498131
Top   1: Precision-C/P = 0.436/0.468, Recall-C/P = 0.142/0.028, NDCG-C/P = 0.436/0.468
Top   2: Precision-C/P = 0.332/0.369, Recall-C/P = 0.215/0.043, NDCG-C/P = 0.395/0.396
Top   3: Precision-C/P = 0.304/0.346, Recall-C/P = 0.294/0.061, NDCG-C/P = 0.412/0.383
Top   4: Precision-C/P = 0.287/0.327, Recall-C/P = 0.370/0.077, NDCG-C/P = 0.425/0.377
Top   5: Precision-C/P = 0.266/0.311, Recall-C/P = 0.429/0.091, NDCG-C/P = 0.435/0.373
Top   6: Precision-C/P = 0.252/0.295, Recall-C/P = 0.488/0.104, NDCG-C/P = 0.449/0.369
Top   7: Precision-C/P = 0.230/0.273, Recall-C/P = 0.519/0.112, NDCG-C/P = 0.455/0.362
Top   8: Precision-C/P = 0.212/0.254, Recall-C/P = 0.547/0.119, NDCG-C/P = 0.459/0.354
Top   9: Precision-C/P = 0.204/0.243, Recall-C/P = 0.592/0.128, NDCG-C/P = 0.469/0.355
Top  10: Precision-C/P = 0.188/0.227, Recall-C/P = 0.606/0.133, NDCG-C/P = 0.471/0.351
Top  20: Precision-C/P = 0.109/0.136, Recall-C/P = 0

In [6]:
def teyton_2014(rules):
    """Rank the candidates of each source library by descending ``ruleFreq``.

    Returns a sorted copy of ``rules`` (``fromGroupArtifact`` ascending, then
    ``ruleFreq`` descending); the input frame is not modified.
    """
    # Column -> "sort ascending?" in priority order.
    ordering = {"fromGroupArtifact": True, "ruleFreq": False}
    return rules.sort_values(by=list(ordering.keys()), ascending=list(ordering.values()))
# Report the metrics of the ruleFreq-only ranking against the same
# possible and confirmed ground-truth rule sets.
evaluate("Teyton et al. 2014", teyton_2014(rules), possible_rule_set, confirmed_rule_set)

Result of Teyton et al. 2014:
MRR-C/P = 0.4299921017245659/0.4587639961038828
Top   1: Precision-C/P = 0.319/0.330, Recall-C/P = 0.104/0.020, NDCG-C/P = 0.319/0.330
Top   2: Precision-C/P = 0.230/0.246, Recall-C/P = 0.149/0.029, NDCG-C/P = 0.275/0.264
Top   3: Precision-C/P = 0.214/0.246, Recall-C/P = 0.208/0.043, NDCG-C/P = 0.295/0.269
Top   4: Precision-C/P = 0.196/0.231, Recall-C/P = 0.253/0.054, NDCG-C/P = 0.304/0.265
Top   5: Precision-C/P = 0.178/0.217, Recall-C/P = 0.287/0.064, NDCG-C/P = 0.305/0.258
Top   6: Precision-C/P = 0.161/0.193, Recall-C/P = 0.311/0.068, NDCG-C/P = 0.306/0.244
Top   7: Precision-C/P = 0.149/0.178, Recall-C/P = 0.336/0.073, NDCG-C/P = 0.310/0.235
Top   8: Precision-C/P = 0.137/0.166, Recall-C/P = 0.353/0.078, NDCG-C/P = 0.314/0.230
Top   9: Precision-C/P = 0.123/0.150, Recall-C/P = 0.356/0.079, NDCG-C/P = 0.313/0.224
Top  10: Precision-C/P = 0.119/0.145, Recall-C/P = 0.384/0.085, NDCG-C/P = 0.323/0.224
Top  20: Precision-C/P = 0.085/0.117, Recall-C/P = 0

In [7]:
def regression(rules):
    """Fit a GLM on the ground-truth labels and rank rules by its prediction.

    Features are ``log2(value + 1)`` of ``confTeyton``, ``apiSupportMin``
    (``apiSupport`` floored at 0.1) and ``commitDistance``. The target is
    ``isConfirmed + isPossible`` (0, 1 or 2) and every row is weighted with
    ``5 * isConfirmed + isPossible + 0.03``. The model is
    ``linear_model.TweedieRegressor(power=1)``; per the scikit-learn
    documentation ``power=1`` selects the Poisson distribution.

    Side effects: adds/overwrites the columns ``apiSupportMin`` and
    ``confidence`` on the passed-in ``rules`` frame, prints the fitted
    coefficients, and trains and predicts on the same rows.

    Returns
    -------
    pandas.DataFrame
        Copy of ``rules`` sorted by ``fromGroupArtifact`` ascending and
        ``confidence`` descending.
    """
    feat_names = ["confTeyton", "apiSupportMin", "commitDistance"]
    rules["apiSupportMin"] = rules["apiSupport"].apply(lambda x: max(0.1, x))

    # Build the (n_rows, n_features) matrix column-wise; the previous
    # row-by-row .iloc loop produced the same values but needed one Python-level
    # lookup per cell.
    X = np.column_stack([np.log2(rules[name] + 1) for name in feat_names])

    is_confirmed = rules["isConfirmed"].astype(int)
    is_possible = rules["isPossible"].astype(int)
    y = (is_confirmed + is_possible).to_numpy()
    # The 0.03 base weight keeps unlabeled rules in the fit at a low weight.
    sample_weight = (is_confirmed * 5 + is_possible * 1 + 0.03).to_numpy()

    reg = linear_model.TweedieRegressor(power=1, verbose=1)
    reg.fit(X, y, sample_weight)
    for name, coef in zip(feat_names, reg.coef_):
        print("Coef {}: {}".format(name, coef))
    rules["confidence"] = reg.predict(X)
    return rules.sort_values(by=["fromGroupArtifact", "confidence"], ascending=[True, False])
# NOTE(review): the report label says "Logistic Regression", but regression()
# fits linear_model.TweedieRegressor(power=1) -- confirm the intended display name.
evaluate("Logistic Regression", regression(rules), possible_rule_set, confirmed_rule_set)

Coef confTeyton: 0.04874140046755714
Coef apiSupportMin: 0.020765070348091084
Coef commitDistance: 0.04974788599754907
Result of Logistic Regression:
MRR-C/P = 0.49600468509341167/0.5270856159885579
Top   1: Precision-C/P = 0.436/0.447, Recall-C/P = 0.142/0.026, NDCG-C/P = 0.436/0.447
Top   2: Precision-C/P = 0.299/0.332, Recall-C/P = 0.194/0.039, NDCG-C/P = 0.370/0.364
Top   3: Precision-C/P = 0.254/0.300, Recall-C/P = 0.246/0.053, NDCG-C/P = 0.360/0.343
Top   4: Precision-C/P = 0.225/0.265, Recall-C/P = 0.291/0.062, NDCG-C/P = 0.369/0.326
Top   5: Precision-C/P = 0.204/0.240, Recall-C/P = 0.329/0.071, NDCG-C/P = 0.371/0.315
Top   6: Precision-C/P = 0.182/0.216, Recall-C/P = 0.353/0.076, NDCG-C/P = 0.368/0.301
Top   7: Precision-C/P = 0.163/0.196, Recall-C/P = 0.367/0.081, NDCG-C/P = 0.365/0.289
Top   8: Precision-C/P = 0.148/0.183, Recall-C/P = 0.381/0.086, NDCG-C/P = 0.369/0.282
Top   9: Precision-C/P = 0.140/0.171, Recall-C/P = 0.405/0.090, NDCG-C/P = 0.374/0.279
Top  10: Precision