In [1]:
import numpy as np
import pandas as pd
import json
from IR.Searcher.Index_Searcher import Index_Searcher
from Scorer.MajorityVotingScorer import MajorityVotingScorer

In [2]:
# Load the test-set recommendations from JSON into a DataFrame.
# json.load parses straight from the file handle, so `data` only ever holds
# the parsed object (never the raw text first and the parsed object after).
# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding.
with open('../Data/Outputs/test_recommendations.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

test_df = pd.DataFrame.from_dict(data)

In [3]:
# Preview the first five rows of test_df (rendered as the cell's output).
test_df.head(n=5)

Unnamed: 0,bug_id,ground_truth,repo,reformed_query,bug_title,bug_description,effective_queries,query_recommendations
0,289455,[org.eclipse.jdt.junit/src/org/eclipse/jdt/int...,eclipse.jdt.ui,[start create JUnit active active Bug active s...,[JUnit] JUnit view not active after test,I20090915-. JUnit view is not active after tes...,[start create JUnit active active Bug active s...,"[[JUnit, Java, fresh, Bug, view, Explorer], [J..."
1,165045,[ui/org.eclipse.pde.core/src/org/eclipse/pde/i...,eclipse.pde.ui,[icons actions importing created project build...,Import plugin with source leaves a warning ab...,After exporting a plug-in project with source ...,[icons actions importing created project build...,"[[export, file, references, Export, files, exp..."
2,49030,[java/org/apache/catalina/core/StandardService...,tomcat70,[listening situation uninitialized xml Server ...,Processing of Tomcat Connector configuration ...,If the Server part of the Tomcat configuratio...,[listening situation uninitialized xml Server ...,"[[problem, problem, wrong, wrong, incorrect, c..."
3,55582,[java/org/apache/jasper/compiler/TagFileProces...,tomcat70,[threads lines File TagFileProcessor wrapperUr...,Concurrent issue of TagFileProcessor,Created attachment [details] Unsynchronized g...,[threads lines File TagFileProcessor wrapperUr...,"[[issue, instance, Thread, Unsynchronized, Jsp..."
4,134483,[framework/bundles/org.eclipse.ecf/src/org/ecl...,ecf,[Application Container Standalone Factory Cont...,Standalone ClientApplication is breaks in lin...,The standalone org.eclipse.ecf.provider.app.Cl...,[Application Container Standalone Factory Cont...,"[[Eclipse, ExceptionInInitializerError, Client..."


## Evaluation

In [4]:
# Instantiate the project's Index_Searcher; its search() method is called
# further down, once per bug report, with the bug description as the query.
searcher = Index_Searcher()

In [5]:
# Parallel lists: entry i of each one refers to the same bug report.
all_ground_truths, all_search_results = [], []

### First, collect the search results for each bug description

In [6]:
# Query the searcher once per bug report, using the raw bug description as the
# query, and keep each report's ground truth alongside its search results.
#
# Both lists are rebuilt from scratch here instead of being appended to, so
# re-running this cell cannot duplicate entries in them.
# Entry i of each list corresponds to row i of test_df.
# top_K_results=10 matches the largest cutoff (K = 10) evaluated below.
all_ground_truths = list(test_df['ground_truth'])
all_search_results = [
    searcher.search(query=bug_description, top_K_results=10)
    for bug_description in test_df['bug_description']
]

### Now evaluate the results

In [7]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so all dependencies are visible in one place.
from Performance.Performance_Evaluator import Performance_Evaluator

# Single evaluator instance reused by every evaluate(...) call below.
performance_evaluator = Performance_Evaluator()

### Evaluate the results for K = 1, 5, 10

#### K = 1

In [13]:
# Score the collected search results against the ground truths at cutoff K = 1.
at_1 = performance_evaluator.evaluate(
    ground_truths=all_ground_truths,
    search_results=all_search_results,
    K=1,
)

#### K = 5

In [14]:
# Score the collected search results against the ground truths at cutoff K = 5.
at_5 = performance_evaluator.evaluate(
    ground_truths=all_ground_truths,
    search_results=all_search_results,
    K=5,
)

#### K = 10

In [15]:
# Score the collected search results against the ground truths at cutoff K = 10.
at_10 = performance_evaluator.evaluate(
    ground_truths=all_ground_truths,
    search_results=all_search_results,
    K=10,
)

In [18]:
# Gather the per-cutoff metric dictionaries under a single mapping,
# keyed by the cutoff they were computed at.
results = dict(at_1=at_1, at_5=at_5, at_10=at_10)

In [19]:
# Print the full metrics report (map, mrr, recall@K, hit@K) for each cutoff.
print(results)

{'at_1': {'map': 0.24675, 'mrr': 0.2526111111111111, 'recall@1': 0.0891025641025641, 'hit@1': 0.16}, 'at_5': {'map': 0.24675, 'mrr': 0.2526111111111111, 'recall@5': 0.23576923076923076, 'hit@5': 0.37}, 'at_10': {'map': 0.24675, 'mrr': 0.2526111111111111, 'recall@10': 0.28076923076923077, 'hit@10': 0.43}}


In [20]:
# Build the baseline summary from shallow copies of the metric dictionaries.
# Storing at_1/at_5/at_10 themselves would make baseline_data share its inner
# dicts with `results`, so any later in-place edit of baseline_data (such as
# popping keys) would also alter `results` and the at_* variables.
baseline_data = {
    'at_1': dict(at_1),
    'at_5': dict(at_5),
    'at_10': dict(at_10)
}

In [21]:
# Keep only the cutoff-specific metrics (recall@K, hit@K) in the baseline.
# 'mrr' and 'map' are filtered out into fresh dictionaries instead of being
# popped in place: popping mutates whatever dict objects baseline_data holds,
# and those may be the very same objects referenced by `results` and at_*.
excluded_metrics = ('mrr', 'map')
baseline_data = {
    key: {name: value for name, value in metrics.items() if name not in excluded_metrics}
    for key, metrics in baseline_data.items()
}

print(baseline_data)

{'at_1': {'recall@1': 0.0891025641025641, 'hit@1': 0.16}, 'at_5': {'recall@5': 0.23576923076923076, 'hit@5': 0.37}, 'at_10': {'recall@10': 0.28076923076923077, 'hit@10': 0.43}}
