In [1]:
import json

import numpy as np
import pandas as pd

from IR.Searcher.Index_Searcher import Index_Searcher
from Performance.Performance_Evaluator import Performance_Evaluator
from Scorer.MajorityVotingScorer import MajorityVotingScorer

In [2]:
# Load the saved test-set recommendations from JSON into a DataFrame.
# The encoding is given explicitly (JSON interchange is UTF-8) instead of
# relying on the platform's default text encoding.
with open('../Data/Outputs/test_recommendations.json', 'r', encoding='utf-8') as file:
    # json.load parses directly from the file handle, so `data` is only ever
    # bound to the parsed object (never to the raw text).
    data = json.load(file)

test_df = pd.DataFrame.from_dict(data)

In [3]:
# Preview the first rows of test_df (head() shows the first 5 rows by default)
# to check which columns were loaded.
test_df.head()

Unnamed: 0,bug_id,ground_truth,repo,reformed_query,bug_title,bug_description,effective_queries,query_recommendations
0,289455,[org.eclipse.jdt.junit/src/org/eclipse/jdt/int...,eclipse.jdt.ui,[start create JUnit active active Bug active s...,[JUnit] JUnit view not active after test,I20090915-. JUnit view is not active after tes...,[start create JUnit active active Bug active s...,"[[JUnit, Java, fresh, Bug, view, Explorer], [J..."
1,165045,[ui/org.eclipse.pde.core/src/org/eclipse/pde/i...,eclipse.pde.ui,[icons actions importing created project build...,Import plugin with source leaves a warning ab...,After exporting a plug-in project with source ...,[icons actions importing created project build...,"[[export, file, references, Export, files, exp..."
2,49030,[java/org/apache/catalina/core/StandardService...,tomcat70,[listening situation uninitialized xml Server ...,Processing of Tomcat Connector configuration ...,If the Server part of the Tomcat configuratio...,[listening situation uninitialized xml Server ...,"[[problem, problem, wrong, wrong, incorrect, c..."
3,55582,[java/org/apache/jasper/compiler/TagFileProces...,tomcat70,[threads lines File TagFileProcessor wrapperUr...,Concurrent issue of TagFileProcessor,Created attachment [details] Unsynchronized g...,[threads lines File TagFileProcessor wrapperUr...,"[[issue, instance, Thread, Unsynchronized, Jsp..."
4,134483,[framework/bundles/org.eclipse.ecf/src/org/ecl...,ecf,[Application Container Standalone Factory Cont...,Standalone ClientApplication is breaks in lin...,The standalone org.eclipse.ecf.provider.app.Cl...,[Application Container Standalone Factory Cont...,"[[Eclipse, ExceptionInInitializerError, Client..."


## Evaluation

In [4]:
# Instantiate the index searcher; its search() method is used below to run
# every recommended query.
searcher = Index_Searcher()

In [6]:
# Parallel accumulators: one ground-truth entry and one scored result list
# are appended per row of test_df.
all_ground_truths, all_search_results = [], []

### First, collect the search results for every query recommendation

In [8]:
# Number of results requested from the index for each recommended query.
TOP_K_RESULTS = 10

# Reset the accumulators inside this cell so that re-running it does not append
# a second copy of every result, which would silently skew the metrics.
all_ground_truths = []
all_search_results = []

# Only two columns are needed, so walk them in lockstep instead of building a
# full row Series (and an unused index) with iterrows().
for ground_truth, query_recommendations in zip(
    test_df['ground_truth'], test_df['query_recommendations']
):
    # Each recommendation is a list of terms: join it into one query string and
    # keep the results returned for every recommendation of this row.
    search_results_collection = [
        searcher.search(query=' '.join(recommendation), top_K_results=TOP_K_RESULTS)
        for recommendation in query_recommendations
    ]

    # Majority Voting Scorer: combine the per-recommendation result lists into
    # one scored list for this row.
    # NOTE(review): presumably returns a sorted list of result keys, given
    # do_sort=True and only_keys_array=True; confirm against the scorer.
    majority_scorer = MajorityVotingScorer(List_Collection=search_results_collection)
    scored_list = majority_scorer.score_items(do_sort=True, only_keys_array=True)

    all_ground_truths.append(ground_truth)
    all_search_results.append(scored_list)

### Now evaluate the results

In [9]:
# Evaluator whose evaluate() method is called below on the collected ground
# truths and search results.
performance_evaluator = Performance_Evaluator()

In [11]:
# Evaluate the collected result lists against the ground truth with K=10;
# the returned value is shown as the cell output.
performance_evaluator.evaluate(
    ground_truths=all_ground_truths,
    search_results=all_search_results,
    K=10,
)

{'map': 0.13361007603485336,
 'mrr': 0.13422953416293215,
 'recall@10': 0.16826923076923075,
 'hit@10': 0.27,
 'precision@10': 0.02700000000000001}