In [1]:
import numpy as np
import pandas as pd
import json
import os
from IR.Searcher.Index_Searcher import Index_Searcher
from Scorer.MajorityVotingScorer import MajorityVotingScorer

## Load the BLIZZARD queries into a dataframe

In [2]:
# Root folder of the BLIZZARD reformulated queries.
# Written as a raw string so the Windows backslashes can never be parsed as escape
# sequences; the non-raw form only worked because \D, \B and \Q are not recognised
# escapes (and it triggers an invalid-escape warning on recent Python versions).
# NOTE(review): this is an absolute, machine-specific path - adjust it per machine.
blizzard_query_path = r'F:\Data\Blizzzard\BLIZZARD-Replication-Package-ESEC-FSE2018-master\BLIZZARD\Query'

In [3]:
# accumulator for the parsed query records (one dictionary per query line)
blizzard_query_dict_list = list()
# empty placeholder frame for the BLIZZARD queries
blizzard_query_df = pd.DataFrame()

In [4]:
# Parse every BLIZZARD query file into one record per line and build the dataframe.
# Layout read here: <blizzard_query_path>/<repo>/<file>, each line "<bug_id>\t<query>".

# start from an empty list so that re-running this cell does not append duplicates
blizzard_query_dict_list = []

# every directory directly under the query folder is treated as one repository;
# sorted so the row order does not depend on the file system's listing order
blizzard_query_dir_REPO = sorted(os.listdir(blizzard_query_path))

for repo in blizzard_query_dir_REPO:
    dir_path = os.path.join(blizzard_query_path, repo)
    # stray files next to the repository folders cannot be listed - skip them
    if not os.path.isdir(dir_path):
        continue

    files = sorted(os.listdir(dir_path))
    for file in files:
        file_path = os.path.join(dir_path, file)
        # nested folders cannot be opened as text files - skip them
        if not os.path.isfile(file_path):
            continue

        with open(file_path, 'r') as f:
            for line in f:
                # drop the line terminator so it does not end up inside the query
                fields = line.rstrip('\n').split('\t')
                # blank lines or lines without a tab carry no (bug_id, query) pair
                if len(fields) < 2:
                    continue

                bug_id = fields[0]
                query = fields[1]

                # bug_id stays a string, exactly as it was read from the file
                blizzard_query_dict = {'bug_id': bug_id, 'repo': repo, 'query': query}
                blizzard_query_dict_list.append(blizzard_query_dict)

# explicit columns keep the frame usable (no KeyError later) even if nothing was parsed
blizzard_query_df = pd.DataFrame(blizzard_query_dict_list, columns=['bug_id', 'repo', 'query'])

In [6]:
# peek at the last five parsed query records
blizzard_query_df.tail(n=5)

Unnamed: 0,bug_id,repo,query
5134,58946,tomcat70,ParameterMap put Parameter Map jspService test...
5135,59015,tomcat70,destroy AprEndpoint Sendfile stopInternal stop...
5136,59054,tomcat70,setAttribute CrawlerSessionManagerValve Standa...
5137,59151,tomcat70,SaveContextOnUpdateOrErrorResponseWrapper send...
5138,59317,tomcat70,ServletServerHttpRequest AbstractHandlerMappin...


# Now load the test set into a dataframe

In [7]:
# read the test-set recommendation records from JSON ...
with open('../Data/Outputs/test_recommendations.json', 'r') as file:
    data = json.load(file)

# ... and wrap them in a dataframe
test_df = pd.DataFrame.from_dict(data)

In [9]:
# peek at the last five rows of the test set
test_df.tail(n=5)

Unnamed: 0,bug_id,ground_truth,repo,reformed_query,bug_title,bug_description,effective_queries,query_recommendations
95,312646,[ui/org.eclipse.pde.ui/src/org/eclipse/pde/int...,eclipse.pde.ui,[plug HEAD tree eclipse check launch Select tr...,empty feature list when 1 is selected,Using HEAD: * New Eclipse Application launch c...,[plug HEAD tree eclipse check launch Select tr...,"[[selecting, org, org, dropped, pde, select, r..."
96,51447,[java/org/apache/catalina/manager/HTMLManagerS...,tomcat70,[type reproduce findSession instances Map getM...,Session type changes from Backup to Primary,Created attachment [details] patch Steps to r...,[type details LazyReplicatedMap Session getSes...,"[[attached, backup, backup, URLAndId, informat..."
97,301894,"[org.eclipse.jdt.apt.core/build_notes.html, or...",eclipse.jdt.core,[annotations Build annotations adopters issues...,OutOfMemory error when building a large proje...,Build Identifier: .. + R342patch_1.. This bug ...,[annotations Build annotations adopters issues...,"[[Build, builds, bug, Bug, large, support, fol..."
98,232463,[ui/org.eclipse.pde.ui/src/org/eclipse/pde/int...,eclipse.pde.ui,[Target Restore fresh click BUG remains platfo...,Target Platform pref page does not fully rese...,I20080515-. . start fresh workspace . goto to ...,[Target checked PDE preference change Target P...,"[[target, change, Target, target, target, chan..."
99,55905,[java/org/apache/catalina/startup/LocalStrings...,tomcat70,[FileNotFoundException process Tld path Note x...,Error message unhelpful when web.xml referenc...,In your web-application web.xml add a taglib e...,[doesn uri FileNotFoundException MalformedURLE...,"[[Element, java, folder, jsp, Web, config, pat..."


In [8]:
# Goal: for every (bug_id, repo) pair of the test set, look up the matching query in the
# BLIZZARD query set and store it in a new test-set column named 'baseline_query'.

# Create the column up front, filled with ''.
# The later checks on this column treat '' as "no baseline query was found".
test_df['baseline_query'] = ''



In [10]:
# Fill 'baseline_query' with the BLIZZARD query of the same (bug_id, repo) pair.
# Test-set rows without a matching pair are left untouched.

# Index the BLIZZARD queries once instead of filtering the whole frame twice per row.
# setdefault keeps the first query seen for a pair, which is what `.values[0]` on the
# filtered frame returned before.
baseline_query_by_key = {}
for known_bug_id, known_repo, known_query in zip(
    blizzard_query_df['bug_id'], blizzard_query_df['repo'], blizzard_query_df['query']
):
    baseline_query_by_key.setdefault((known_bug_id, known_repo), known_query)

# iterate through all the rows in the test set
for index, row in test_df.iterrows():
    # get the bug id and repo from the test set
    bug_id = row['bug_id']
    repo = row['repo']
    key = (bug_id, repo)
    # only pairs that exist in the BLIZZARD query set get a baseline query
    if key in baseline_query_by_key:
        test_df.at[index, 'baseline_query'] = baseline_query_by_key[key]

In [11]:
# last five rows, now including the baseline_query column
test_df.tail(n=5)

Unnamed: 0,bug_id,ground_truth,repo,reformed_query,bug_title,bug_description,effective_queries,query_recommendations,baseline_query
95,312646,[ui/org.eclipse.pde.ui/src/org/eclipse/pde/int...,eclipse.pde.ui,[plug HEAD tree eclipse check launch Select tr...,empty feature list when 1 is selected,Using HEAD: * New Eclipse Application launch c...,[plug HEAD tree eclipse check launch Select tr...,"[[selecting, org, org, dropped, pde, select, r...",Bug empty feature list selected HEAD Eclipse A...
96,51447,[java/org/apache/catalina/manager/HTMLManagerS...,tomcat70,[type reproduce findSession instances Map getM...,Session type changes from Backup to Primary,Created attachment [details] patch Steps to r...,[type details LazyReplicatedMap Session getSes...,"[[attached, backup, backup, URLAndId, informat...",type session backup primary session sessions f...
97,301894,"[org.eclipse.jdt.apt.core/build_notes.html, or...",eclipse.jdt.core,[annotations Build annotations adopters issues...,OutOfMemory error when building a large proje...,Build Identifier: .. + R342patch_1.. This bug ...,[annotations Build annotations adopters issues...,"[[Build, builds, bug, Bug, large, support, fol...",outofmemory bug memory error project annotatio...
98,232463,[ui/org.eclipse.pde.ui/src/org/eclipse/pde/int...,eclipse.pde.ui,[Target Restore fresh click BUG remains platfo...,Target Platform pref page does not fully rese...,I20080515-. . start fresh workspace . goto to ...,[Target checked PDE preference change Target P...,"[[target, change, Target, target, target, chan...",Bug Target Platform pref fully reset Restore D...
99,55905,[java/org/apache/catalina/startup/LocalStrings...,tomcat70,[FileNotFoundException process Tld path Note x...,Error message unhelpful when web.xml referenc...,In your web-application web.xml add a taglib e...,[doesn uri FileNotFoundException MalformedURLE...,"[[Element, java, folder, jsp, Web, config, pat...",message tld file error web xml references does...


In [12]:
# show the rows whose baseline_query is still the empty string
has_no_baseline = test_df['baseline_query'].eq('')
test_df[has_no_baseline]

Unnamed: 0,bug_id,ground_truth,repo,reformed_query,bug_title,bug_description,effective_queries,query_recommendations,baseline_query


In [13]:
# count the rows whose baseline_query is still the empty string
int(test_df['baseline_query'].eq('').sum())

0

In [14]:
# number of distinct values in the baseline_query column (missing values count as one)
test_df['baseline_query'].nunique(dropna=False)

100

In [16]:
# write the test set back to disk as a JSON array with one object per row
test_df.to_json(
    '../Data/Outputs/test_recommendations_baseline.json',
    orient='records',
)

In [17]:
# read the file that was just written back in ...
with open('../Data/Outputs/test_recommendations_baseline.json', 'r') as file:
    data = json.load(file)

# ... and rebuild the dataframe from its records
test_df = pd.DataFrame.from_dict(data)

In [18]:
# last five rows of the reloaded test set
test_df.tail(n=5)

Unnamed: 0,bug_id,ground_truth,repo,reformed_query,bug_title,bug_description,effective_queries,query_recommendations,baseline_query
95,312646,[ui/org.eclipse.pde.ui/src/org/eclipse/pde/int...,eclipse.pde.ui,[plug HEAD tree eclipse check launch Select tr...,empty feature list when 1 is selected,Using HEAD: * New Eclipse Application launch c...,[plug HEAD tree eclipse check launch Select tr...,"[[selecting, org, org, dropped, pde, select, r...",Bug empty feature list selected HEAD Eclipse A...
96,51447,[java/org/apache/catalina/manager/HTMLManagerS...,tomcat70,[type reproduce findSession instances Map getM...,Session type changes from Backup to Primary,Created attachment [details] patch Steps to r...,[type details LazyReplicatedMap Session getSes...,"[[attached, backup, backup, URLAndId, informat...",type session backup primary session sessions f...
97,301894,"[org.eclipse.jdt.apt.core/build_notes.html, or...",eclipse.jdt.core,[annotations Build annotations adopters issues...,OutOfMemory error when building a large proje...,Build Identifier: .. + R342patch_1.. This bug ...,[annotations Build annotations adopters issues...,"[[Build, builds, bug, Bug, large, support, fol...",outofmemory bug memory error project annotatio...
98,232463,[ui/org.eclipse.pde.ui/src/org/eclipse/pde/int...,eclipse.pde.ui,[Target Restore fresh click BUG remains platfo...,Target Platform pref page does not fully rese...,I20080515-. . start fresh workspace . goto to ...,[Target checked PDE preference change Target P...,"[[target, change, Target, target, target, chan...",Bug Target Platform pref fully reset Restore D...
99,55905,[java/org/apache/catalina/startup/LocalStrings...,tomcat70,[FileNotFoundException process Tld path Note x...,Error message unhelpful when web.xml referenc...,In your web-application web.xml add a taglib e...,[doesn uri FileNotFoundException MalformedURLE...,"[[Element, java, folder, jsp, Web, config, pat...",message tld file error web xml references does...


In [19]:
# number of rows in the reloaded test set
test_df.shape[0]

100