# In [0]:
import numpy as np
import seaborn as sns
import pandas as pd
import openai
import json
import re
import tqdm
import pickle
import hnswlib
import copy
import torch
import accelerate
import os
import torch.nn as nn

import pytrec_eval

# Pick the compute device used by the encoder.
# `torch.backends.cuda.is_built()` only says that PyTorch was compiled with
# CUDA support; `torch.cuda.is_available()` additionally checks that a usable
# GPU/driver is present, which is what placing a model on 'cuda:0' requires.
if torch.cuda.is_available():
    DEVICE = 'cuda:0'
else:
    DEVICE = 'cpu'

import random
import time
from collections import defaultdict
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import sys


from transformers import pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from sentence_transformers import SentenceTransformer, util, CrossEncoder
from collections import Counter

# For parallelizing calls to ChatGPT
from concurrent.futures import ThreadPoolExecutor
import concurrent

import warnings
warnings.simplefilter('ignore', FutureWarning)

# Lookup tables loaded once at import time.
# AUTHOR2EMBEDDING: author id (string key) -> record with an 'eids' list.
# EID2EMBEDDING: document id -> record with 'content' and 'embedding'.
# Context managers close the file handles deterministically.
with open("../data/AUTHOR2EMBEDDING.json", encoding="utf-8") as _author_file:
    AUTHOR2EMBEDDING = json.load(_author_file)
with open("../data/EID2EMBEDDING.json", encoding="utf-8") as _eid_file:
    EID2EMBEDDING = json.load(_eid_file)


class QueryExpansion():
    def __init__(self, query2doc: dict, folderName: str, top_k: int = 1000, maxWorker: int = 5, maxTokens: int = 800,
                 maxAbsLens: int = 8000, ifRankAbs: bool = False):
        """Prepare data, OpenAI settings, the search index, the encoder and the result tables.

        Args:
            query2doc: Mapping from a query key to its record (stored as ``self.data``).
            folderName: Accepted for interface compatibility.
                NOTE(review): this argument is not used here; results always go to ``../results``.
            top_k: Number of neighbours requested from the HNSW index per query.
            maxWorker: Number of threads used for the parallel ChatGPT calls.
            maxTokens: ``max_tokens`` value sent with every chat completion.
            maxAbsLens: Character budget for the concatenated abstracts sent to the LLM.
            ifRankAbs: When True, author abstracts are selected with ``getABS``
                instead of using every abstract of the author.
        """
        self.data = query2doc

        ## folder for store result
        self.path = '../results'

        if not os.path.exists(self.path):
            os.makedirs(self.path)
            print("Path created:", self.path)
        else:
            print("Path already exists:", self.path)

        ## set up open AI
        openai.api_base = "https://api.openai.com/v1"
        # Prefer a key supplied through the environment; fall back to the
        # in-code placeholder so existing edited copies keep working.
        openai.api_key = os.environ.get('OPENAI_API_KEY', 'your open ai api key')

        self.maxTokens = maxTokens
        self.maxAbsLens = maxAbsLens
        self.ifRankAbs = ifRankAbs
        # Read by evalRerunkExpandedPerformance; it was never initialised,
        # which raised AttributeError there. Disabled by default.
        self.ifRelevance = False

        ## set search index
        self.top_k = top_k
        self.searching_index = hnswlib.Index(space='cosine', dim=384)
        self.searching_index.load_index('../data/hnsw_index.idx')
        # ef is set to 1.5x top_k, i.e. above the number of neighbours requested.
        self.searching_index.set_ef(int(self.top_k * 1.5))

        ## embedding model
        self.model = SentenceTransformer('all-MiniLM-L12-v2', device=DEVICE)

        ## LLM expansion
        self.max_workers = maxWorker

        # Aggregate metrics, one row per evaluated configuration.
        overall_columns = ['Items', 'MAP@10', 'NDCG@10', 'NDCG@20',
                           'Recall@10', 'Recall@50', 'Recall@200',
                           'Precision@10', 'Precision@100', 'MRR']
        self.overall_result = pd.DataFrame({column: [] for column in overall_columns})

        # Per-query table: each query variant next to its NDCG@10.
        result_columns = ['Query', 'Query NDCG@10',
                          'Expanded Query', 'Expanded Query NDCG@10',
                          'Personalised Expanded Query 1', 'Personalised Expanded Query 1 NDCG@10',
                          'Personalised Expanded Query 2', 'Personalised Expanded Query 2 NDCG@10',
                          'Personalised Expanded Query 3', 'Personalised Expanded Query 3 NDCG@10',
                          'Personalised Expanded Query (include all keywords)',
                          'Personalised Expanded Query (include all keywords) NDCG@10',
                          'Personalised Expanded Query NDCG@10']
        self.result = pd.DataFrame({column: [] for column in result_columns})

    def clear_result(self):
        self.overall_result = pd.DataFrame({'Items': [], 'MAP@10': [], 'NDCG@10': [], 'NDCG@20': [], \
                                            'Recall@10': [], 'Recall@50': [], 'Recall@200': [], 'Relavance@10': []})

    def F_call_chatGPT(self, system_message: str, prompt: str):
        """Send one system+user exchange to the chat completion API, with retries.

        Args:
            system_message: Content of the ``system`` message.
            prompt: Content of the ``user`` message.

        Returns:
            The text of the first choice; the string ``"LLMs Failed"`` when the
            response does not have the expected structure; ``None`` when every
            attempt raised ``openai.OpenAIError``.
        """
        message_text = [{"role": "system", "content": system_message}, {"role": "user", "content": prompt}]

        # The loop used to be declared as range(10) but always stopped after
        # the fifth failure; the effective limit is now stated explicitly.
        max_attempts = 5
        retry_delay_seconds = 60

        for attempt in range(max_attempts):
            try:
                completion = openai.ChatCompletion.create(
                    model="gpt-4o-mini",
                    messages=message_text,
                    temperature=1,
                    max_tokens=self.maxTokens,
                    top_p=1e-16,
                    n=1,
                    logprobs=False,
                    frequency_penalty=0,
                    presence_penalty=0,
                    stop=None
                )
            except openai.OpenAIError:
                # Wait before retrying, but do not sleep after the last attempt.
                if attempt < max_attempts - 1:
                    time.sleep(retry_delay_seconds)
                continue

            try:
                return completion['choices'][0]['message']['content']
            except (KeyError, IndexError, TypeError):
                return "LLMs Failed"

        # All attempts failed with an API error.
        return None


        


    def evalPerformance(self, query_2_click_doc: dict, query_key: str):
        """Retrieve with the text stored under ``query_key`` and score it against clicked documents.

        Args:
            query_2_click_doc: Mapping from a query id to a record holding the
                text under ``query_key`` and a ``'click_doc_list'`` of
                ``(eid, rank)`` pairs.
            query_key: Record field whose text is embedded and searched.

        Returns:
            ``(result, overall_result)``: the per-query pytrec_eval output and a
            dict of metric averages rounded to three decimals.
        """
        # Embed all query texts in a single batch, keyed by query id.
        query_ids = list(query_2_click_doc)
        texts = [query_2_click_doc[query_id][query_key] for query_id in query_ids]
        vectors = self.model.encode(texts, batch_size=128, show_progress_bar=False, device=DEVICE)
        id_to_vector = dict(zip(query_ids, vectors))

        run = defaultdict(dict)
        qrel = defaultdict(dict)

        for query_id, record in query_2_click_doc.items():
            vector = id_to_vector[query_id].astype('float32')
            labels, distances = self.searching_index.knn_query(vector, k=self.top_k)

            # Score by rank position: the first hit gets top_k, the last gets 1.
            for position in range(self.top_k):
                run[query_id][str(labels[0][position])] = self.top_k - position

            # Every clicked document counts as relevant (binary judgement).
            for eid, rank in record['click_doc_list']:
                qrel[query_id][str(eid)] = 1

        measures = {'map_cut_10', 'ndcg_cut_10', 'ndcg_cut_20',
                    'recall_10', 'recall_50', 'recall_200', 'P_10', 'P_100', 'recip_rank'}
        evaluator = pytrec_eval.RelevanceEvaluator(qrel, measures)
        result = evaluator.evaluate(run)

        # Report label -> pytrec_eval measure name, in output order.
        reported = (('MAP@10', 'map_cut_10'), ('NDCG@10', 'ndcg_cut_10'), ('NDCG@20', 'ndcg_cut_20'),
                    ('Recall@10', 'recall_10'), ('Recall@50', 'recall_50'), ('Recall@200', 'recall_200'),
                    ('Precision@10', 'P_10'), ('Precision@100', 'P_100'), ('MRR', 'recip_rank'))
        overall_result = {
            label: round(sum([per_query[measure] for per_query in result.values()]) / len(result), 3)
            for label, measure in reported
        }

        return result, overall_result
    

    def cosine_similarity(self, vector1, vector2):
        """Return the cosine of the angle between ``vector1`` and ``vector2``.

        Computed as the dot product divided by the product of the two
        Euclidean norms.
        """
        magnitude_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
        return np.dot(vector1, vector2) / magnitude_product

    def rerank(self, emb, expanded_emb, depth):
        labels, distances = self.searching_index.knn_query(emb, k=self.top_k)
        new_distances = []
        for label in labels[0][:depth]:
            doc_emb = self.searching_index.get_items([label])[0]
            new_distances.append(1 - self.cosine_similarity(expanded_emb, doc_emb))
        sorted_indices = np.argsort(np.array(new_distances))
        reranked_distances = np.concatenate((np.array(new_distances)[sorted_indices], distances[0][depth:]))
        reranked_label = np.concatenate((labels[0][:depth][sorted_indices], labels[0][depth:]))
        return reranked_label.reshape((1, -1)), reranked_distances.reshape(1, -1)

    def evalRerankPerformance(self, query_2_click_doc: dict, query_key: str, depth: int):
        """Retrieve with the original query, re-rank with the text under ``query_key``, then score.

        Args:
            query_2_click_doc: Mapping from a query id to a record holding
                ``'query'``, the text under ``query_key`` and a
                ``'click_doc_list'`` of ``(eid, rank)`` pairs.
            query_key: Record field whose embedding drives the re-ranking.
            depth: Number of leading hits that ``rerank`` reorders.

        Returns:
            ``(result, overall_result)``: the per-query pytrec_eval output and a
            dict of metric averages rounded to three decimals.
        """
        query_ids = list(query_2_click_doc)
        original_texts = [query_2_click_doc[query_id]['query'] for query_id in query_ids]
        rerank_texts = [query_2_click_doc[query_id][query_key] for query_id in query_ids]

        original_vectors = self.model.encode(original_texts, batch_size=128, show_progress_bar=False, device=DEVICE)
        rerank_vectors = self.model.encode(rerank_texts, batch_size=128, show_progress_bar=False, device=DEVICE)

        id_to_original = dict(zip(query_ids, original_vectors))
        id_to_rerank = dict(zip(query_ids, rerank_vectors))

        run = defaultdict(dict)
        qrel = defaultdict(dict)

        for query_id, record in query_2_click_doc.items():
            retrieval_vector = id_to_original[query_id].astype('float32')
            rerank_vector = id_to_rerank[query_id].astype('float32')

            labels, distances = self.rerank(retrieval_vector, rerank_vector, depth)

            # Score by rank position: the first hit gets top_k, the last gets 1.
            for position in range(self.top_k):
                run[query_id][str(labels[0][position])] = self.top_k - position

            # Every clicked document counts as relevant (binary judgement).
            for eid, rank in record['click_doc_list']:
                qrel[query_id][str(eid)] = 1

        measures = {'map_cut_10', 'ndcg_cut_10', 'ndcg_cut_20',
                    'recall_10', 'recall_50', 'recall_200', 'P_10', 'P_100', 'recip_rank'}
        evaluator = pytrec_eval.RelevanceEvaluator(qrel, measures)
        result = evaluator.evaluate(run)

        # Report label -> pytrec_eval measure name, in output order.
        reported = (('MAP@10', 'map_cut_10'), ('NDCG@10', 'ndcg_cut_10'), ('NDCG@20', 'ndcg_cut_20'),
                    ('Recall@10', 'recall_10'), ('Recall@50', 'recall_50'), ('Recall@200', 'recall_200'),
                    ('Precision@10', 'P_10'), ('Precision@100', 'P_100'), ('MRR', 'recip_rank'))
        overall_result = {
            label: round(sum([per_query[measure] for per_query in result.values()]) / len(result), 3)
            for label, measure in reported
        }

        return result, overall_result

    def LLMAuthorRepresentation(self, docs_abs: list):
        author_abstracts = ''
        for i_abs, abs in enumerate(docs_abs):
            author_abstracts += f"\tAbstract {i_abs + 1}: {abs}\n"
            if len(author_abstracts) > self.maxAbsLens:
                break

        system_message = """You are an AI assistant specialized in providing a list of keywords that represent an author's field based on a list of abstracts from the author's publications. 
    The output should be in JSON format, containing one key: 'keywords'. 
    The value of 'keywords' should be a list of keywords with a length of less than 10. 
    Also, please note that no explanation about how these keywords are obtained is required.
    PLEASE DO NOT PROVIDE ANY EXPLANATION IN THE OUTPUT! PLEASE DO NOT INCLUDE ANY OTHER INFORMATION!!!
    Please DO NOT GIVE ANY OTHER TEXT, ONLY GIVE ME ONE JSON OUTPUT!
    PLEASE ONLY GIVE ME ONE JSON OUTPUT!
    """
        count = 0
        while count <= 2:
            json_string = self.F_call_chatGPT(system_message, author_abstracts)
            try:
                author_keywords = json.loads(json_string)['keywords']
                return author_keywords
            except:
                count += 1
                continue
        return json_string

    def LLMExpansion(self, query: str, author_keywords: list):
        system_message = """
    You are an AI assistant specialized in refining search queries for scientific documents. Your task is to expand a given query with the top three phrases that are most relevant to the original query and replace all acronyms with their full forms from the user’s domain of expertise. Each expansion must not contain the original acronym but its full form. The user’s expertise is represented by specific keywords. However, expansions must be directly related to the original query, even if they match the user’s expertise. Provide a brief rationale for your choices. Your output should be a JSON object with two keys: ‘rationale’ and ‘expansion’. The ‘expansion’ should include a list of the top THREE related phrases and full forms of acronyms. Remember, only include a short rationale and avoid any additional explanation in your output.
    """
        prompt = f"""
    Given the query and user representation below, expand the query with the top three phrases that are most relevant to the original query and replace any acronyms with their full forms from the user’s domain of expertise. Even if phrases match the user’s expertise, ensure they are directly related to the original query and do not contain the original acronym. If the query aligns with the user’s expertise, prioritize it; if not, focus on general expansion.
    Query: {query}
    User Representation: {author_keywords}
    """

        count = 0
        while count <= 2:
            expanded_query_GPT_answer = self.F_call_chatGPT(system_message, prompt)
            try:
                expanded_query = json.loads(expanded_query_GPT_answer)['expansion']
                return expanded_query
            except:
                count += 1
                continue
        return expanded_query_GPT_answer
    
    def LLMAnswer(self, query: str):
        system_message = """You are an AI assistant to answer query.
    Your output should be a JSON array with two keys: "rationale" and "answer". The "answer" should answer the input query. Remember, only include a short rationale and avoid any additional explanation in your output.
    """

        prompt = f"""Answer the following question:
    query: {query} 
    Give the rationale before answering.
    """
        # count = 0
        # while count <= 2:
        expanded_query_GPT_answer = self.F_call_chatGPT(system_message, prompt)
        try:
            expanded_query = json.loads(expanded_query_GPT_answer)['answer']
            return expanded_query
        except:
            return expanded_query_GPT_answer

 

    def getABS(self, query: str, authorID: int):
        """Return the author's document whose embedding is closest to ``query``.

        Args:
            query: Query text; it is embedded with ``self.model``.
            authorID: Author identifier; converted to ``str`` before the
                ``AUTHOR2EMBEDDING`` lookup.

        Returns:
            A one-element list with the content of the closest document (by
            cosine distance), or an empty list when the author has no
            documents. The empty case used to raise ``ValueError``.
        """
        docs = AUTHOR2EMBEDDING[str(authorID)]['eids']
        if not docs:
            return []

        queryEmb = self.model.encode(query, device=DEVICE)

        # Only the closest document is needed, so take the minimum instead of
        # sorting. Tuples compare on distance first and content second, which
        # matches the ordering of the previous full sort.
        scored = [
            (1 - self.cosine_similarity(queryEmb, EID2EMBEDDING[eid]['embedding']), EID2EMBEDDING[eid]['content'])
            for eid in docs
        ]
        return [min(scored)[1]]

    def extractAuthorRepresentation(self):
        """Fill ``value['author_keywords']`` for every record in ``self.data``.

        Records that already have ``'author_keywords'`` are left untouched.
        For the others, the author's abstracts are collected (a single
        query-matched abstract via ``getABS`` when ``self.ifRankAbs`` is set,
        otherwise all of them) and sent to ``LLMAuthorRepresentation`` on a
        thread pool of ``self.max_workers`` workers.
        """
        # Collect abstracts only for records that still need keywords.
        abstracts_by_key = {}
        for key, value in self.data.items():
            if 'author_keywords' in value:
                continue
            if self.ifRankAbs:
                abstracts_by_key[key] = self.getABS(value['query'], value['author_id'])
            else:
                # JSON object keys are always strings, so normalise the id the
                # same way getABS does; an int id used to raise KeyError here.
                docs = AUTHOR2EMBEDDING[str(value['author_id'])]['eids']
                abstracts_by_key[key] = [EID2EMBEDDING[eid]['content'] for eid in docs]

        def process_item(key, value):
            if 'author_keywords' not in value:
                value['author_keywords'] = self.LLMAuthorRepresentation(abstracts_by_key[key])
            return 0

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [executor.submit(process_item, key, value) for key, value in self.data.items()]
            for future in tqdm.tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc='Processing'):
                # result() re-raises any exception raised inside the worker.
                future.result()

    def personalisedExpandQuery(self):
        """Fill ``value['expanded_keywords']`` for every record in ``self.data``.

        Records that already have ``'expanded_keywords'`` are skipped. The
        others are expanded with ``LLMExpansion`` (query plus author keywords)
        on a thread pool of ``self.max_workers`` workers.
        """
        def process_item(key, value):
            # Explicit membership test instead of a bare except around a lookup.
            if 'expanded_keywords' not in value:
                expanded_keywords = self.LLMExpansion(value['query'], value['author_keywords'])
                value['expanded_keywords'] = expanded_keywords
                # Short pause after each LLM call.
                time.sleep(0.1)
            return 0

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [executor.submit(process_item, key, value) for key, value in self.data.items()]
            for future in tqdm.tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc='Processing'):
                # result() re-raises any exception raised inside the worker.
                future.result()

    def expandQuery(self):
        """Fill ``value['answer_query']`` for every record in ``self.data``.

        Records that already have ``'answer_query'`` are skipped. The others
        are answered with ``LLMAnswer`` on a thread pool of
        ``self.max_workers`` workers.
        """
        def process_item(key, value):
            # Explicit membership test instead of a bare except around a lookup.
            if 'answer_query' not in value:
                expanded_query = self.LLMAnswer(value['query'])
                value['answer_query'] = expanded_query
                # Short pause after each LLM call.
                time.sleep(0.1)
            return 0

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [executor.submit(process_item, key, value) for key, value in self.data.items()]
            for future in tqdm.tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc='Processing'):
                # result() re-raises any exception raised inside the worker.
                future.result()

    def storeData(self, filename: str):
        with open(self.path+filename, 'wb') as file:
            pickle.dump(self.data, file)

    def readData(self, filename: str):
        with open(self.path+filename, 'rb') as file:
            self.data = pickle.load(file)

    def writeReadMe(self, text: str):
        with open(self.path+'README.txt', 'w') as file:
            file.write(text)

    
    def evalRerunkExpandedPerformance(self, query_2_click_doc: dict, depth: int):
        """Retrieve with the original query and re-rank with the mean personalised-expansion embedding.

        For each record, three texts are built (``query + ', ' + keyword`` for
        the first three ``'expanded_keywords'``), embedded, and averaged. The
        original query embedding drives retrieval; the averaged embedding
        re-orders the first ``depth`` hits via ``rerank``.

        Args:
            query_2_click_doc: Mapping from a query id to a record holding
                ``'query'``, ``'expanded_keywords'`` (at least three entries)
                and a ``'click_doc_list'`` of ``(eid, rank)`` pairs.
            depth: Number of leading hits that ``rerank`` reorders.

        Returns:
            ``(result, overall_result)``: the per-query pytrec_eval output and a
            dict of metric averages rounded to three decimals, including
            ``'Relavance@10'`` (0 unless relevance scoring is enabled).
        """
        keys = list(query_2_click_doc)
        queries = []
        expanded_texts = ([], [], [])
        for sid_query in keys:
            record = query_2_click_doc[sid_query]
            query = record['query']
            queries.append(query)
            for slot, texts in enumerate(expanded_texts):
                texts.append(query + ', ' + record['expanded_keywords'][slot])

        embs = self.model.encode(queries, batch_size=128, show_progress_bar=False, device=DEVICE)
        embs1, embs2, embs3 = [
            self.model.encode(texts, batch_size=128, show_progress_bar=False, device=DEVICE)
            for texts in expanded_texts
        ]

        # output variables
        run = defaultdict(dict)
        qrel = defaultdict(dict)
        relevances = []

        # The attribute may be absent on instances built by older code; treat
        # that as "relevance scoring disabled" instead of raising AttributeError.
        score_relevance = getattr(self, 'ifRelevance', False)

        for sidquery, query_emb, emb1, emb2, emb3 in zip(keys, embs, embs1, embs2, embs3):
            query_vector = query_emb.astype('float32')
            expanded_query_vector = ((emb1 + emb2 + emb3) / 3).astype('float32')

            labels, distances = self.rerank(query_vector, expanded_query_vector, depth)

            # Score by rank position: the first hit gets top_k, the last gets 1.
            for i in range(self.top_k):
                run[sidquery][str(labels[0][i])] = self.top_k - i

            if score_relevance:
                # Use the current query id. The previous code read the stale
                # loop variable `sid_query`, so every query was judged against
                # the text of the last record.
                # NOTE(review): evalRelevanceList is not defined in the visible
                # part of this file; confirm it exists before enabling.
                relevances.append(self.evalRelevanceList(query_2_click_doc[sidquery]['query'], labels[0][:10]))
            else:
                relevances.append(0)

            # Every clicked document counts as relevant (binary judgement).
            for eid, rank in query_2_click_doc[sidquery]['click_doc_list']:
                qrel[sidquery][str(eid)] = 1

        evaluator = pytrec_eval.RelevanceEvaluator(qrel, {'map_cut_10', 'ndcg_cut_10', 'ndcg_cut_20',
                                                          'recall_10', 'recall_50', 'recall_200',
                                                          'P_10', 'P_100', 'recip_rank'})
        result = evaluator.evaluate(run)

        # Report label -> pytrec_eval measure name, in output order.
        reported = (('MAP@10', 'map_cut_10'), ('NDCG@10', 'ndcg_cut_10'), ('NDCG@20', 'ndcg_cut_20'),
                    ('Recall@10', 'recall_10'), ('Recall@50', 'recall_50'), ('Recall@200', 'recall_200'),
                    ('Precision@10', 'P_10'), ('Precision@100', 'P_100'), ('MRR', 'recip_rank'))
        overall_result = {
            label: round(sum([per_query[measure] for per_query in result.values()]) / len(result), 3)
            for label, measure in reported
        }
        overall_result['Relavance@10'] = round(sum(relevances) / len(result), 3)

        return result, overall_result

    
    def evalExpandedPerformance(self, query_2_click_doc: dict, key: str='expanded_keywords'):
        """Retrieve with the mean embedding of three expanded queries and score the result.

        For each record, three texts are built (``query + ', ' + expansion``
        for the first three entries under ``key``), embedded, and averaged;
        the averaged vector is used for the index search.

        The original queries are no longer embedded on their own: that
        embedding was computed but never used.

        Args:
            query_2_click_doc: Mapping from a query id to a record holding
                ``'query'``, the expansions under ``key`` (at least three
                entries) and a ``'click_doc_list'`` of ``(eid, rank)`` pairs.
            key: Record field that holds the expansion phrases.

        Returns:
            ``(result, overall_result)``: the per-query pytrec_eval output and a
            dict of metric averages rounded to three decimals.
        """
        keys = list(query_2_click_doc)
        expanded_texts = ([], [], [])
        for sid_query in keys:
            record = query_2_click_doc[sid_query]
            query = record['query']
            for slot, texts in enumerate(expanded_texts):
                texts.append(query + ', ' + record[key][slot])

        embs1, embs2, embs3 = [
            self.model.encode(texts, batch_size=128, show_progress_bar=False, device=DEVICE)
            for texts in expanded_texts
        ]

        # output variables
        run = defaultdict(dict)
        qrel = defaultdict(dict)

        for sidquery, emb1, emb2, emb3 in zip(keys, embs1, embs2, embs3):
            # Mean of the three expanded-query embeddings.
            query_vector = ((emb1 + emb2 + emb3) / 3).astype('float32')

            labels, distances = self.searching_index.knn_query(query_vector, k=self.top_k)

            # Score by rank position: the first hit gets top_k, the last gets 1.
            for i in range(self.top_k):
                run[sidquery][str(labels[0][i])] = self.top_k - i

            # Every clicked document counts as relevant (binary judgement).
            for eid, rank in query_2_click_doc[sidquery]['click_doc_list']:
                qrel[sidquery][str(eid)] = 1

        evaluator = pytrec_eval.RelevanceEvaluator(qrel, {'map_cut_10', 'ndcg_cut_10', 'ndcg_cut_20',
                                                          'recall_10', 'recall_50', 'recall_200',
                                                          'P_10', 'P_100', 'recip_rank'})
        result = evaluator.evaluate(run)

        # Report label -> pytrec_eval measure name, in output order.
        reported = (('MAP@10', 'map_cut_10'), ('NDCG@10', 'ndcg_cut_10'), ('NDCG@20', 'ndcg_cut_20'),
                    ('Recall@10', 'recall_10'), ('Recall@50', 'recall_50'), ('Recall@200', 'recall_200'),
                    ('Precision@10', 'P_10'), ('Precision@100', 'P_100'), ('MRR', 'recip_rank'))
        overall_result = {
            label: round(sum([per_query[measure] for per_query in result.values()]) / len(result), 3)
            for label, measure in reported
        }

        return result, overall_result


    def run(self):
        """Run the full pipeline: author keywords, both expansions, then evaluation.

        Stages 1-3 fill the records in ``self.data`` and checkpoint them to
        ``data.pickle`` after each stage. Stage 4 evaluates the original
        query, the answer-expanded query and the personalised expansions,
        appends three rows to ``self.overall_result`` and one row per query
        to ``self.result``.
        """
        checkpoint = 'data.pickle'

        print("===Stage 1: Author Representation===")
        self.extractAuthorRepresentation()
        self.storeData(filename=checkpoint)

        print("===Stage 2: Personalised Expansion===")
        self.personalisedExpandQuery()
        self.storeData(filename=checkpoint)

        print("===Stage 3: Expand===")
        self.expandQuery()
        self.storeData(filename=checkpoint)

        print("===Stage 4: Evaluate===")
        baseline = copy.deepcopy(self.data)

        def with_keywords(count):
            """Copy the baseline and append the first ``count`` keywords (all when None) to each query."""
            variant = copy.deepcopy(baseline)
            for entry in variant.values():
                limit = len(entry['expanded_keywords']) if count is None else count
                for position in range(limit):
                    entry['query'] += ', ' + entry['expanded_keywords'][position]
            return variant

        # Original queries.
        original_result, summary = self.evalPerformance(baseline, 'query')
        summary['Items'] = "Original Query"
        self.overall_result = pd.concat([self.overall_result, pd.DataFrame.from_records([summary])])

        # Query plus the LLM answer (no author information).
        answer_variant = copy.deepcopy(baseline)
        for entry in answer_variant.values():
            entry['query'] += ', ' + entry['answer_query']
        without_info_result, summary = self.evalPerformance(answer_variant, 'query')
        summary['Items'] = 'Expansion'
        self.overall_result.loc[len(self.overall_result)] = summary

        # Query plus the first 1, 2 and 3 personalised keywords (per-query results only).
        keywords_results = []
        for keyword_count in (1, 2, 3):
            per_query, summary = self.evalPerformance(with_keywords(keyword_count), 'query')
            keywords_results.append(per_query)

        # Mean embedding of the three single-keyword expansions.
        expanded_result, summary = self.evalExpandedPerformance(copy.deepcopy(baseline))
        summary['Items'] = 'Personalised Expansion'
        self.overall_result.loc[len(self.overall_result)] = summary

        # Query plus every personalised keyword (per-query results only).
        all_result, summary = self.evalPerformance(with_keywords(None), 'query')

        # One row per query: each query variant next to its NDCG@10.
        for key, value in self.data.items():
            base_query = value['query']
            answer_text = base_query + ' and ' + value['answer_query']
            keywords = value['expanded_keywords']

            one_keyword = base_query + ' and ' + keywords[0]
            two_keywords = one_keyword + ' and ' + keywords[1]
            three_keywords = two_keywords + ' and ' + keywords[2]

            every_keyword = base_query
            for position in range(len(keywords)):
                every_keyword += ' and ' + keywords[position]

            row_values = [
                base_query, original_result[key]['ndcg_cut_10'],
                answer_text, without_info_result[key]['ndcg_cut_10'],
                one_keyword, keywords_results[0][key]['ndcg_cut_10'],
                two_keywords, keywords_results[1][key]['ndcg_cut_10'],
                three_keywords, keywords_results[2][key]['ndcg_cut_10'],
                every_keyword, all_result[key]['ndcg_cut_10'],
                expanded_result[key]['ndcg_cut_10'],
            ]
            row = pd.Series(row_values, index=self.result.columns)
            self.result = pd.concat([self.result, row.to_frame().T], ignore_index=True)

    def run_rerank(self, depth: int):
        """Evaluate the re-ranking variants and append their summaries to ``self.overall_result``.

        Each variant retrieves with the original query and re-orders the
        first ``depth`` hits with a different expanded text. Requires the
        records in ``self.data`` to already hold ``'answer_query'`` and
        ``'expanded_keywords'``.

        NOTE(review): ``original_result``, ``without_info_result``,
        ``keywords_results`` and ``all_result`` are computed but not used in
        the visible part of this method.

        Args:
            depth: Number of leading hits that are re-ranked.
        """
        print("===Stage 5: Re-rank===")
        origianlQuery = copy.deepcopy(self.data)
        # print("The performance of original queries: ")
        # Baseline: re-rank with the original query itself.
        original_result, overall = self.evalRerankPerformance(origianlQuery, 'query', depth)
        overall['Items'] = "RERANK - Original Query"
        self.overall_result = pd.concat([self.overall_result, pd.DataFrame.from_records([overall])])

        # Re-rank with the query plus the LLM answer.
        expanded_queries = copy.deepcopy(origianlQuery)
        for key, value in expanded_queries.items():
            value['expanded_query'] = value['query'] + ', ' + value['answer_query']
        # print(f"The performance of adding expansion without authour's information: ")
        without_info_result, overall = self.evalRerankPerformance(expanded_queries, 'expanded_query', depth)
        overall['Items'] = "RERANK - Expansion"
        self.overall_result = pd.concat([self.overall_result, pd.DataFrame.from_records([overall])])

        # Re-rank with the query plus the first 1, 2 and 3 personalised keywords;
        # only the per-query results are kept.
        keywords_results = []
        for i in range(3):
            expanded_queries = copy.deepcopy(origianlQuery)
            for key, value in expanded_queries.items():
                value['expanded_query'] = value['query']
                for k in range(i + 1):
                    value['expanded_query'] += ', ' + value['expanded_keywords'][k]
            # print(f"The performance of adding {i+1} keywords: ")
            expanded_result, overall = self.evalRerankPerformance(expanded_queries, 'expanded_query', depth)
            keywords_results.append(expanded_result)
            # overall['Items'] = f'RERANK - Personalised Expansion {i + 1} Keywords'
            # self.overall_result.loc[len(self.overall_result)] = overall

        # Re-rank with the mean embedding of the three single-keyword expansions.
        expanded_queries = copy.deepcopy(origianlQuery)
        expanded_result, overall = self.evalRerunkExpandedPerformance(expanded_queries, depth)
        overall['Items'] = 'RERANK - Personalised Expansion'
        self.overall_result.loc[len(self.overall_result)] = overall

        # Re-rank with the query plus every personalised keyword.
        expanded_queries = copy.deepcopy(origianlQuery)
        for key, value in expanded_queries.items():
            value['expanded_query'] = value['query']
            for k in range(len(value['expanded_keywords'])):
                value['expanded_query'] += ', ' + value['expanded_keywords'][k]
        all_result, overall = self.evalRerankPerformance(expanded_queries, 'expanded_query', depth)
        # overall['Items'] = 'RERANK - Personalised Expansion All Keywords'
        # self.overall_result.loc[len(self.overall_result)] = overall