In [1]:
import sys

# Put the directory two levels up on the module search path (at position 1,
# i.e. right after the first entry) — presumably so the local `evaluation`
# and `graphrag` packages imported below resolve; confirm against repo layout.
sys.path.insert(1, "../../")

In [2]:
import json
import logging
import os
from copy import deepcopy
from dataclasses import dataclass

import pandas as pd
import tiktoken
from evaluation.question_gen.question import Question

from graphrag.model.community import Community
from graphrag.model.community_report import CommunityReport
from graphrag.model.entity import Entity
from graphrag.query.indexer_adapters import (
    read_indexer_communities,
    read_indexer_entities,
    read_indexer_reports,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.llm.text_utils import num_tokens
from graphrag.query.structured_search.global_search.community_context import (
    GlobalCommunityContext,
)
from graphrag.query.structured_search.global_search.map_system_prompt import (
    MAP_SYSTEM_PROMPT,
)
from graphrag.query.structured_search.global_search.reduce_system_prompt import (
    REDUCE_SYSTEM_PROMPT,
)
from graphrag.query.structured_search.global_search.search import GlobalSearch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Configure root logging at INFO level, printing only the message text.
logging.basicConfig(level=logging.INFO, format="%(message)s")
# Raise the "httpx" logger's threshold to WARNING so its INFO records are hidden.
logging.getLogger("httpx").setLevel(logging.WARNING)

### Evaluating global search performance on different classes of questions

This notebook shows an example of using the evaluation package to generate global search answers for an experiment that evaluates global search performance on different classes of questions.


### LLM setup

In [4]:
# The API key is read from the environment; os.getenv returns None when the
# variable is unset, so make sure AP_OPENAI_API_KEY is exported before running.
api_key = os.getenv("AP_OPENAI_API_KEY")
api_version = "2024-02-15-preview"
# Model / deployment used for the main (map-reduce) search LLM. `llm_model`
# is also embedded in OUTPUT_DIR and in the per-run stats rows.
llm_model = "gpt-4o-2024-05-13"
llm_deployment_name = "gpt-4o-2024-05-13"

# Keyword arguments expanded into ChatOpenAI(...) when the search engine is built.
llm_init_params = {
    "api_key": api_key,
    "api_version": api_version,
    "model": llm_model,
    "deployment_name": llm_deployment_name,
    "api_type": OpenaiApiType.OpenAI,
    "max_retries": 50,
}

# Token encoder shared by the context builder and the search engine.
# NOTE(review): "o200k_base" is presumably chosen to match the gpt-4o models — confirm.
token_encoder = tiktoken.get_encoding("o200k_base")

### Creating global search engine

In [5]:
# Default token budget; used as the default `max_data_tokens` of
# create_search_engine and as the single entry of max_data_tokens_options.
MAX_DATA_TOKENS = 8000
# Base names (without ".parquet") of the indexer artifacts read by prep_data.
COMMUNITY_REPORT_TABLE = "create_final_community_reports"
COMMUNITY_TABLE = "create_final_communities"
ENTITY_TABLE = "create_final_nodes"
ENTITY_EMBEDDING_TABLE = "create_final_entities"

# Root folder for generated answers and stats (one sub-tree per LLM model).
OUTPUT_DIR = f"./answers/autoq/v1/{llm_model}"
# Folder containing the "<question set name>.json" question files.
QUESTION_DIR = "./questions/autoq/v1"

In [6]:
def prep_data(input_dir: str, community_level: int):
    """Load the parquet artifacts of one indexing run and adapt them for querying.

    Args:
        input_dir: Name of the run folder; artifacts are read from
            ``output/<input_dir>/artifacts``.
        community_level: Community level forwarded to the report and entity
            readers.

    Returns:
        A ``(reports, entities, communities)`` tuple holding the results of
        ``read_indexer_reports``, ``read_indexer_entities`` and
        ``read_indexer_communities`` respectively.
    """
    artifacts_root = f"output/{input_dir}/artifacts"

    def _load_table(table_name: str):
        # Every artifact lives at <artifacts_root>/<table_name>.parquet.
        return pd.read_parquet(f"{artifacts_root}/{table_name}.parquet")

    nodes_df = _load_table(ENTITY_TABLE)
    community_reports_df = _load_table(COMMUNITY_REPORT_TABLE)
    entity_embeddings_df = _load_table(ENTITY_EMBEDDING_TABLE)
    communities_df = _load_table(COMMUNITY_TABLE)

    communities = read_indexer_communities(
        final_communities=communities_df,
        final_nodes=nodes_df,
        final_community_reports=community_reports_df,
    )
    reports = read_indexer_reports(
        final_community_reports=community_reports_df,
        final_nodes=nodes_df,
        community_level=community_level,
        dynamic_selection=True,
    )
    entities = read_indexer_entities(
        final_nodes=nodes_df,
        final_entities=entity_embeddings_df,
        community_level=community_level,
    )

    print(f"Report records: {len(reports)}")
    return reports, entities, communities


def create_search_engine(
    reports: list[CommunityReport],
    entities: list[Entity],
    communities: list[Community],
    max_data_tokens: int = MAX_DATA_TOKENS,
    max_map_output_tokens: int = 1000,
    max_reduce_output_tokens: int = 2000,
    map_system_prompt: str = MAP_SYSTEM_PROMPT,
    reduce_system_prompt: str = REDUCE_SYSTEM_PROMPT,
):
    """Build a global search engine that uses dynamic community selection.

    Args:
        reports: Community reports used to build the search context.
        entities: Entities handed to the context builder.
        communities: Communities handed to the context builder.
        max_data_tokens: Token budget applied both to the search engine
            (``max_data_tokens``) and to the context builder (``max_tokens``).
        max_map_output_tokens: ``max_tokens`` for the map-step LLM calls.
        max_reduce_output_tokens: ``max_tokens`` for the reduce-step LLM call.
        map_system_prompt: System prompt for the map step.
        reduce_system_prompt: System prompt for the reduce step.

    Returns:
        A ``(search_engine, search_engine_configs, llm_init_params)`` tuple:
        the ``GlobalSearch`` instance, the keyword arguments it was built
        with, and the module-level init parameters of the main LLM.
    """

    # initialize GPT-4o mini for dynamic search
    mini_llm = ChatOpenAI(**{
        "api_key": api_key,
        "api_version": api_version,
        "model": "gpt-4o-mini",
        "deployment_name": "gpt-4o-mini",
        "api_type": OpenaiApiType.OpenAI,
        "max_retries": 50,
    })

    context_builder = GlobalCommunityContext(
        community_reports=reports,
        entities=entities,
        communities=communities,
        llm=mini_llm,
        token_encoder=token_encoder,
        dynamic_selection=True,
        dynamic_selection_params={
            "keep_parent": False,
            "num_repeats": 1,
            "use_summary": False,
            "concurrent_coroutines": 16,
            "rating_threshold": 1,
            "start_with_root": True,
        },
    )

    context_builder_params = {
        "use_community_summary": False,
        "shuffle_data": True,
        "include_community_rank": True,
        "min_community_rank": 0,
        "community_rank_name": "rank",
        "include_community_weight": False,
        "community_weight_name": "occurrence weight",
        "normalize_community_weight": False,
        # Honour the caller's budget instead of the module-level constant so
        # that sweeping `max_data_tokens` also changes the context size.
        "max_tokens": max_data_tokens,
        "context_name": "Reports",
    }

    map_llm_params = {
        "max_tokens": max_map_output_tokens,
        "temperature": 0.0,
        "response_format": {"type": "json_object"},
    }

    reduce_llm_params = {
        "max_tokens": max_reduce_output_tokens,
        "temperature": 0.0,
    }

    # Main LLM used for the map and reduce steps.
    llm = ChatOpenAI(**llm_init_params)

    search_engine_configs = {
        "llm": llm,
        "context_builder": context_builder,
        "token_encoder": token_encoder,
        "max_data_tokens": max_data_tokens,
        "map_llm_params": map_llm_params,
        "reduce_llm_params": reduce_llm_params,
        "map_system_prompt": map_system_prompt,
        "reduce_system_prompt": reduce_system_prompt,
        "context_builder_params": context_builder_params,
        "concurrent_coroutines": 16,
        "allow_general_knowledge": False,
        "json_mode": True,
        "response_type": "multiple paragraphs",
    }
    search_engine = GlobalSearch(**search_engine_configs)
    return search_engine, search_engine_configs, llm_init_params

#### Generate evaluation data

In [7]:
import pickle

import numpy as np
from evaluation.answer_gen.search_answer_gen import (
    AnswerType,
    GlobalSearchAnswerGenerator,
)

In [8]:
@dataclass
class QuestionSet:
    """A named list of questions that is answered and reported as one unit."""
    # Identifier of the set; load_question_set fills it with the question file's base name.
    name: str
    # The questions belonging to this set.
    questions: list[Question]


def load_question_set(question_dir: str, question_name: str) -> QuestionSet:
    """Load ``<question_dir>/<question_name>.json`` into a ``QuestionSet``.

    The file must contain a JSON array of objects, each with at least an
    ``"id"`` and a ``"text"`` key; any other keys are ignored.

    Args:
        question_dir: Folder containing the question files.
        question_name: File name without the ``.json`` extension; also used
            as the name of the returned set.

    Returns:
        A ``QuestionSet`` with one ``Question`` per JSON record, in file order.

    Raises:
        FileNotFoundError: If the question file does not exist.
        KeyError: If a record lacks ``"id"`` or ``"text"``.
    """
    question_file = f"{question_dir}/{question_name}.json"

    # Read as UTF-8 explicitly so non-ASCII question text does not depend on
    # the platform's default locale encoding.
    with open(question_file, "r", encoding="utf-8") as f:
        question_json = json.load(f)

    questions: list[Question] = [
        Question(id=record["id"], text=record["text"]) for record in question_json
    ]
    return QuestionSet(name=question_name, questions=questions)


async def generate_answers(
    question_set: QuestionSet,
    input_dir: str,
    community_level: int,
    max_data_tokens: int,
    max_map_output_tokens: int,
    max_reduce_output_tokens: int,
    map_prompt: dict,
    reduce_prompt: dict,
):
    """Answer every question of a set with global search and persist the results.

    Args:
        question_set: Questions to answer; its name is part of the output path.
        input_dir: Indexing-run folder name (see ``prep_data``); also part of
            the output path.
        community_level: Community level used to load reports and entities.
        max_data_tokens: Data token budget for the search engine.
        max_map_output_tokens: Output token limit of the map step.
        max_reduce_output_tokens: Output token limit of the reduce step.
        map_prompt: Dict with keys ``"prompt"`` (system prompt text) and
            ``"type"`` (label stored in the results).
        reduce_prompt: Same structure as ``map_prompt``, for the reduce step.

    Returns:
        A dict with the run parameters, completion-time statistics, token
        totals, the number of non-empty answers and the raw answers. Timing
        and token statistics only cover answers whose response is non-empty;
        the timing statistics are NaN when there is no such answer.

    Side effects:
        Writes ``candidate_answers.pkl`` (the returned dict) and
        ``candidate_answers_text.json`` (id / text / answer per question)
        under ``OUTPUT_DIR/<input_dir>/<question set name>/
        global_search_fixed_community_filtering_<community_level>``.
    """
    reports, entities, communities = prep_data(input_dir, community_level)
    search_engine, search_engine_configs, llm_init_params = create_search_engine(
        reports=reports,
        entities=entities,
        communities=communities,
        max_data_tokens=max_data_tokens,
        max_map_output_tokens=max_map_output_tokens,
        max_reduce_output_tokens=max_reduce_output_tokens,
        map_system_prompt=map_prompt["prompt"],
        reduce_system_prompt=reduce_prompt["prompt"],
    )
    answer_generator = GlobalSearchAnswerGenerator(
        search_engine=search_engine,
        search_engine_configs=search_engine_configs,
        llm_init_params=llm_init_params,
        concurrent_coroutines=2,
    )

    # generate main answers
    print("GENERATING MAIN ANSWERS...")
    candidate_answers = await answer_generator.agenerate(
        questions=question_set.questions, answer_type=AnswerType.CANDIDATE_ANSWER
    )

    # Only answers with a non-empty response contribute to the statistics.
    valid_answers = [
        qa.generated_answer
        for qa in candidate_answers
        if qa.generated_answer.response.strip() != ""
    ]
    completion_times = [generated.completion_time for generated in valid_answers]
    build_context_input_tokens = [
        generated.prompt_tokens["build_context"] for generated in valid_answers
    ]
    build_context_output_tokens = [
        generated.output_tokens["build_context"] for generated in valid_answers
    ]
    # Map and reduce token counts are pooled; only their sum is reported.
    map_reduce_input_tokens = [
        generated.prompt_tokens[stage]
        for generated in valid_answers
        for stage in ("map", "reduce")
    ]
    map_reduce_output_tokens = [
        generated.output_tokens[stage]
        for generated in valid_answers
        for stage in ("map", "reduce")
    ]

    # np.mean / np.std of an empty list emit a RuntimeWarning; report NaN
    # directly when no question produced an answer.
    if completion_times:
        mean_completion_time = np.mean(completion_times)
        std_completion_time = np.std(completion_times)
    else:
        mean_completion_time = std_completion_time = float("nan")

    results = {
        "input_dir": input_dir,
        "community_level": community_level,
        "max_data_tokens": max_data_tokens,
        "max_map_output_tokens": max_map_output_tokens,
        "max_reduce_output_tokens": max_reduce_output_tokens,
        "map_prompt": map_prompt,
        "reduce_prompt": reduce_prompt,
        "map_prompt_type": map_prompt["type"],
        "reduce_system_prompt_type": reduce_prompt["type"],
        "mean_completion_time": mean_completion_time,
        "std_completion_time": std_completion_time,
        "total_input_tokens": np.sum(build_context_input_tokens)
        + np.sum(map_reduce_input_tokens),
        "total_output_tokens": np.sum(build_context_output_tokens)
        + np.sum(map_reduce_output_tokens),
        "num_valid_answers": len(valid_answers),
        "answers": candidate_answers,
    }

    file_name = "candidate_answers.pkl"
    final_output_dir = f"{OUTPUT_DIR}/{input_dir}/{question_set.name}/global_search_fixed_community_filtering_{community_level}"
    # exist_ok avoids the check-then-create race; the same directory path is
    # used for creation and for both output files.
    os.makedirs(final_output_dir, exist_ok=True)

    with open(os.path.join(final_output_dir, file_name), "wb") as f:
        pickle.dump(results, f)

    simplified_results = []
    for qa in candidate_answers:
        answer_text = qa.generated_answer.response.strip()
        simplified_results.append({
            "question_id": qa.question.id,
            "question_text": qa.question.text,
            "answer": answer_text,
        })
        if answer_text == "":
            print(f"Empty answer for question: {qa.question.text}")

    with open(os.path.join(final_output_dir, "candidate_answers_text.json"), "w") as f:
        json.dump(simplified_results, f, indent=4)

    return results

### Load questions

In [9]:
# Indexing-run folders to evaluate; each is resolved by prep_data to
# "output/<name>/artifacts".
input_dirs = [
    "20240910-222417-exp7",
]

# Question-set file names (without ".json") inside QUESTION_DIR.
# Commented-out entries are other sets that are currently disabled.
questions = [
    "data_global_questions",
    # "data_local_questions",
    # "activity_global_questions",
    # "activity_local_questions",
]

# Parameter grids; every list is one axis of the cartesian product that is
# run below. Each currently holds a single value.
max_data_tokens_options = [MAX_DATA_TOKENS]
community_levels = [1]
max_map_output_tokens_options = [1000]
max_reduce_output_tokens_options = [2000]
# Prompt entries carry the prompt text plus a "type" label stored with the results.
map_prompts = [{"prompt": MAP_SYSTEM_PROMPT, "type": "default"}]
reduce_prompts = [{"prompt": REDUCE_SYSTEM_PROMPT, "type": "default"}]

In [10]:
# Build the cartesian product of all experiment parameters with itertools.
from itertools import product

# Load every configured question file once, up front.
question_sets = []
for question in questions:
    question_set = load_question_set(QUESTION_DIR, question)
    question_sets.append(question_set)

# The argument order here matches the positional parameters of
# generate_answers, so each tuple can be unpacked straight into it.
combinations = list(
    product(
        question_sets,
        input_dirs,
        community_levels,
        max_data_tokens_options,
        max_map_output_tokens_options,
        max_reduce_output_tokens_options,
        map_prompts,
        reduce_prompts,
    )
)


# Run every parameter combination and write a stats CSV for it.
for index, combination in enumerate(combinations):
    # NOTE(review): run_stats is re-created on every iteration, so each CSV
    # holds only the current combination's row — confirm this is intended.
    run_stats = []
    print(f"Processing combination: {index}")
    results = await generate_answers(*combination)
    # Tuple positions follow the product(...) argument order:
    # 0 = question set, 1 = input dir, 2 = community level.
    run_stats.append({
        "question_set": combination[0].name,
        "input_dir": combination[1],
        "llm_model": llm_model,
        "community_level": combination[2],
        "avg_completion_time": results["mean_completion_time"],
        "std_completion_time": results["std_completion_time"],
        "total_input_tokens": results["total_input_tokens"],
        "total_output_tokens": results["total_output_tokens"],
        "num_valid_answers": results["num_valid_answers"],
    })
    stats_df = pd.DataFrame(run_stats)
    # NOTE(review): the file name encodes only the question-set name and the
    # community level; combinations that differ in another parameter (e.g.
    # input_dir) would overwrite the same file — verify before widening the grid.
    stats_df.to_csv(
        f"{OUTPUT_DIR}/global_search_fixed_community_filtering_{combination[0].name}_community_{combination[2]}_stats.csv"
    )

Processing combination: 0
