In [None]:
import nest_asyncio

# Applied before any other cell runs; a later cell calls asyncio.run(...) from
# inside the notebook, which presumably relies on this patch — TODO confirm.
nest_asyncio.apply()

In [None]:
import logging
import sys
import pandas as pd
import os

# Send INFO-and-above log records to stdout so they show up in the cell output.
# basicConfig already installs a stdout handler on the root logger, so no second
# StreamHandler is attached (a second one would print every record twice).
# force=True removes handlers left on the root logger by an earlier run of this
# cell, so re-executing it does not stack additional handlers.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [None]:
from llama_index.core.evaluation import DatasetGenerator, RelevancyEvaluator
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Response
from llama_index.llms.openai import OpenAI
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

In [None]:
from llama_index.llms.azure_openai import AzureOpenAI

from reginald.models.models.llama_index import DataIndexCreator

import os
from reginald.utils import get_env_var

from llama_index.readers.github import (
    GithubClient,
    GitHubIssuesClient,
    GitHubRepositoryIssuesReader,
    GithubRepositoryReader,
)

## Set up Azure GPT4

In [None]:
# Azure OpenAI connection details. The API key is read from the environment so
# that no secret is stored in the notebook.
openai_azure_api_key = os.environ["OPENAI_AZURE_API_KEY"]
azure_endpoint = "https://reginald-uk-south.openai.azure.com/"
api_version = "2024-02-01"

# Keyword arguments for the GPT-4 client, collected in one place.
azure_llm_kwargs = dict(
    model="gpt-4",
    deployment_name="reginald-gpt4",
    api_key=openai_azure_api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)
azure_gpt4 = AzureOpenAI(**azure_llm_kwargs)

In [None]:
# Setup settings for vectorisation later
from reginald.models.models.llama_index import setup_settings
from reginald.models.setup_llm import DEFAULT_ARGS
from transformers import AutoTokenizer

from reginald.models.models.llama_index import (
    setup_settings,
    LlamaIndexLlamaCPP,
    set_global_tokenizer,
    compute_default_chunk_size,
)

In [None]:
# Chunk size passed to setup_settings below.
# NOTE(review): 4096 and 3 are hard-coded here, while setup_settings takes
# max_input_size and k from DEFAULT_ARGS — confirm the two are meant to agree.
chunk_size = compute_default_chunk_size(max_input_size=4096, k=3)

# The encode method of the Llama-2 chat tokenizer is registered as the global
# tokenizer and is also handed to setup_settings.
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
tokenizer = llama_tokenizer.encode
set_global_tokenizer(tokenizer)

# Build the settings object around the Azure GPT-4 LLM.
settings = setup_settings(
    llm=azure_gpt4,
    max_input_size=DEFAULT_ARGS["max_input_size"],
    num_output=DEFAULT_ARGS["num_output"],
    chunk_size=chunk_size,
    chunk_overlap_ratio=DEFAULT_ARGS["chunk_overlap_ratio"],
    k=DEFAULT_ARGS["k"],
    tokenizer=tokenizer,
)

## Load the handbook documents and the pre-generated questions

In [None]:
# GitHub credentials and the repository that hosts the handbook.
gh_token = get_env_var("GITHUB_TOKEN")
owner = "alan-turing-institute"
repo = "REG-handbook"

# Client created with fail_on_http_error=False.
github_client = GithubClient(gh_token, fail_on_http_error=False)
include = GithubRepositoryReader.FilterType.INCLUDE

# Reader restricted to ".md" files under the "content" directory.
handbook_loader = GithubRepositoryReader(
    github_client,
    owner=owner,
    repo=repo,
    verbose=False,
    concurrent_requests=1,
    timeout=60,
    retries=3,
    filter_file_extensions=([".md"], include),
    filter_directories=(["content"], include),
)

# Load the documents from the "main" branch.
documents = handbook_loader.load_data(branch="main")

In [10]:
# Persist the downloaded handbook documents to disk with pickle.
import pickle

handbook_pickle_path = '../../data/public/handbook_github_downloaded.pkl'
with open(handbook_pickle_path, 'wb') as pickle_out:
    pickle.dump(documents, pickle_out)

In [None]:
import json

save_name = "handbook_eval_questions.json"

# Read the evaluation questions from their JSON file.
questions_path = os.path.join('../../data/evaluations', save_name)
with open(questions_path, 'r') as questions_file:
    loaded_eval_questions = json.load(questions_file)

# Show the loaded questions as the cell output.
loaded_eval_questions

In [None]:
# Build the vector index from the loaded documents, with a progress display.
vector_index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
)

In [None]:
# Preview the first four loaded questions.
loaded_eval_questions[:4]

In [None]:
# query_engine    = vector_index.as_query_engine()
# response_vector = query_engine.query(loaded_eval_questions[1])

In [None]:
# Assuming vector_index and loaded_eval_questions are already defined

# query_engine = vector_index.as_query_engine()

# # Create a list or dictionary to store the responses
# response_vectors = []

# # Iterate over each question in eval_questions
# for question in loaded_eval_questions:
#     response = query_engine.query(question)
#     response_vectors.append(response)

## Get answers in batch

In [None]:
import asyncio

def evaluate_query_engine(query_engine, questions):
    """Run every question through the query engine concurrently.

    Args:
        query_engine: Object exposing an async ``aquery(question)`` method.
        questions: Iterable of questions; each one is passed to ``aquery``.

    Returns:
        list: One result per question, in the same order as ``questions``
        (empty when ``questions`` is empty).
    """

    async def _run_all():
        # gather() is awaited inside a coroutine because asyncio.run() expects
        # a coroutine; passing it the future returned by a bare
        # asyncio.gather(...) raises ValueError with the standard
        # implementation and only works when asyncio.run has been patched.
        return await asyncio.gather(*(query_engine.aquery(q) for q in questions))

    results = asyncio.run(_run_all())
    print("finished query")

    return results

In [None]:
# Query engine built from the vector index with no extra arguments.
vector_query_engine = vector_index.as_query_engine()

# Run the first 101 questions through the engine.
# NOTE(review): `results` is reassigned by the batch loop in a later cell, so
# this output is only kept until that cell runs.
first_questions = loaded_eval_questions[:101]
results = evaluate_query_engine(vector_query_engine, first_questions)

In [None]:
# Query all questions in fixed-size batches and collect every answer.
batch_size = 25
total_questions = len(loaded_eval_questions)
all_results = []

for i in range(0, total_questions, batch_size):
    # Questions i .. i + batch_size - 1 (the final batch may be shorter).
    batch = loaded_eval_questions[i : i + batch_size]
    results = evaluate_query_engine(vector_query_engine, batch)

    # Accumulate this batch's answers, then report progress.
    all_results += results
    print(f"Results for batch {i // batch_size + 1} done.")



In [None]:
# Persist the collected answers to disk with pickle.
import pickle

answers_pickle_path = '../../data/evaluations/handbook_eval_answers.pkl'
with open(answers_pickle_path, 'wb') as pickle_out:
    pickle.dump(all_results, pickle_out)

In [None]:
# Read the pickled answers back in.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
answers_pickle_path = '../../data/evaluations/handbook_eval_answers.pkl'
with open(answers_pickle_path, 'rb') as pickle_in:
    loaded_data = pickle.load(pickle_in)

In [None]:
# Show the attribute dictionary of the first loaded answer object.
vars(loaded_data[0])