In [1]:
from reginald.models.setup_llm import setup_llm

import pandas as pd

from reginald.models.models.llama_index import (
    setup_settings,
    LlamaIndexLlamaCPP,
    set_global_tokenizer,
    compute_default_chunk_size,
)




In [2]:
from llama_index.core import (
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.evaluation import DatasetGenerator
from llama_index.llms.ollama import Ollama

In [3]:
from transformers import AutoTokenizer

In [4]:
import nest_asyncio

nest_asyncio.apply()

In [5]:
# response_model = setup_llm(
#     model="llama-index-llama-cpp",
#     model_name="../../../llama-2-7b-chat.Q4_K_M.gguf",  # or hugging face path
#     data_dir="../../data/",  # needs to be path to your data directory
#     which_index="handbook",
# )

In [6]:
ollama_llm = Ollama(model="llama2:7b-chat", request_timeout=60.0)

In [7]:
chunk_size = compute_default_chunk_size(
    max_input_size=4096, k=3
)  # calculate chunk size

In [8]:
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf"
).encode  # load tokenizer
set_global_tokenizer(tokenizer)

In [9]:
settings = setup_settings(
    llm=ollama_llm,
    max_input_size=4096,
    num_output=512,
    chunk_overlap_ratio=0.1,
    chunk_size=chunk_size,
    k=3,
    tokenizer=tokenizer,
)  # these are settings for the storage context

In [10]:
# storage_context = StorageContext.from_defaults(
#     persist_dir="../../data/llama_index_indices/handbook/"
# )

# vector_index = load_index_from_storage(
#     storage_context=storage_context,
#     settings=settings,
# )  # load the data index from storage

In [12]:
from llama_index.core.evaluation import DatasetGenerator, RelevancyEvaluator
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Response
from llama_index.llms.openai import OpenAI

reader = SimpleDirectoryReader("./data/paul_graham/")
documents = reader.load_data()
vector_index = VectorStoreIndex.from_documents(documents)

In [57]:
dataset_generator = DatasetGenerator.from_documents(
    vector_index.docstore.docs.values(),
    num_questions_per_chunk=1,
    show_progress=True,
    llm=ollama_llm
)

Parsing nodes:   0%|          | 0/26 [00:00<?, ?it/s]

  return cls(


In [67]:
eval_questions = dataset_generator.generate_questions_from_nodes(
    5
)  # generate questions from the documents

100%|██████████| 5/5 [01:21<00:00, 16.21s/it]
  return QueryResponseDataset(queries=queries, responses=responses_dict)


In [68]:
eval_questions

['Of course! Here are 5 questions based on the given context:',
 "What was Paul Graham's main focus outside of school before college?",
 'What was the language used in the IBM 1401 that Paul and his friend Rich Draves used in 9th grade?',
 'How did Paul feel when he learned that programs could terminate, or not?',
 'Can you identify any differences between a computer built as a kit by Heathkit and a TRS-80?']

In [69]:
from llama_index.core.evaluation import DatasetGenerator, RelevancyEvaluator
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Response
from llama_index.llms.openai import OpenAI

In [70]:
evaluator_gpt4 = RelevancyEvaluator(llm=ollama_llm)

In [74]:
# define jupyter display function
def display_eval_df(query: str, response: Response, eval_result: str) -> None:
    eval_df = pd.DataFrame(
        {
            "Query": query,
            "Response": str(response),
            "Source": (
                response.source_nodes[0].node.get_content()[:1000] + "..."
            ),
            "Evaluation Result": eval_result.passing,
        },
        index=[0],
    )
    eval_df = eval_df.style.set_properties(
        **{
            "inline-size": "600px",
            "overflow-wrap": "break-word",
        },
        subset=["Response", "Source"]
    )
    return(eval_df)
    #display(eval_df)

In [None]:
# for every value in eval_questions, get the response from the llm
all_dfs = []

for i in range(len(eval_questions)):
    print(i)
    response = evaluator_gpt4.get_response(eval_questions[i])
    eval_result = evaluator_gpt4.evaluate_response(response)
    all_dfs[i] = display_eval_df(eval_questions[i].question, response, eval_result)



In [None]:
# combine all the dataframes
combined_df = pd.concat(all_dfs)

# Example Case

In [None]:
import logging
import sys
import pandas as pd

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [None]:
from llama_index.core.evaluation import DatasetGenerator, RelevancyEvaluator
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Response
from llama_index.llms.openai import OpenAI

In [None]:
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'

In [None]:
reader = SimpleDirectoryReader("./data/paul_graham/")
documents = reader.load_data()

In [None]:
data_generator = DatasetGenerator.from_documents(
    documents,
    show_progress=True
    )

In [None]:
eval_questions = data_generator.generate_questions_from_nodes(3)

In [None]:
eval_questions

In [None]:
# gpt-4
gpt4 = OpenAI(temperature=0, model="gpt-3.5-turbo")

In [None]:
evaluator_gpt4 = RelevancyEvaluator(llm=gpt4)

In [None]:
# create vector index
vector_index = VectorStoreIndex.from_documents(documents)

In [None]:
# define jupyter display function
def display_eval_df(query: str, response: Response, eval_result: str) -> None:
    eval_df = pd.DataFrame(
        {
            "Query": query,
            "Response": str(response),
            "Source": (
                response.source_nodes[0].node.get_content()[:1000] + "..."
            ),
            "Evaluation Result": eval_result,
        },
        index=[0],
    )
    eval_df = eval_df.style.set_properties(
        **{
            "inline-size": "600px",
            "overflow-wrap": "break-word",
        },
        subset=["Response", "Source"]
    )
    display(eval_df)

In [None]:
query_engine = vector_index.as_query_engine()
response_vector = query_engine.query(eval_questions[1])
eval_result = evaluator_gpt4.evaluate_response(
    query=eval_questions[1], response=response_vector
)