In [None]:
# sys imports
import os
import sys
import gc
import warnings
import logging
import nest_asyncio

# data handling
import pandas as pd

# Deep Learning
import torch

# RAG
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    Response
)
from llama_index.core.evaluation import (
    BatchEvalRunner,
    FaithfulnessEvaluator,
    RelevancyEvaluator,
    CorrectnessEvaluator,
    RetrieverEvaluator,
    generate_question_context_pairs,
    EmbeddingQAFinetuneDataset,
    DatasetGenerator
)
from llama_index.llms.openai import OpenAI
from llama_index.llms.huggingface import HuggingFaceLLM

#sys.path.append(os.path.join('/Users/juandiegogallegoquiceno/Desktop/pinacle/secrets'))
from dotenv import load_dotenv, dotenv_values

# import secrets
#from hf_secrets import api_token as hf_token
#from openai_secrets import api_key as oai_token

In [None]:
# Load API credentials from a dotenv file, falling back to variables that are
# already set in the process environment.
# The dotenv location can be overridden with the SECRETS_ENV_PATH environment
# variable; the default is the path this notebook was originally run with.
SECRETS_ENV_PATH = os.environ.get(
    "SECRETS_ENV_PATH", "/home/paperspace/Desktop/secrets/.env"
)
secrets = dotenv_values(dotenv_path=SECRETS_ENV_PATH)

# Values from the dotenv file win; the environment is only a fallback.
OPENAI_API_KEY = secrets.get('OPENAI_API_KEY') or os.environ.get('OPENAI_API_KEY')
HF_TOKEN = (
    secrets.get('HF_API_TOKEN')
    or os.environ.get('HF_API_TOKEN')
    or os.environ.get('HF_TOKEN')
)

# Fail early with a message that names what is missing instead of a bare
# KeyError (or a TypeError when assigning None into os.environ).
missing_secrets = [
    name
    for name, value in (('OPENAI_API_KEY', OPENAI_API_KEY), ('HF_API_TOKEN', HF_TOKEN))
    if not value
]
if missing_secrets:
    raise KeyError(
        f"Missing credentials {missing_secrets}: define them in "
        f"{SECRETS_ENV_PATH} or as environment variables."
    )

# Export both credentials as environment variables for the rest of the notebook.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
os.environ['HF_TOKEN'] = HF_TOKEN

## Read data

In [None]:
# Load only the policy PDF.
# The file has to be selected through `input_files`: the first positional
# parameter of `load_data` is `show_progress`, so passing the file name there
# does not filter anything and every file found in the directory is loaded.
DATA_DIR = '../data'
POLICY_PDF = 'Final Policy document_LICs New Jeevan Shanti_V05_logo.pdf'

reader = SimpleDirectoryReader(input_files=[os.path.join(DATA_DIR, POLICY_PDF)])
documents = reader.load_data()
len(documents)

## Embedding types

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [None]:
# Embedding model used to vectorise the documents when the indexes are built.
hf_embed_model = HuggingFaceEmbedding(
    model_name='BAAI/bge-small-en-v1.5',
)

## LLMs

In [None]:
# OpenAI model, low temperature.
gpt3_5 = OpenAI(temperature=0.1, model='gpt-3.5-turbo')

# Llama 2 7B through the Hugging Face wrapper; model and tokenizer come from
# the same checkpoint and sampling runs at the same low temperature.
llama2_checkpoint = "meta-llama/Llama-2-7b-hf"
llama2_generate_kwargs = {"temperature": 0.1, "do_sample": True}
llama2 = HuggingFaceLLM(
    model_name=llama2_checkpoint,
    tokenizer_name=llama2_checkpoint,
    generate_kwargs=llama2_generate_kwargs,
)

In [None]:
def get_eval_results(key, eval_results):
    """Print and return the pass rate of one metric from a batch evaluation.

    Args:
        key: Metric name to look up in ``eval_results``; it is converted to
            a string before the lookup.
        eval_results: Mapping from metric name to a sequence of result
            objects, each exposing a ``passing`` attribute.

    Returns:
        The fraction of results whose ``passing`` attribute is truthy, or
        ``0.0`` when the metric has no results at all.

    Raises:
        KeyError: If ``eval_results`` has no entry for ``key``.
    """
    results = eval_results[str(key)]
    # An empty result list has no pass rate; report 0.0 instead of letting the
    # division below raise ZeroDivisionError.
    if len(results) == 0:
        print(f"{key} Score: 0.0 (no results)")
        return 0.0
    # Any falsy ``passing`` value counts as a failure.
    correct = sum(1 for result in results if result.passing)
    score = correct / len(results)
    print(f"{key} Score: {score}")
    return score

### With Llama2

In [None]:
# Apply the nest_asyncio patch before the evaluation cells run; presumably
# needed so the async LlamaIndex calls work inside the notebook's
# already-running event loop (TODO confirm).
nest_asyncio.apply()

In [None]:
# Build a synthetic question/answer evaluation set from the loaded documents,
# with Llama 2 as the generating model.
num_eval_nodes = 10
data_generator = DatasetGenerator.from_documents(
    documents,
    llm=llama2,
)
eval_dataset = data_generator.generate_dataset_from_nodes(
    num=num_eval_nodes,
)

In [None]:
# Split the generated (question, answer) pairs into two parallel lists.
eval_questions, eval_answers = [], []
for pair in eval_dataset.qr_pairs:
    eval_questions.append(pair[0])
    eval_answers.append(pair[1])

# Show how many pairs were generated, then the first pair as a sanity check.
print(len(eval_questions))
print(eval_questions[0], eval_answers[0])

In [None]:
# Use the first generated question as a sample query for inspecting retrieval.
eval_query = eval_questions[0]
#### Vector store
# Index the documents with the Hugging Face embedding model (hf_embed_model).
index = VectorStoreIndex.from_documents(documents, embed_model=hf_embed_model)
# The answering LLM has to be given to the query engine itself: an `llm`
# keyword passed to `from_documents` is not forwarded to `as_query_engine()`,
# which would otherwise fall back to the globally configured default LLM and
# this section would not be evaluating Llama 2 at all.
query_engine = index.as_query_engine(llm=llama2)
# Retriever returning the 3 most similar nodes.
retriever = index.as_retriever(similarity_top_k=3)
# Retrieved nodes for the sample query, kept for manual inspection.
nodes = retriever.retrieve(eval_query)

#### Evaluation

In [None]:
# define a runner evaluation instance
runner = BatchEvalRunner(
    {
        "faithfulness": FaithfulnessEvaluator(llm=llama2),
        "relevancy": RelevancyEvaluator(llm=llama2),
        "correctness": CorrectnessEvaluator(llm=llama2)
    },
    workers=8
)
# evaluate
eval_results = await runner.aevaluate_queries(
    query_engine,
    queries=eval_questions,
    reference=eval_answers
)
# see results
for key in ["faithfulness", "relevancy", "correctness"]:
    get_eval_results(key, eval_results)

### With GPT 3.5 Turbo

In [None]:
# Build a synthetic question/answer evaluation set from the loaded documents,
# with GPT-3.5 Turbo as the generating model.
num_eval_nodes = 10
data_generator = DatasetGenerator.from_documents(
    documents,
    llm=gpt3_5,
)
eval_dataset = data_generator.generate_dataset_from_nodes(
    num=num_eval_nodes,
)

In [None]:
# Split the generated (question, answer) pairs into two parallel lists.
eval_questions, eval_answers = [], []
for pair in eval_dataset.qr_pairs:
    eval_questions.append(pair[0])
    eval_answers.append(pair[1])

# Show how many pairs were generated, then the first pair as a sanity check.
print(len(eval_questions))
print(eval_questions[0], eval_answers[0])

In [None]:
# Take the first generated question as a sample query; it is passed to the
# retriever in the vector-store cell below.
eval_query = eval_questions[0]

#### Vector store

In [None]:
# Index the documents with the Hugging Face embedding model (hf_embed_model).
index = VectorStoreIndex.from_documents(documents, embed_model=hf_embed_model)
# Pass the answering LLM to the query engine explicitly: an `llm` keyword
# given to `from_documents` is not forwarded to `as_query_engine()`, so
# without this the engine silently uses whatever the global default LLM is.
query_engine = index.as_query_engine(llm=gpt3_5)
# Retriever returning the 3 most similar nodes.
retriever = index.as_retriever(similarity_top_k=3)
# Retrieved nodes for the sample query, kept for manual inspection.
nodes = retriever.retrieve(eval_query)

#### Evaluation

In [None]:
# define a runner evaluation instance
runner = BatchEvalRunner(
    {
        "faithfulness": FaithfulnessEvaluator(llm=gpt3_5),
        "relevancy": RelevancyEvaluator(llm=gpt3_5),
        "correctness": CorrectnessEvaluator(llm=gpt3_5)
    },
    workers=8
)
# evaluate
eval_results = await runner.aevaluate_queries(
    query_engine,
    queries=eval_questions,
    reference=eval_answers
)

In [None]:
# Report the pass rate of each metric.
for metric_name in ("faithfulness", "relevancy", "correctness"):
    get_eval_results(metric_name, eval_results)