## **Use ensemble for LLM model**

## To load the index and run query

In [1]:
# !pip install -q huggingface_hub
# !pip install -q llama-index
# !pip install -q transformers
# !pip install -q torch
# !pip install -q gradio
# !pip install -q llama-index-llms-huggingface
# !pip install -q llama-index-llms-huggingface-api
# !pip install -q llama-index-embeddings-huggingface
# !pip install -q llama-index-llms-groq
# !pip install -q llama-index-llms-gemini

In [2]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [12]:
from llama_index.core import Settings
from llama_index.llms.groq import Groq
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import StorageContext
from llama_index.core import load_index_from_storage
from llama_index.core import SummaryIndex
from llama_index.core import PromptTemplate
from llama_index.core.tools import QueryEngineTool
import pandas as pd
import nest_asyncio
nest_asyncio.apply()

# define LLM, Embed models

## Note: Please replace API keys with your own API keys here.

Settings.llm = Groq(model="llama-3.1-8b-instant", api_key="your key")
Settings.chunk_size = 1024
embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-large")
Settings.embed_model = embed_model

#load indexs store in presist_dir

## Note: Use folder in the google drive which having indexes (do not use folder having .txt file).
## Indexed are build and kept in sum_key_vector_index_ensemble folder on gDrive.
## Location - https://drive.google.com/drive/folders/14ASzYUxIPeUCtsxFY9Jwzb1FKXY08lA5?usp=share_link
## If required, you can built it again. Please let us know to do it.

storage_context = StorageContext.from_defaults(persist_dir = r'C:\Users\agshi\Downloads\sum_key_vector_index_ensemble-20240909T154445Z-001-20240911T083314Z-001\sum_key_vector_index_ensemble-20240909T154445Z-001\sum_key_vector_index_ensemble')
vector_index = load_index_from_storage(storage_context, index_id = "9ff973d9-c21b-4710-a8e6-dfe399aa2cd5")
keyword_index = load_index_from_storage(storage_context, index_id = "34a4783e-7bbe-4eb9-acc6-0227b9173688")
summary_index = load_index_from_storage(storage_context, index_id = "1f3c9c00-00af-4435-be85-78e07c12417e")

#the default question answer prompt template

QA_PROMPT_TMPL = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the question. If the answer is not in the context, inform "
    "the user that you can't answer the question - DO NOT MAKE UP AN ANSWER.\n"
    "Question: {query_str}\n"
    "Answer : "
)
QA_PROMPT = PromptTemplate(QA_PROMPT_TMPL)

# assign prompt to query_engine

keyword_query_engine = keyword_index.as_query_engine(
    text_qa_template=QA_PROMPT
)
vector_query_engine = vector_index.as_query_engine(text_qa_template=QA_PROMPT)

summary_query_engine = summary_index.as_query_engine(text_qa_template=QA_PROMPT)


# Convert them to queryenginetools

keyword_tool = QueryEngineTool.from_defaults(
    query_engine=keyword_query_engine,
    description="Useful for answering questions about this context",
)

vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description="Useful for answering questions about this context",
)

summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description="Useful for answering questions about this context",
)

# To enable synthesis between documents

from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector, LLMMultiSelector
from llama_index.core.selectors import (
    PydanticMultiSelector,
    PydanticSingleSelector,
)
from llama_index.core.response_synthesizers import TreeSummarize

TREE_SUMMARIZE_PROMPT_TMPL = (
    "Context information from multiple sources is below. Each source may or"
    " may not have \na relevance score attached to"
    " it.\n---------------------\n{context_str}\n---------------------\nGiven"
    " the information from multiple sources and their associated relevance"
    " scores (if provided) and not prior knowledge, answer the question. If"
    " the answer is not in the context, inform the user that you can't answer"
    " the question.\nQuestion: {query_str}\nAnswer: "
)

tree_summarize = TreeSummarize(
    summary_template=PromptTemplate(TREE_SUMMARIZE_PROMPT_TMPL)
)

#this the final query_engine

query_engine = RouterQueryEngine(
    selector=LLMMultiSelector.from_defaults(),
    query_engine_tools=[
        keyword_tool,
        vector_tool,
        summary_tool,
    ],
    summarizer=tree_summarize,
)

KeyboardInterrupt: 

: 

### without any query transform

In [11]:
response = await query_engine.aquery(
    " "
)

from llama_index.core.response.notebook_utils import display_response

display_response(response)

  attn_output = torch.nn.functional.scaled_dot_product_attention(


**`Final Response:`** I'm ready to answer your question based on the provided context information, specifically regarding the disclosure and use of confidential information as per the Global Minimum Tax Act and related legislation. Please go ahead and ask your question.

### add query transformation improve retrival

In [13]:
def refine_query(query: str) -> str:
    refined_query = f"Provide a detailed answer about '{query}' in the context of legal enquiries"
    return refined_query

In [14]:
original_query = "How much can the Minister of Health pay out of the Consolidated Revenue Fund in relation to coronavirus disease 2019 (COVID-19) tests"
refined_query = refine_query(original_query)

In [15]:
response = await query_engine.aquery(
    refined_query
)

from llama_index.core.response.notebook_utils import display_response

display_response(response)

**`Final Response:`** Based on the provided context information from Bill C-10 and Bill C-8, the Minister of Health is authorized to make payments out of the Consolidated Revenue Fund in relation to coronavirus disease 2019 (COVID-19) tests.

From Bill C-10, it is stated that the Minister of Health can pay up to $2.5 billion out of the Consolidated Revenue Fund in relation to COVID-19 tests. This is stated in Section 1 of the Act, which reads: "The Minister of Health may make payments, the total of which may not exceed $2.5 billion, out of the Consolidated Revenue Fund for any expenses incurred on or after January 1, 2022 in relation to coronavirus disease 2019 (COVID-19) tests."

From Bill C-8, it is stated that the Minister of Health can also pay up to $1.72 billion out of the Consolidated Revenue Fund in relation to COVID-19 tests. This is stated in Part 6 of the Act, which reads: "Part 6 authorizes the Minister of Health to make payments of up to $1.72 billion out of the Consolidated Revenue Fund in relation to coronavirus disease 2019 (COVID-19) tests."

It is worth noting that these two bills may have different purposes and provisions, but in the context of the question, both bills provide information on the Minister of Health's authority to make payments out of the Consolidated Revenue Fund in relation to COVID-19 tests.

Therefore, the total amount that the Minister of Health can pay out of the Consolidated Revenue Fund in relation to coronavirus disease 2019 (COVID-19) tests is the sum of the authorized amounts from both bills, which is $2.5 billion + $1.72 billion = $4.22 billion.

However, it is also possible that the Minister of Health may only be authorized to make payments up to the lower of the two amounts, which is $1.72 billion, if the expenses incurred are only up to that amount. But based on the information provided, it seems that the Minister of Health has the authority to make payments up to the higher amount of $2.5 billion.

## Evaluation

## Proviing API key for Google Gemini API

## loading the data from google drive

In [11]:
from llama_index.core import SimpleDirectoryReader
documents = SimpleDirectoryReader(r"C:\Users\agshi\Downloads\discussion_data-20240911T083316Z-001\discussion_data", filename_as_id=True).load_data()
print(f"Loaded {len(documents)} documents.")

ImportError: `llama-index-readers-file` package not found

## Generating the questions for evaluation

In [3]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.llama_dataset.generator import RagDatasetGenerator
from llama_index.llms.gemini import Gemini
import os
import nest_asyncio
nest_asyncio.apply()
# create llm
os.environ["GOOGLE_API_KEY"] = "your key"
Settings.llm = Gemini(model="models/gemini-pro", temperature=0)
def question_dataset_generator(document):
  dataset_generator = RagDatasetGenerator.from_documents(
      documents=document,
      llm=llm,
      num_questions_per_chunk=2,
      show_progress=True       # set the number of qu/estions per nodes
  )

  rag_dataset = dataset_generator.generate_questions_from_nodes()
  question = [e.query for e in rag_dataset.examples]
  return question

TypeError: Expected: str, Model, or TunedModel

In [None]:
def chunk_document(document, chunk_size):
    # Split the document into chunks of the specified chunk size
    return [document[i:i+chunk_size] for i in range(0, len(document), chunk_size)]


In [None]:
questions = []
for i in range(0, len(documents)-76, 2):
    print(i)
    document_pair = documents[i:i+2]  # Take two documents at a time
    question = question_dataset_generator(document_pair)
    questions.extend(question)

In [None]:
question_dataset_generator(documents[3:4])

In [None]:
from llama_index.core.evaluation import BatchEvalRunner
from llama_index.core.evaluation import RelevancyEvaluator
from llama_index.core.evaluation import FaithfulnessEvaluator
from llama_index.llms.groq import Groq
import os
from llama_index.llms.gemini import Gemini

os.environ["GOOGLE_API_KEY"] = "your key"
llm = Gemini(model="models/gemini-pro", temperature=0)
relevancy_evaluator = RelevancyEvaluator(llm=llm)
faithfulness_evaluator = FaithfulnessEvaluator(llm=llm)
runner = BatchEvalRunner(
    {"faithfulness": faithfulness_evaluator, "relevancy": relevancy_evaluator},
    workers=8,
)

In [None]:
def extract_elements(eval_result):
    # Dictionary to store the extracted elements
    extracted_data = {
        "contexts": [eval_result.contexts],
        "response": [eval_result.response],
        "passing": [eval_result.passing],
        "feedback": [eval_result.feedback],
        "score": [eval_result.score],
        "pairwise_source": [eval_result.pairwise_source],
        "invalid_result": [eval_result.invalid_result],
        "invalid_reason": [eval_result.invalid_reason]
    }

    # Convert the dictionary into a DataFrame
    df = pd.DataFrame(extracted_data)
    return df


In [None]:
import time
faithfulness_df = pd.DataFrame()
relevancy_df = pd.DataFrame()

for question in questions:
    response = query_engine.query(question)

    # Evaluate faithfulness
    eval_result = faithfulness_evaluator.evaluate_response(query=question, response=response)
    faithfulness_elements = extract_elements(eval_result)
    faithfulness_df = pd.concat([faithfulness_df, faithfulness_elements], ignore_index=True)

    # Evaluate relevancy
    eval_result = relevancy_evaluator.evaluate_response( query=question, response=response)
    relevancy_elements = extract_elements(eval_result)
    relevancy_df = pd.concat([relevancy_df,relevancy_elements], ignore_index=True)

    time.sleep(60)


In [None]:
relevancy_df

## Creating dataframe for average score calculation

In [None]:
def calculate_average_scores(faithfulness_df,relevancy_df):
  avg_faithfulness_score = faithfulness_df['score'].mean()
  avg_relevancy_score = relevancy_df['score'].mean()
  return {
        "average_faithfulness_score": avg_faithfulness_score,
        "average_relevancy_score": avg_relevancy_score
    }

In [None]:
calculate_average_scores(faithfulness_df,relevancy_df)

In [None]:
faithfulness_df