### Os, Sync, Env

In [15]:
import os
import dotenv
dotenv.load_dotenv(dotenv_path="../.env")
import nest_asyncio
nest_asyncio.apply()

### Azure Configuration

In [16]:
azure_api_key = os.environ["OPENAI_API_KEY"]
azure_base_url = os.environ["AZURE_BASE_URL"]
# From model URL endpoint
azure_api_version = "2024-08-01-preview"

### LLM & EMBEDDING MODEL

In [17]:
# GROQ
QROQ_API_KEY = os.environ['GROQ_API_KEY_1']
from llama_index.llms.groq import Groq
groq_llm = Groq(model="llama3-8b-8192", api_key = QROQ_API_KEY, set_run_config=None)
print("Groq: "+str(groq_llm.complete("Hi")))
# Cohere
CO_API_KEY = os.environ['COHERE_API_KEY']
from llama_index.llms.cohere import Cohere
cohere_llm = Cohere(model="command-light",api_key=CO_API_KEY)
print("Cohere: "+str(cohere_llm.complete("Hi")))
# Fast Embed
from llama_index.embeddings.fastembed import FastEmbedEmbedding
fast_embed = FastEmbedEmbedding(model_name="BAAI/bge-base-en-v1.5")

from ragas.llms import LlamaIndexLLMWrapper
from llama_index.llms.azure_openai import AzureOpenAI
evaluator_llm = LlamaIndexLLMWrapper(AzureOpenAI(
    model="gpt-4o-mini",
    deployment_name="gpt-4o-mini",
    api_key=azure_api_key,
    azure_endpoint=azure_base_url,
    api_version=azure_api_version,
))
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from ragas.embeddings import LlamaIndexEmbeddingsWrapper
evaluator_embedding = LlamaIndexEmbeddingsWrapper(AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name="text-embedding-ada-002",
    api_key=azure_api_key,
    azure_endpoint=azure_base_url,
    api_version="2023-05-15",
))

Groq: Hi! It's nice to meet you. Is there something I can help you with or would you like to chat?
Cohere:  Hi, it's nice to connect with you! How can I assist you today? I am here to ready to provide answers to connect or explore ideas. Feel free to get your critical information, and I can assist you with many areas of interest, including answering questions and providing creative content ideas. 


Fetching 5 files: 100%|██████████| 5/5 [00:00<?, ?it/s]


### Evaluation Dataset

In [None]:
import pandas as pd
df = pd.read_csv("D:/Learning New/GenAI/Project_RAG/RAG/1. Data/evaluator1_10.csv")
import ast
df['reference_contexts'] = df['reference_contexts'].apply(
    lambda x: ast.literal_eval(x) if isinstance(x, str) else x
)
print(df['reference_contexts'].iloc[0])  # Check the first value
print(type(df['reference_contexts'].iloc[0]))  # Inspect the type


['  Company Name - Attogram\n                    Product List / Index - 1mg - Moisture Analyzer - 0.01%, 0.1mg - Moisture Analyzer - 0.005%, 0.01mg Semi Micro / Analytical Balance , 0.1mg - Analytical Balance, 1mg Balances, 10mg Balances, 100mg Balances, Table Top Scales, Crane Scales, Standard Weights, Antivibration Pad, Density Kit\n              ']
<class 'list'>


KeyError: "None of [Index(['retrieved_contexts', 'response'], dtype='object')] are in the [columns]"

### SETTINGS

In [19]:
from llama_index.core.settings import Settings
Settings.llm = groq_llm
Settings.embed_model = fast_embed

### Loading Index from Persisted Index Stores

In [20]:
from llama_index.core import StorageContext, load_index_from_storage

storage_context1 = StorageContext.from_defaults(persist_dir="D:/Learning New/GenAI/Project_RAG/RAG/1. Data/index_store")
index_from_docs = load_index_from_storage(storage_context1)
storage_context2 = StorageContext.from_defaults(persist_dir="D:/Learning New/GenAI/Project_RAG/RAG/1. Data/index_from_node_store")
index_from_nodes = load_index_from_storage(storage_context2)

### Query

In [21]:
query = "Difference - PEB241T and PEB347T. What is capacity for each instrument?"

### Retrieval (Vector)

In [22]:
retriever_from_docs = index_from_docs.as_retriever(
    similarity_top_k=10,
    similarity_threshold=0.75)
retriever_from_nodes = index_from_nodes.as_retriever(
    similarity_top_k=10,
    similarity_threshold=0.75)

### Query Engine (Vector)

In [23]:
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

# configure a post processor
similarity_processor = SimilarityPostprocessor(similarity_cutoff=0.42)

# configure a response sythesizer
response_synthsizer = get_response_synthesizer(llm=Settings.llm)

# create a query engine
default_query_engine_from_docs = RetrieverQueryEngine(
    retriever=retriever_from_docs,
    response_synthesizer=response_synthsizer,
    node_postprocessors=[similarity_processor],
)
# create a query engine
default_query_engine_from_nodes = RetrieverQueryEngine(
    retriever=retriever_from_nodes,
    response_synthesizer=response_synthsizer,
    node_postprocessors=[similarity_processor],
)

### Querying (Vector)

In [24]:
# Querying
print(default_query_engine_from_docs.query(query).response)
print(default_query_engine_from_nodes.query(query).response)

According to the context information, the capacity for PEB241T is 220g and 320g, and for PEB347T, it is not mentioned.
The capacity for PEB241T is 220g and 320g, and the capacity for PEB347T is also 220g and 320g.


### Evaluating Default Method

In [61]:
from ragas.metrics import Faithfulness, LLMContextRecall, AnswerRelevancy
from ragas.integrations.llama_index import evaluate
from datasets import load_dataset
from ragas import EvaluationDataset
eval_dataset = EvaluationDataset.from_pandas(df)
metrics = [
    LLMContextRecall(llm=evaluator_llm)
]
results = evaluate(dataset=eval_dataset, metrics=metrics, batch_size=1, query_engine=default_query_engine_from_docs)

Running Query Engine: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]
Evaluating: 100%|██████████| 10/10 [01:06<00:00,  6.66s/it]


In [None]:
print(results)
df_new = results.to_pandas()

{'context_recall': 0.4333}
retrieved_contexts    0
response              0
dtype: int64
retrieved_contexts    [Company Name - Attogram\n                    ...
response              The Semi Micro category includes the following...
Name: 0, dtype: object


In [63]:
from ragas.metrics import Faithfulness, LLMContextRecall, AnswerRelevancy
from ragas.integrations.llama_index import evaluate
from datasets import load_dataset

from ragas import EvaluationDataset

eval_dataset = EvaluationDataset.from_pandas(df)
metrics = [
    AnswerRelevancy(llm=evaluator_llm)
]
results_1 = evaluate(dataset=eval_dataset, metrics=metrics, batch_size=1, query_engine=default_query_engine_from_docs)

Running Query Engine: 100%|██████████| 10/10 [00:09<00:00,  1.04it/s]
Evaluating:   0%|          | 0/10 [00:00<?, ?it/s]Exception raised in Job[8]: AuthenticationError(Error code: 401 - {'error': {'message': 'Incorrect API key provided: 9nTwVLK8************************************************************************mi9K. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}})
Evaluating:  10%|█         | 1/10 [02:16<20:30, 136.67s/it]Exception raised in Job[6]: AuthenticationError(Error code: 401 - {'error': {'message': 'Incorrect API key provided: 9nTwVLK8************************************************************************mi9K. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}})
Exception raised in Job[4]: AuthenticationError(Error code: 401 - {'error': {'message': 'Incorrect API key 

KeyError: 0

In [64]:
print(results_1)
results_1.to_pandas()

NameError: name 'results_1' is not defined

In [None]:
print(results_1)
df_2 = results_1.to_pandas()
from ragas.metrics import Faithfulness, LLMContextRecall, AnswerRelevancy
from ragas.integrations.llama_index import evaluate
from datasets import load_dataset

from ragas import EvaluationDataset
eval_dataset = EvaluationDataset.from_pandas(df_2)
metrics = [
    Faithfulness(llm=evaluator_llm)
]
results_2 = evaluate(dataset=eval_dataset, metrics=metrics, batch_size=1, query_engine=default_query_engine_from_docs)

### Retrieval (BM25)

In [None]:
from llama_index.retrievers.bm25 import BM25Retriever

bm25_retriever_from_nodes = BM25Retriever.from_defaults(docstore=index_from_nodes.docstore, similarity_top_k=10)
bm25_retriever_from_docs = BM25Retriever.from_defaults(docstore=index_from_docs.docstore, similarity_top_k=10)

resource module not available on Windows


### Query Engine (BM25)

In [None]:
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

# configure a post processor
similarity_processor = SimilarityPostprocessor(similarity_cutoff=0.42)

# configure a response sythesizer
response_synthsizer = get_response_synthesizer(llm=Settings.llm)

# create a query engine
bm25_query_engine_from_docs = RetrieverQueryEngine(
    retriever=bm25_retriever_from_docs,
    response_synthesizer=response_synthsizer,
    node_postprocessors=[similarity_processor],
)
# create a query engine
bm25_query_engine_from_nodes = RetrieverQueryEngine(
    retriever=bm25_retriever_from_nodes,
    response_synthesizer=response_synthsizer,
    node_postprocessors=[similarity_processor],
)

### Querying (BM25)

In [None]:
# Querying
print(bm25_query_engine_from_docs.query(query).response)
print(bm25_query_engine_from_nodes.query(query).response)

The capacity for PEB241T is 220g and the capacity for PEB347T is 320g.
The capacity for PEB241T is 220g and 320g, and for PEB347T it is also 220g and 320g.


### Retrieval (Hybrid)

In [None]:
from llama_index.core.retrievers import QueryFusionRetriever

hybrid_retriever_from_docs = QueryFusionRetriever(
    [bm25_query_engine_from_docs, retriever_from_docs],
    similarity_top_k=10,
    num_queries=1,
    mode="reciprocal_rerank",
    use_async=True,
    verbose=True,
)
hybrid_retriever_from_nodes = QueryFusionRetriever(
    [bm25_query_engine_from_nodes, retriever_from_nodes],
    similarity_top_k=10,
    mode="reciprocal_rerank",
    num_queries=1,
    use_async=True,
    verbose=True,
)

### Query Engine (Hybrid)

In [None]:
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

# configure a post processor
similarity_processor = SimilarityPostprocessor(similarity_cutoff=0.02)

# configure a response sythesizer
response_synthsizer = get_response_synthesizer(llm=Settings.llm)

# create a query engine
hybrid_query_engine_from_docs = RetrieverQueryEngine(
    retriever=hybrid_retriever_from_docs,
    response_synthesizer=response_synthsizer,
    node_postprocessors=[similarity_processor],
)
# create a query engine
hybrid_query_engine_from_nodes = RetrieverQueryEngine(
    retriever=hybrid_retriever_from_nodes,
    response_synthesizer=response_synthsizer,
    node_postprocessors=[similarity_processor],
)

### Querying (Hybrid)

In [None]:
# Querying
print(hybrid_query_engine_from_docs.query(query).response)
print(hybrid_query_engine_from_nodes.query(query).response)

The capacity for PEB241T is 220g and the capacity for PEB347T is 320g.
The capacity for PEB241T is 220g and the capacity for PEB347T is 320g.
