### Os, Sync, Env

In [1]:
import os
import dotenv
dotenv.load_dotenv(dotenv_path="../.env")
import nest_asyncio
nest_asyncio.apply()

### LLM & EMBEDDING MODEL

In [2]:
# GROQ
QROQ_API_KEY = os.environ['GROQ_API_KEY_1']
from llama_index.llms.groq import Groq
groq_llm = Groq(model="llama3-8b-8192", api_key = QROQ_API_KEY, set_run_config=None)
print("Groq: "+str(groq_llm.complete("Hi")))
# Cohere
CO_API_KEY = os.environ['COHERE_API_KEY']
from llama_index.llms.cohere import Cohere
cohere_llm = Cohere(model="command-light",api_key=CO_API_KEY)
print("Cohere: "+str(cohere_llm.complete("Hi")))
# Fast Embed
from llama_index.embeddings.fastembed import FastEmbedEmbedding
fast_embed = FastEmbedEmbedding(model_name="BAAI/bge-base-en-v1.5")

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


Groq: Hi! It's nice to meet you. Is there something I can help you with or would you like to chat?
Cohere:  Hello! Welcome to the conversation, How can I help you with anything? 


Fetching 5 files: 100%|██████████| 5/5 [00:00<?, ?it/s]


### SETTINGS

In [3]:
from llama_index.core.settings import Settings
Settings.llm = groq_llm
Settings.embed_model = fast_embed

### LOADING DOCS FROM PERSISTED DOC STORE

In [4]:
from llama_index.core.storage.docstore import SimpleDocumentStore
docstore = SimpleDocumentStore.from_persist_dir(persist_dir="D:/Learning New/GenAI/Project_RAG/RAG/1. Data/doc_store")
documents = list(docstore.docs.values())

### INDEXING (From document)

In [5]:
from llama_index.core import Document, VectorStoreIndex
index = VectorStoreIndex.from_documents(
    # remember, you must pass a list of documents!
    documents, 
    embed_model=Settings.embed_model,
    show_progress=True)

Parsing nodes: 100%|██████████| 49/49 [00:00<00:00, 3499.24it/s]
Generating embeddings: 100%|██████████| 49/49 [00:09<00:00,  4.93it/s]


### INDEXING (From nodes)

In [7]:
from llama_index.core.node_parser import SentenceSplitter

# instantiate a node parser
splitter = SentenceSplitter(
    chunk_size=64,
    chunk_overlap=32,
    paragraph_separator="\n\n\n\n",
)

# pass a list of documents to the node paraser
nodes = splitter.get_nodes_from_documents(documents)

# create the index from the nodes
index_from_nodes = VectorStoreIndex(
    nodes,
    embed_model=Settings.embed_model,
    show_progress=True
    )

Metadata length (30) is close to chunk size (64). Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.
Metadata length (26) is close to chunk size (64). Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.
Metadata length (29) is close to chunk size (64). Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.
Metadata length (29) is close to chunk size (64). Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.
Metadata length (21) is close to chunk size (64). Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.
Metadata length (25) is close to chunk size (64). Resulting chunks are less than 50 tokens

Generating embeddings: 100%|██████████| 957/957 [01:45<00:00,  9.11it/s]


### Saving Index

In [10]:
index.storage_context.persist(persist_dir="D:/Learning New/GenAI/Project_RAG/RAG/1. Data/index_store")
index_from_nodes.storage_context.persist(persist_dir="D:/Learning New/GenAI/Project_RAG/RAG/1. Data/index_from_node_store")

### QUERY

In [10]:
query = "Difference - PEB241T and PEB347T. What is capacity for each instrument?"

### Retrieval - Hybrid Fusion (BM25+Default), topk=10

In [12]:
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core import PromptTemplate
# from prompts import QUESTION_GEN_PROMPT

vector_retriever = index.as_retriever(similarity_top_k=10)

bm25_retriever = BM25Retriever.from_defaults(docstore=index.docstore, similarity_top_k=10)

# QUERY_GEN_PROMPT_TEMPLATE=PromptTemplate(QUESTION_GEN_PROMPT)

retriever = QueryFusionRetriever(
    [vector_retriever, bm25_retriever],
    similarity_top_k=10,
    num_queries=1,  # set this to 1 to disable query generation
    mode="reciprocal_rerank",
    use_async=True,
    verbose=True,
    # query_gen_prompt=QUERY_GEN_PROMPT_TEMPLATE, 
)

nodes_with_scores = retriever.retrieve(query)
for node in nodes_with_scores:
    print(f"Score: {node.score:.2f} - {node.text}...\n-----\n")

resource module not available on Windows
Score: 0.03 - FEATURES: 5" Touch screen Display. Aluminium alloy base. Date & Time Display. Multifunction Weighing Units. In-built rechargeable battery. RS232C Interface. Weighing, Counting, Percentage. Dynamic weighing function.
        SPECIFICATIONS: HSN: 90160020; Capacity: 220g, 320g; Readability: 0.001g (1mg); Repeatability (±): 0.001g; Linearity (±): 0.002g; Display: 5" Touch screen; Pan Size: 80mm; Calibration: External Calibration; Interface: RS232C; Date & Time: Yes; Tare Range: Full; Operating Temperature: 10°C to 40°C; Power Supply: DC Adaptor, using AC 220V/50-60Hz; Dimension (LxWxH): 360 x 260 x 355 mm; Weight: 4.5kg...
-----

Score: 0.03 - FEATURES: Overload Alarm. External Calibration. Extra Display (Optional). 
        SPECIFICATIONS: HSN: 90160020; Capacity: 200g, 300g; Readability: 0.001g (1mg); Repeatability (±): 0.001g; Linearity (±): 0.002g; Pan Size: 90mm; Calibration: External Calibration; Windshield Size: Ø150 x 80 mm; D

### Query Engine + Post processing

In [14]:
from llama_index.core import get_response_synthesizer
from llama_index.core.postprocessor import SimilarityPostprocessor

similarity_processor = SimilarityPostprocessor(similarity_cutoff=0.025)

response_synthsizer = get_response_synthesizer(llm=Settings.llm)

query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthsizer,
    node_postprocessors=[similarity_processor],
)
query_engine.query(query)

Response(response='The capacity for PEB241T is 220g and 320g, and the capacity for PEB347T is not specified in the given context.', source_nodes=[NodeWithScore(node=TextNode(id_='6e653348-7339-4a74-8891-e741aa8142f6', embedding=None, metadata={'Instrument type': 'Precision Electronic Balance', 'Sub category': '1mg', 'Model': 'PEB241T, PEB347T'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='429d4a3c-e10b-4391-805a-c5ae0eac5442', node_type='4', metadata={'Instrument type': 'Precision Electronic Balance', 'Sub category': '1mg', 'Model': 'PEB241T, PEB347T'}, hash='248a40a4e0bfd882a6976a1f436985303bedd8ba96ac4b3cb0cee7b131ddfc3f')}, metadata_template='{key}: {value}', metadata_separator='\n', text='FEATURES: 5" Touch screen Display. Aluminium alloy base. Date & Time Display. Multifunction Weighing Units. In-built rechargeable battery. RS232C Interface. Weighing, Counting, Percentage. Dynamic weighing 