### Import Handling

In [None]:

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.extractors import (
    TitleExtractor,
    QuestionsAnsweredExtractor,
    SummaryExtractor,
)

from llama_index.core.node_parser import (
    SemanticDoubleMergingSplitterNodeParser,
    LanguageConfig,
)

#from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Document
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline, IngestionCache
from llama_index.llms.groq import Groq
from llama_index.llms.ollama import Ollama
from llama_index.core.node_parser import (
    SentenceSplitter,
    SemanticSplitterNodeParser,
)
from llama_index.vector_stores.postgres import PGVectorStore

from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openrouter import OpenRouter
from llama_index.core.llms import ChatMessage
import os 
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
load_dotenv()

# Credentials are read from the environment; each is None when its variable is unset.
api_key = os.environ.get("LLM_API_KEY")
OPEN_API_KEY = os.environ.get("OPENAI_API_KEY")

# Base URL of the OpenRouter API.
openai_api_base = "https://openrouter.ai/api/v1"


In [None]:
# Alternative embedding model, kept for reference:
# embed_model = HuggingFaceEmbedding(model_name="Alibaba-NLP/gte-large-en-v1.5", trust_remote_code=True)

# OpenAI embedding model, requested with 1024 dimensions.
embed_model = OpenAIEmbedding(
    model="text-embedding-3-large",
    dimensions=1024,
)

### Document Ingestion

In [None]:
# Load the contents of the local "data" directory as documents.
documents = SimpleDirectoryReader(input_dir="data").load_data()


In [None]:
# Inspect the first loaded document: its raw text, then its metadata.
first_document = documents[0]
print(first_document.text)
print(first_document.metadata)

### LLM

In [None]:
# Template that asks the model for a one-line description of a chunk.
# `{context_str}` is the placeholder for the chunk text; the template is handed
# to TitleExtractor (as `node_template`) in the chunking cell.
prompt = """ You are a chunk analysis assistant. Your task is to examine a chunk of text—typically extracted from a PDF document—and generate a **clear, concise one-line description** that accurately summarizes the key information contained in the chunk.

Instructions:
- Focus only on what is explicitly present in the chunk. Do not infer or interpret beyond the given content.
- Include visible structural clues such as section headers, bullet points, or table data if they help contextualize the description.
- Write in plain language suitable for downstream use in semantic retrieval or indexing.
- The output should be a single sentence that captures the main idea or purpose of the chunk.

**chunk content:**
{context_str}



"""


# LLM served through OpenRouter.
# The previous `prompt=prompt` argument was dropped: `prompt` is not a
# documented OpenRouter constructor option, so the template had no effect on
# the LLM itself. The template is applied where it is actually consumed
# (the extractor's `node_template`).
llm = OpenRouter(
    api_key=api_key,
    model="meta-llama/llama-3.3-70b-instruct",
    max_tokens=256,
    context_window=4096,
)

In [None]:
# groq_api_key="groq_api_key"
# llm = Groq(model="llama3-8b-8192", api_key=groq_api_key)
# llm = Ollama(model="llama3.2:latest", request_timeout=120.0)
# print(llm.complete("What is the Capital of France"))

### Chunking + Metadata Extraction

In [None]:
# Several chunking and metadata-extraction options are set up below; go through
# the documentation and keep only the ones you need.

from llama_index.core.extractors import (
    KeywordExtractor,
    QuestionsAnsweredExtractor,
    SummaryExtractor,
    TitleExtractor,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import MetadataMode

# --- Splitters ---------------------------------------------------------------

# Fixed-size splitter: 512-unit chunks with an overlap of 50.
text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=50)

# Embedding-driven splitter that uses the embedding model configured earlier.
semantic_text_splitter = SemanticSplitterNodeParser(
    buffer_size=1,
    breakpoint_percentile_threshold=95,
    embed_model=embed_model,
)

# Double-merging splitter configured with the spaCy model "en_core_web_md".
config = LanguageConfig(language="english", spacy_model="en_core_web_md")
double_semantic_merging_splitter = SemanticDoubleMergingSplitterNodeParser(
    language_config=config,
    initial_threshold=0.4,
    appending_threshold=0.6,
    merging_threshold=0.6,
    max_chunk_size=5000,
)
# print("double_semantic_merging_splitter_content:",double_semantic_merging_splitter)

# --- Metadata extractors -----------------------------------------------------

# Bundle of LLM-backed extractors; only used by the commented-out
# `transformations` variant further down.
extractors = [
    SummaryExtractor(summaries=["prev", "self", "next"], llm=llm),
    QuestionsAnsweredExtractor(
        questions=3,
        llm=llm,
        metadata_mode=MetadataMode.EMBED,
    ),
]

# The same extractor configurations, available individually.
summary_extractor = SummaryExtractor(
    summaries=["prev", "self", "next"],
    llm=llm,
)
qa_extractor = QuestionsAnsweredExtractor(
    questions=3,
    llm=llm,
    metadata_mode=MetadataMode.EMBED,
)

# Title extractor driven by the custom chunk-description prompt.
title_extractor = TitleExtractor(nodes=5, llm=llm, node_template=prompt)
# print("Title extractor:", title_extractor)

# --- Pipeline steps ----------------------------------------------------------

# Active configuration: semantic chunking followed by keyword extraction.
# transformations = [semantic_text_splitter, KeywordExtractor(keywords=10)] + extractors
transformations = [
    semantic_text_splitter,
    KeywordExtractor(keywords=10),
]
print(transformations)

### Ingestion Pipeline + Transformations

In [None]:
# OPTION ONE: run the documents through an IngestionPipeline so that chunking
# and the configured transformations produce the nodes in a single pass.

import nest_asyncio
from llama_index.core import SimpleDirectoryReader
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter

# Patch asyncio so nested event loops are allowed.
nest_asyncio.apply()

pipeline = IngestionPipeline(transformations=transformations)
nodes = pipeline.run(documents=documents, in_place=True, show_progress=True)

# Reload the source documents from disk and print them.
documents = SimpleDirectoryReader("./data").load_data()
print(documents)

# Split the reloaded documents with a default SentenceSplitter and show the
# first resulting node. Note: `node_parser` holds the parsed nodes.
parser = SentenceSplitter()
node_parser = parser.get_nodes_from_documents(documents)
print("Node Parser:", node_parser[0])

In [None]:
# # OPTION TWO: Build the nodes directly when you don't want to chunk your documents

# from llama_index.core.schema import TextNode

# nodes = [
#     TextNode(
#         text=doc.text,
#         metadata=doc.metadata,  # Optional
#         id_=doc.id_ if hasattr(doc, "id_") else None  # Optional
#     )
#     for doc in documents
# ]
# print("Nodes :",nodes[0].text)

In [None]:
# Report how many nodes the pipeline produced and show the first node's metadata.
node_count = len(nodes)
first_node_metadata = nodes[0].metadata
print(f"Number of nodes: {node_count}")
print(f"content: {first_node_metadata}")

In [None]:
# for extractor in qa_extractors:
#     nodes = extractor(nodes) 

In [None]:
# To check the qa_extractor metadata of the nodes, you can use the following code

# print('**********************')
# print(f"prev_section_summary: {nodes[1].metadata['prev_section_summary']}")
# print('**********************')
# print(f"next_section_summary: {nodes[1].metadata['next_section_summary']}")
# print('**********************')
# print(f"section_summary: {nodes[1].metadata['section_summary']}")
# print('**********************')
# print(f"content: {nodes[0].metadata['questions_this_excerpt_can_answer']}")

In [None]:
# for i in range(len(nodes)):
#     # print("nodes:", nodes[i].metadata) 
#     print(f"Node {i+1}:", nodes[i].metadata['section_summary'])

### Database Setup

In [None]:
import psycopg2

# Placeholder connection string: replace YOURPASSWORD and YOURPORT with real
# values before running. NOTE(review): there is a space after "postgres:" which
# would presumably be parsed as part of the password - remove it when filling in.
connection_string = "postgresql://postgres: YOURPASSWORD@localhost:YOURPORT"
db_name = "vector_hybrid_combined"

# Recreate the target database from scratch. WARNING: any existing database
# named `db_name` is dropped.
conn = psycopg2.connect(connection_string)
try:
    # CREATE/DROP DATABASE cannot run inside a transaction block, so every
    # statement is committed immediately.
    conn.autocommit = True
    with conn.cursor() as c:
        c.execute(f"DROP DATABASE IF EXISTS {db_name}")
        c.execute(f"CREATE DATABASE {db_name}")
finally:
    # The administrative connection is only needed for the two statements
    # above; close it so re-running this cell does not leak connections.
    conn.close()

In [None]:
from sqlalchemy import make_url

# Split the connection string into host / port / user / password parts.
url = make_url(connection_string)

# HNSW index settings forwarded to the vector store.
hnsw_settings = {
    "hnsw_m": 16,
    "hnsw_ef_construction": 64,
    "hnsw_ef_search": 40,
    "hnsw_dist_method": "vector_cosine_ops",
}

# Postgres-backed vector store with hybrid search enabled.
# `embed_dim` is 1024, the same number of dimensions requested from the
# embedding model.
vector_store = PGVectorStore.from_params(
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    database=db_name,
    table_name="vector_store",
    embed_dim=1024,
    hybrid_search=True,
    text_search_config="english",
    hnsw_kwargs=hnsw_settings,
)

In [None]:
# Route index storage to the Postgres vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index from the loaded documents.
# NOTE(review): this indexes `documents`, not the `nodes` produced by the
# ingestion pipeline, so the semantic chunks and keyword metadata built there
# are not what ends up in the store - confirm this is intended.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    embed_model=embed_model,
    show_progress=True,
)

In [None]:
# One-off question answered through a hybrid query engine.

hybrid_query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid",
    sparse_top_k=5,
    dense_top_k=5,
)

question = "What are the sum insured options of Arogya Sanjeevani policy?"
hybrid_response = hybrid_query_engine.query(question)

# Final answer from the model.
print("Response:\n", hybrid_response)

# Retrieved source chunks with their metadata.
print("\n--- Retrieved Chunks & Metadata ---\n")
separator = "-" * 60
for position, hit in enumerate(hybrid_response.source_nodes, start=1):
    source = hit.node
    print(f"Chunk {position}:")
    print("Text:", source.text[:500].strip(), "...\n")  # first 500 characters only
    print("Metadata:", source.metadata)
    print(separator)


In [None]:
# Question-answering prompt template.
# Placeholders: context_str, query_str, memory.
template_str = (
    "your task is to answer the question based on the context provided. "
    "If the context does not contain enough information to asnwer the question, "
    "so please say 'I don't know'."
    "\n\ncontext: {context_str}"
    "\n\nquestion: {query_str}"
    "\n\nChatHistory: {memory}"
)

In [None]:
# Chat system: in a notebook, the input box is displayed at the top of the code
# editor as long as no error is raised.

from llama_index.core.memory import ChatMemoryBuffer
import os
from datetime import datetime

# Set up the log folder and the per-session log file name.
log_dir = "chat_logs"
os.makedirs(log_dir, exist_ok=True)

timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
base_filename = f"chat_session_{timestamp}"
history_file = os.path.join(log_dir, base_filename + ".txt")

# Conversation memory shared by every turn of the session.
memory = ChatMemoryBuffer.from_defaults(token_limit=1500)

# Instructions only. The "context" chat engine retrieves the context for each
# message and takes the conversation history from `memory`, so the system
# prompt must not carry {context_str} / {query_str} / {memory} placeholders.
# `template_str` is deliberately not rendered here: it uses single-brace
# placeholders, and it was previously formatted with keyword names that do not
# match them, so its placeholders ended up in the system prompt as literal
# text. A separate name is used instead of `prompt` so that the extractor
# prompt defined earlier in the notebook is not overwritten.
chat_system_prompt = (
    "Your task is to answer the question based on the context provided. "
    "If the context does not contain enough information to answer the "
    "question, say 'I don't know'."
)
print("\nPrompt:\n", chat_system_prompt)

# Build the engine once: nothing in its configuration changes between turns.
# `vector_store_query_mode="hybrid"` mirrors the hybrid query engine used for
# the single-prompt cell; presumably it is required for `sparse_top_k` to take
# effect.
hybrid_chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=memory,
    vector_store_query_mode="hybrid",
    sparse_top_k=10,
    dense_top_k=10,
    system_prompt=chat_system_prompt,
)

# Chat loop: runs until the user labels the session 'correct' or 'wrong'.
while True:
    query = input("Your question (type 'correct' or 'wrong' to finish): ").strip()

    # Ignore empty input instead of sending a blank message to the LLM.
    if not query:
        continue

    command = query.lower()
    if command in ["correct", "wrong"]:
        # Rename the log file so its name records the session outcome.
        new_file = os.path.join(log_dir, f"{base_filename}_{command}.txt")
        try:
            os.rename(history_file, new_file)
        except FileNotFoundError:
            # The log file is only created by the first answered question, so
            # it does not exist when the user quits straight away.
            print("\nNo messages were logged in this session; nothing to save.\nExiting chat.")
        else:
            print(f"\nSession saved to '{new_file}'\nExiting chat.")
        break

    response = hybrid_chat_engine.chat(query)
    print("\nUser:", query)
    print("\nAssistant:", response)

    # Print the retrieved source chunks and their metadata.
    print("\n--- Retrieved Chunks & Metadata ---\n")
    for i, node in enumerate(response.source_nodes, start=1):
        print(f"Chunk {i}:")
        print("Text:", node.node.text[:500].strip(), "...\n")  # Limit to first 500 chars
        print("Metadata:", node.node.metadata)
        print("-" * 60)

    # Append this exchange to the session log.
    with open(history_file, "a", encoding="utf-8") as f:
        f.write(f"User: {query}\n")
        f.write(f"Assistant: {response}\n\n")
