## Dependencies

In [None]:
import torch
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.prompts.prompts import SimpleInputPrompt
from llama_index.llms.huggingface import HuggingFaceLLM
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import os




In [2]:

# 4. Define paths for storing the FAISS index and document metadata
data_path = "./data/pdf_data"
faiss_index_path = "./data/faiss_index.bin"
doc_metadata_path = "./data/doc_metadata.pkl"


In [3]:
os.chdir('../')

In [4]:
%pwd

'c:\\Projects\\python\\journAI'

## Load Model

In [5]:
# 2. Define system prompts or context
system_prompt = """
You are an intelligent Q&A assistant. Your role is to provide precise, accurate, and concise answers to user queries based on the context retrieved from the knowledge base. 

Guidelines:
1. Use the retrieved context to answer questions; avoid adding unsupported information.
2. If the context is insufficient, state: "The provided context does not contain enough information."
3. Be brief and professional in your responses.

Your ultimate goal is to assist the user effectively.
"""


# 3. Define how the user query should be wrapped
query_wrapper_prompt = SimpleInputPrompt(
    "Based on the following context, respond to the user's query:\n\n<|CONTEXT|>\n\n<|USER|>{query_str}\n<|ASSISTANT|>"
)


In [6]:
# device_map = {
#     "transformer": "cuda:0",  # Put most of the model on the GPU
#     "lm_head": "cpu",  # Offload the output head to the CPU
# }
llm = HuggingFaceLLM(
    context_window=2040,  # 4096,
    max_new_tokens=128,  # 256,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",  # Auto-distribute model layers across devices
    model_kwargs={
        "torch_dtype": torch.float16,  # Use mixed precision for lower memory usage
        "load_in_8bit": True,
        "llm_int8_enable_fp32_cpu_offload": True  # Enable CPU offloading for unsupported modules
    }
)


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


In [7]:
import numpy as np
# 1. Load documents from PDF files
docs = SimpleDirectoryReader("./data/pdf_data").load_data()

# 2. Set up embeddings using SentenceTransformer
embed_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# 3. Define a function to embed documents
def embed_documents(documents, model):
    return [model.encode(doc.text, show_progress_bar=True) for doc in documents]

# 5. Initialize or load FAISS index and metadata
if os.path.exists(faiss_index_path) and os.path.exists(doc_metadata_path):
    print("Loading existing FAISS index and metadata.")
    
    # Load FAISS index
    faiss_index = faiss.read_index(faiss_index_path)
    
    # Load metadata
    with open(doc_metadata_path, "rb") as f:
        metadata = pickle.load(f)
else:
    print("FAISS index or metadata not found. Creating a new index.")
    
    # Embed documents and convert to NumPy array
    embeddings = embed_documents(docs, embed_model)
    embeddings = np.array(embeddings, dtype="float32")

    # Create FAISS index
    dimension = embeddings.shape[1]  # Embedding dimension
    faiss_index = faiss.IndexFlatL2(dimension)  # L2 distance index
    faiss_index.add(embeddings)

    # Save FAISS index
    faiss.write_index(faiss_index, faiss_index_path)

    # Save metadata
    metadata = [{"text": doc.text, "id": i} for i, doc in enumerate(docs)]
    with open(doc_metadata_path, "wb") as f:
        pickle.dump(metadata, f)


# 6. Function to query the FAISS index
def query_index(query, model, faiss_index, metadata, top_k=5):
    query_vector = model.encode(query)
    distances, indices = faiss_index.search(query_vector.reshape(1, -1), top_k)
    
    # Retrieve results
    results = [{"text": metadata[idx]["text"], "id": metadata[idx]["id"], "distance": distances[0][i]} 
               for i, idx in enumerate(indices[0])]
    return results


Loading existing FAISS index and metadata.


In [8]:
# 7. Ask a question
query = "Explain the SAM algorithm?"
results = query_index(query, embed_model, faiss_index, metadata, top_k=3)

# 8. Print results
for i, result in enumerate(results):
    print(f"Result {i + 1}:")
    print(f"Text: {result['text']}")
    print(f"ID: {result['id']}")
    print(f"Distance: {result['distance']}\n")
# Updated result printing
def print_results(results):
    for i, result in enumerate(results):
        print(f"Result {i + 1}:")
        # Clean and truncate the text for better readability
        cleaned_text = " ".join(result["text"].split())  # Remove extra whitespaces and line breaks
        truncated_text = cleaned_text[:500] + "..." if len(cleaned_text) > 500 else cleaned_text
        print(f"Text: {truncated_text}")
        print(f"ID: {result['id']}")
        print(f"Distance: {result['distance']:.4f}\n")  # Show distance with 4 decimal points

# Print results using the updated function
print_results(results)


KeyError: 'text'

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from llama_index.core import Settings
# ------------------------------------------------------
# 1. Define Embedding Model (No More LangchainEmbedding)
# ------------------------------------------------------
# embedding_model = HuggingFaceEmbeddings(
#     model_name="sentence-transformers/all-mpnet-base-v2",
#     # Optionally adjust parameters, e.g. batch_size
#     # batch_size=16,
# )
embed_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
# -----------------------------
# 3. Load Documents (PDF/Text)
# -----------------------------
docs = SimpleDirectoryReader("./data/pdf_data").load_data()

# -----------------------------
# 4. Build VectorStoreIndex
#    Make sure to use 'embed_model=...' (NOT 'embedding=...')
# -----------------------------
index = VectorStoreIndex.from_documents(
    docs,
    embed_model=embedding_model,   # <-- This ensures local embedding is used
    chunk_size_limit=1024
)

# -----------------------------
# 5. Create a Query Engine
#    Pass the local HuggingFace LLM
# -----------------------------
query_engine = index.as_query_engine(llm=llm)

# -----------------------------
# 6. Ask a Question
# -----------------------------
response = query_engine.query("What is attention mechanism?")
print(response)

  embedding_model = HuggingFaceEmbeddings(



 Sampling masks Automatically Generation ( SAM algorithm automically Generates masks from inputted prompt prompts image segmentation segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation Segmentation


In [10]:
response = query_engine.query("What is attention mechanism?")
print(response)



Attention mechanism is a way of attention paid to certain parts of input when outputting something else entirely different from inputted parts attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechanism attention mechani