In [1]:
import sqlite3, os
from datetime import datetime

In [2]:
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.utils.export import generate_multimodal_pages
from docling.utils.utils import create_hash
import logging
_log = logging.getLogger(__name__)

IMAGE_RESOLUTION_SCALE = 3.0
from docling_core.types.doc import ImageRefMode, PictureItem, TableItem

pipeline_options = PdfPipelineOptions()
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
pipeline_options.generate_page_images = True
pipeline_options.generate_picture_images = True
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
from docling.document_converter import (
    DocumentConverter,
    PdfFormatOption,
    WordFormatOption,
)
from docling.pipeline.simple_pipeline import SimplePipeline
from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline

doc_converter = (
    DocumentConverter(  # all of the below is optional, has internal defaults.
        allowed_formats=[
            InputFormat.PDF,
            InputFormat.IMAGE,
            InputFormat.DOCX,
            InputFormat.HTML,
            InputFormat.PPTX,
            InputFormat.ASCIIDOC,
            InputFormat.MD,
        ],  # whitelist formats, non-matching files are ignored.
        format_options={
        #     InputFormat.PDF: PdfFormatOption(
        #         pipeline_cls=StandardPdfPipeline, backend=PyPdfiumDocumentBackend
        #     )
        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
        ,
            InputFormat.DOCX: WordFormatOption(
                pipeline_cls=SimplePipeline  # , backend=MsWordDocumentBackend
            ),
        },
    )
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from PIL import Image

In [7]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama, OllamaLLM
from pathlib import Path
import time

from langchain_core.output_parsers import StrOutputParser
llm_llama3 = ChatOllama(
    model="llama3.2:1b",
    temperature=0,
    # other params...
)
llm_llava = OllamaLLM(model="llava", 
    temperature=0,)

In [6]:
import base64
import io
import uuid
import os
from io import BytesIO
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader

In [7]:
file_path = Path(r"D:\Ed\RAGs\PDF_breaking\Manual fr RAG.pdf")
fldr_nm = "_".join(file_path.name.split('.')[:-1])
output_dir = Path(f"./docling/{fldr_nm}")

In [8]:
def encode_image(image_input):
    """
    Convert image file path or PIL image to Base64 encoded string.

    :param image_input: Path to the image file or a PIL image object
    :return: Base64 encoded string
    """
    if isinstance(image_input, str):
        # Assume the input is a file path
        with open(image_input, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")
    elif isinstance(image_input, Image.Image):
        # Assume the input is a PIL image
        buffered = BytesIO()
        image_input.save(buffered, format="JPEG")  # You can change the format if needed
        return base64.b64encode(buffered.getvalue()).decode("utf-8")
    else:
        raise TypeError("Unsupported input type. Please provide a file path or a PIL image.")


In [9]:
conv_results = doc_converter.convert_all([file_path])

In [10]:
os.makedirs(f"{output_dir}/images/", exist_ok=True)

In [11]:
from ollama_utils import ollama_model_list, ollama_active_model_list, ollama_unload_models, stop_ollama

In [None]:
import threading
import time

# Set the timeout to 60 seconds
timeout_duration = 60

# Function to execute your code with a timeout and retry mechanism
def execute_with_timeout_and_retry():
    for image_b64 in img_base64_list:
        retries = 3
        success = False
        while retries > 0 and not success:
            def target():
                try:
                    llm_with_image_context = llm_llava.bind(images=[image_b64])
                    mdl_rstl = llm_with_image_context.invoke(img_smry_prompt)
                    image_summaries.append(mdl_rstl)
                except Exception as e:
                    raise e

            thread = threading.Thread(target=target)
            thread.start()
            thread.join(timeout_duration)
            
            if thread.is_alive():
                print(f"Execution timed out! Retries left: {retries - 1}")
                thread.join(0)  # Kill the thread
                retries -= 1
            else:
                success = True

# Call the function
execute_with_timeout_and_retry()


### chat history

https://medium.com/@eric_vaillancourt/mastering-langchain-rag-integrating-chat-history-part-2-4c80eae11b43

In [1]:
import bs4
from sqlalchemy import create_engine, Column, Integer, String, Text, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship, declarative_base
from sqlalchemy.exc import SQLAlchemyError
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [10]:
from langchain_huggingface import HuggingFaceEmbeddings
# sntnc_trnsfrmr_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [3]:
### Construct retriever ###
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

In [4]:

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)


In [5]:
vectorstore = Chroma.from_documents(documents=splits, embedding=sntnc_trnsfrmr_embeddings)
retriever = vectorstore.as_retriever()

In [14]:
contextualize_q_prompt

ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.Annotated[l

In [8]:
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm_llama3, retriever, contextualize_q_prompt
)

In [11]:
template = """Answer the question based only on the following context, which can include text and tables:
{context}
Question: {question}
"""
rag_prompt_text = ChatPromptTemplate.from_template(template)

In [9]:
from langchain_core.runnables import RunnableLambda
from langchain_ollama import ChatOllama, OllamaLLM
import base64
from langchain_core.messages import HumanMessage
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

In [12]:
generation_chain = rag_prompt_text | llm_llama3 | StrOutputParser()
retrieval_chain = {"context": history_aware_retriever, "question": RunnablePassthrough()
                    } | RunnablePassthrough.assign(output=generation_chain)


In [13]:
retrieval_chain.invoke("What is Task Decomposition?")

AttributeError: 'str' object has no attribute 'get'

In [None]:
# Modify your chain to store the documents and extract the source
context = (
    RunnableMap(
        {
            "question": lambda x: x["question"],
            "memory": memory.load_memory_variables,
        }
    )
    | RunnableMap(
    {
        "context": (
            retriever_chain
            | _get_k_or_less_documents
            | (lambda docs: [documents.append(doc.metadata['source']) for doc in docs] or docs)  # Store the source of the documents
            | reorder_documents
            | format_docs
        ),
        "question": lambda x: x["question"],
        "chat_history": lambda x: x["memory"]["chat_history"],
    }
)
)

In [15]:
from langchain_core.messages import HumanMessage, AIMessage

# chat_history = []

# question = "What is Task Decomposition?"
# chat_history.extend([HumanMessage(content=question), ai_msg_1["answer"]])

# second_question = "What are common ways of doing it?"
# ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

# print(ai_msg_2["answer"])

In [26]:
retriever = vectorstore.as_retriever()
# prompt = hub.pull("rlm/rag-prompt")
# llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)
    
contextualize_q_system_prompt = """You are a question formulation engine. Formulate a standalone question combining the chat_history and the latest user question.
Strictly Do not answer the question. Just reformulate it if needed and otherwise return it as is."""
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)
contextualize_q_chain = contextualize_q_prompt | llm_llama3 | StrOutputParser()
contextualize_q_chain.invoke(
    {
        "chat_history": [
            HumanMessage(content="What does LLM stand for?"),
            AIMessage(content="Large language model"),
        ],
        "question": "What is meant by large",
    }
)

'In the context of Large Language Models (LLMs), "large" refers to the size or scope of the model, specifically in terms of its computational resources and data usage. A Large Language Model typically involves:\n\n1. **Training data**: The model is trained on a massive dataset of text, often containing billions of words, phrases, or even entire books.\n2. **Computational power**: The model requires significant processing power to analyze and process the vast amount of data it\'s trained on.\n3. **Model complexity**: LLMs are typically complex models with many layers, which enables them to learn intricate patterns in language.\n\nIn essence, a Large Language Model is an extremely powerful tool that can generate human-like text, answer complex questions, or even engage in conversations with humans.'

In [None]:

qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}"""
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

def contextualized_question(input: dict):
    if input.get("chat_history"):
        return contextualize_q_chain
    else:
        return input["question"]

rag_chain = (
    RunnablePassthrough.assign(
        context=contextualized_question | retriever | format_docs
    )
    | qa_prompt
    | llm
)


ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})

### cache

In [1]:
# We can do the same thing with a SQLite cache
from langchain.globals import set_llm_cache
from langchain_community.cache import SQLiteCache

set_llm_cache(SQLiteCache(database_path=".langchain.db"))

NameError: name 'set_llm_cache' is not defined

In [2]:
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryStore
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
import uuid

In [2]:

def create_cache_vector_retriever(
    vectorstore, question, qas
):
    # Initialize the storage layer
    store = InMemoryStore()
    id_key = "qa_id"

    # Create the multi-vector retriever
    retriever = MultiVectorRetriever(
        vectorstore=vectorstore,
        docstore=store,
        id_key=id_key,
        similarity_score_threshold = 0.9,
        # search_type="mmr", 
        # search_kwargs={"k": 3}
    )

    # Helper function to add documents to the vectorstore and docstore
    def add_documents(retriever, doc_summaries, doc_contents):
        doc_ids = [str(uuid.uuid4()) for _ in doc_contents]
        summary_docs = [
            Document(page_content=s, metadata={id_key: doc_ids[i]})
            for i, s in enumerate(doc_summaries)
        ]
        retriever.vectorstore.add_documents(summary_docs)
        retriever.docstore.mset(list(zip(doc_ids, doc_contents)))

    # Add the question and answers to the retriever
    add_documents(retriever, question, qas)
        
    return retriever

In [3]:
sntnc_trnsfrmr_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from pathlib import Path

In [5]:
# chroma_cache_vector = Chroma(persist_directory="./src/vector_dbs/chroma_cache_vector", embedding_function=sntnc_trnsfrmr_embeddings)
cache_path = r"D:\Ed\RAGs\fortive_rag_v1\Multimodal_RAG\src\vector_dbs\src\vector_dbs\chroma_cache_vector"
chroma_cache_vector = Chroma(persist_directory=cache_path, embedding_function=sntnc_trnsfrmr_embeddings)

In [6]:
qa_dict = {
    "What is the capital of France?": "Paris",
    "What is 2 + 2?": "4",
    "Who wrote 'Harry Potter'?": "J.K. Rowling",
    "What is the largest planet in our solar system?": "Jupiter",
    "How many continents are there on Earth?": "7"
}

In [7]:
question = list(qa_dict.keys())

In [23]:
question

['What is the capital of France?',
 'What is 2 + 2?',
 "Who wrote 'Harry Potter'?",
 'What is the largest planet in our solar system?',
 'How many continents are there on Earth?']

In [None]:
qa_list = [
    {"question": "What is the capital of France?", "answer": "Paris"},
    {"question": "What is 2 + 2?", "answer": "4"},
    {"question": "Who wrote 'Harry Potter'?", "answer": "J.K. Rowling"},
    {"question": "What is the largest planet in our solar system?", "answer": "Jupiter"},
    {"question": "How many continents are there on Earth?", "answer": "7"}
]


In [8]:
qas = [ {ky:vl} for ky, vl in qa_dict.items() ]

In [24]:
qas

[{'What is the capital of France?': 'Paris'},
 {'What is 2 + 2?': '4'},
 {"Who wrote 'Harry Potter'?": 'J.K. Rowling'},
 {'What is the largest planet in our solar system?': 'Jupiter'},
 {'How many continents are there on Earth?': '7'}]

In [9]:
vetriver_cache = create_cache_vector_retriever(
    chroma_cache_vector, question, qas
)

In [10]:
vetriver_cache.invoke("What is the capital of France?")

[{'What is the capital of France?': 'Paris'},
 {'How many continents are there on Earth?': '7'},
 {'What is the largest planet in our solar system?': 'Jupiter'},
 {"Who wrote 'Harry Potter'?": 'J.K. Rowling'}]

In [11]:
vetriver_cache.invoke("What is the france capital?")

[{'What is the capital of France?': 'Paris'},
 {'How many continents are there on Earth?': '7'},
 {'What is the largest planet in our solar system?': 'Jupiter'},
 {"Who wrote 'Harry Potter'?": 'J.K. Rowling'}]

In [13]:
from typing import List

In [None]:
from langchain.storage import InMemoryStore
from langchain_text_splitters import RecursiveCharacterTextSplitter

# The storage layer for the parent documents
docstore = InMemoryStore()
fake_whole_documents = [
    ("fake_id_1", Document(page_content="fake whole document 1")),
    ("fake_id_2", Document(page_content="fake whole document 2")),
]
docstore.mset(fake_whole_documents)

In [4]:
from collections import defaultdict

from langchain.retrievers import MultiVectorRetriever
from langchain_core.callbacks import CallbackManagerForRetrieverRun


class CustomMultiVectorRetriever(MultiVectorRetriever):
    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Get documents relevant to a query.
        Args:
            query: String to find relevant documents for
            run_manager: The callbacks handler to use
        Returns:
            List of relevant documents
        """
        results = self.vectorstore.similarity_search_with_score(
            query, **self.search_kwargs
        )

        # Map doc_ids to list of sub-documents, adding scores to metadata
        id_to_doc = defaultdict(list)
        for doc, score in results:
            doc_id = doc.metadata.get("doc_id")
            if doc_id:
                doc.metadata["score"] = score
                id_to_doc[doc_id].append(doc)

        # Fetch documents corresponding to doc_ids, retaining sub_docs in metadata
        docs = []
        for _id, sub_docs in id_to_doc.items():
            docstore_docs = self.docstore.mget([_id])
            if docstore_docs:
                if doc := docstore_docs[0]:
                    doc.metadata["sub_docs"] = sub_docs
                    docs.append(doc)

        return docs

NameError: name 'List' is not defined

In [12]:
vetriver_cache.similarity_search_with_scores("What is the capital of France?")

AttributeError: 'MultiVectorRetriever' object has no attribute 'similarity_search_with_scores'

In [1]:
from langchain.storage import InMemoryStore
from langchain_text_splitters import RecursiveCharacterTextSplitter


In [None]:

# The storage layer for the parent documents
docstore = InMemoryStore()
fake_whole_documents = [
    ("fake_id_1", Document(page_content="fake whole document 1")),
    ("fake_id_2", Document(page_content="fake whole document 2")),
]
docstore.mset(fake_whole_documents)

docs = [
    Document(
        page_content="A snippet from a larger document discussing cats.",
        metadata={"doc_id": "fake_id_1"},
    ),
    Document(
        page_content="A snippet from a larger document discussing discourse.",
        metadata={"doc_id": "fake_id_1"},
    ),
    Document(
        page_content="A snippet from a larger document discussing chocolate.",
        metadata={"doc_id": "fake_id_2"},
    ),
]


In [None]:
baseline_retriver.add_documents(docs)

In [7]:
from collections import defaultdict

from langchain.retrievers import MultiVectorRetriever
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from typing import List


class CustomMultiVectorRetriever(MultiVectorRetriever):
    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Get documents relevant to a query.
        Args:
            query: String to find relevant documents for
            run_manager: The callbacks handler to use
        Returns:
            List of relevant documents
        """
        results = self.vectorstore.similarity_search_with_score(
            query, **self.search_kwargs
        )

        # Map doc_ids to list of sub-documents, adding scores to metadata
        id_to_doc = defaultdict(list)
        for doc, score in results:
            doc_id = doc.metadata.get("doc_id")
            if doc_id:
                doc.metadata["score"] = score
                id_to_doc[doc_id].append(doc)

        # Fetch documents corresponding to doc_ids, retaining sub_docs in metadata
        docs = []
        for _id, sub_docs in id_to_doc.items():
            docstore_docs = self.docstore.mget([_id])
            if docstore_docs:
                if doc := docstore_docs[0]:
                    doc.metadata["sub_docs"] = sub_docs
                    docs.append(doc)

        return docs

In [None]:
retriever = CustomMultiVectorRetriever(vectorstore=baseline_retriver, docstore=docstore)
retriever.invoke("cat")

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


[Document(metadata={'sub_docs': [Document(id='5bb86d23-c07d-4142-917b-8c3cedea51a6', metadata={'doc_id': 'fake_id_1', 'score': 0.6802923421811876}, page_content='A snippet from a larger document discussing cats.'), Document(id='c32eaea2-f7ff-4014-b193-6c78d8845768', metadata={'doc_id': 'fake_id_1', 'score': 1.603668937332378}, page_content='A snippet from a larger document discussing discourse.')]}, page_content='fake whole document 1'),
 Document(metadata={'sub_docs': [Document(id='4abe6165-1049-41a3-8ddf-b8b4de85eead', metadata={'doc_id': 'fake_id_2', 'score': 1.5159255288970122}, page_content='A snippet from a larger document discussing chocolate.')]}, page_content='fake whole document 2')]

### custom multi retriver

In [1]:
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryStore
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
import uuid

In [34]:
import torch
import torch.nn.functional as F
from typing import List

class sentence_transform_HuggingFaceEmbeddings(HuggingFaceEmbeddings):
    # ... (rest of your class definition)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a HuggingFace transformer model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of normalized embeddings, one for each text.
        """
        import sentence_transformers  # type: ignore[import]

        texts = list(map(lambda x: x.replace("\n", " "), texts))
        if self.multi_process:
            pool = self._client.start_multi_process_pool()
            embeddings = self._client.encode_multi_process(texts, pool)
            sentence_transformers.SentenceTransformer.stop_multi_process_pool(pool)
        else:
            embeddings = self._client.encode(
                texts,
                show_progress_bar=self.show_progress,
                **self.encode_kwargs,  # type: ignore
            )

        if isinstance(embeddings, list):
            raise TypeError(
                "Expected embeddings to be a Tensor or a numpy array, "
                "got a list instead."
            )

        # Convert embeddings to a tensor
        embeddings_tensor = torch.tensor(embeddings)

        # Normalize the embeddings using torch.nn.functional
        normalized_embeddings = F.normalize(embeddings_tensor, p=2, dim=1)

        return normalized_embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a HuggingFace transformer model.

        Args:
            text: The text to embed.

        Returns:
            Normalized embeddings for the text.
        """
        return self.embed_documents([text])[0]

In [35]:

# Example usage
hf = sentence_transform_HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
text = "This is a sample text."
normalized_embedding = hf.embed_query(text)
print(normalized_embedding)


[0.016221188008785248, -0.09838076680898666, -0.011730898171663284, 0.013730788603425026, -0.06682492047548294, 0.017713487148284912, 0.011615675874054432, 0.03769439458847046, -0.0032387785613536835, -0.04462600499391556, 0.0933019295334816, -0.01635119505226612, 0.01875177025794983, -0.018939128145575523, 0.0068094427697360516, -0.07247532904148102, 0.03482732176780701, 0.0033719358034431934, -0.002407501684501767, 0.015350432135164738, 0.0030232081189751625, 0.045000627636909485, -0.005792027339339256, 0.01522165909409523, 0.04382133483886719, -0.01855294406414032, -0.0004711848741862923, -0.02047121152281761, 0.011981245130300522, -0.04444706439971924, 0.007604053709656, 0.004922519903630018, -0.02299950271844864, -0.0833158865571022, 1.5827598645046237e-06, -0.012971579097211361, -0.032535530626773834, -0.0022125772666186094, -0.013456594198942184, 0.01828291453421116, 0.06450267881155014, 0.07536079734563828, -0.02602248452603817, 0.0492517463862896, -0.009309137240052223, 0.0076

In [35]:
# sntnc_trnsfrmr_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [16]:
import uuid
from langchain_core.documents import Document

# Input data
qa_list = [
    {"question": "What is the capital of France?", "answer": "Paris"},
    {"question": "What is 2 + 2?", "answer": "4"},
    {"question": "Who wrote 'Harry Potter'?", "answer": "J.K. Rowling"},
    {"question": "What is the largest planet in our solar system?", "answer": "Jupiter"},
    {"question": "How many continents are there on Earth?", "answer": "7"}
]

# Create fake_whole_documents and docs
whole_docs = []
vector_store_docs = []

for qa in qa_list:
    unique_id = str(uuid.uuid4())
    whole_docs.append((unique_id, Document(page_content=str(qa))))
    vector_store_docs.append(Document(page_content=qa["question"], metadata={"doc_id": unique_id}))

docstore = InMemoryStore()
docstore.mset(whole_docs)    

In [None]:
# baseline_retriver.add_documents(vector_store_docs)

['b028de01-383f-4544-baa6-d647b872642e',
 '3c8a549a-f7b1-420d-9a40-007d4ceb5eaa',
 '88f3ca44-b53d-4476-a76e-66330d05ed58',
 '4165b175-ede7-45ff-94f7-1e313dcff1cc',
 'c3009076-f67a-4d54-b9c8-fa5532cbb46c']

In [36]:
# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization
# os.environ['FAISS_NO_AVX2'] = '1'

from langchain_community.vectorstores import FAISS

db = FAISS.from_documents(vector_store_docs, hf)
# print(db.index.ntotal)

In [37]:
retriever = CustomMultiVectorRetriever(vectorstore=db, docstore=docstore)
# retriever.invoke("cat")

In [38]:
retriever.invoke("capital of framce")

[Document(metadata={'sub_docs': [Document(id='bd95c5d1-05f3-48c7-8ec4-ed203e402271', metadata={'doc_id': '53827114-9e99-4f8f-91c2-cc3850b921bf', 'score': 0.88379633}, page_content='What is the capital of France?')]}, page_content="{'question': 'What is the capital of France?', 'answer': 'Paris'}"),
 Document(metadata={'sub_docs': [Document(id='6bc65c5f-9916-4059-b76b-be51f56cf8d5', metadata={'doc_id': '865bf635-dcb0-4715-9798-ebbbc8d00d62', 'score': 1.5906882}, page_content='How many continents are there on Earth?')]}, page_content="{'question': 'How many continents are there on Earth?', 'answer': '7'}"),
 Document(metadata={'sub_docs': [Document(id='d7c56d25-e660-42b2-ad7a-eb69c2835d0f', metadata={'doc_id': '241cc491-9883-48c6-8ec1-64c8fc2b987f', 'score': 1.7001126}, page_content='What is the largest planet in our solar system?')]}, page_content="{'question': 'What is the largest planet in our solar system?', 'answer': 'Jupiter'}"),
 Document(metadata={'sub_docs': [Document(id='a699aa

In [26]:
from typing import List
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
from langchain_core.embeddings import Embeddings

# Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]  # First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

class ParrotLinkEmbeddings(Embeddings):
    """ParrotLink embedding model integration."""
    
    def __init__(self, model_name: str):
        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed search docs."""
        encoded_input = self.tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
        with torch.no_grad():
            model_output = self.model(**encoded_input)
        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
        normalized_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
        return normalized_embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Embed query text."""
        return self.embed_documents([text])[0]

    # Optional: add custom async implementations here
    # you can also delete these, and the base class will
    # use the default implementation, which calls the sync
    # version in an async executor:

    # async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
    #     """Asynchronous Embed search docs."""
    #     ...

    # async def aembed_query(self, text: str) -> List[float]:
    #     """Asynchronous Embed query text."""
    #     ...

# Example usage
embed = ParrotLinkEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
sentences = ['This is an example sentence', 'Each sentence is converted']
sentence_embeddings = embed.embed_documents(sentences)

print("Sentence embeddings:")
print(sentence_embeddings)


Sentence embeddings:
[[0.06765688210725784, 0.06349599361419678, 0.048713114112615585, 0.07930494844913483, 0.037448108196258545, 0.002652810188010335, 0.03937501087784767, -0.007098484318703413, 0.05936140939593315, 0.03153699263930321, 0.06009810045361519, -0.05290517583489418, 0.04060674458742142, -0.025930875912308693, 0.02984284609556198, 0.0011269178939983249, 0.07351484894752502, -0.05038188770413399, -0.1223866194486618, 0.023702865466475487, 0.029726574197411537, 0.04247688874602318, 0.025633757933974266, 0.0019951702561229467, -0.05691905692219734, -0.027159806340932846, -0.032903529703617096, 0.06602489203214645, 0.11900719255208969, -0.04587910696864128, -0.07262146472930908, -0.032584041357040405, 0.05234133452177048, 0.04505535587668419, 0.008253016509115696, 0.03670240566134453, -0.013941574841737747, 0.06539184600114822, -0.02642722986638546, 0.0002064133295789361, -0.01366433035582304, -0.0362810455262661, -0.019504407420754433, -0.028973795473575592, 0.039427030831575

In [2]:
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryStore
from langchain_core.documents import Document
from langchain_chroma import Chroma
# from langchain_huggingface import HuggingFaceEmbeddings
import uuid

In [4]:
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model="llama3.2:1b",
)

In [5]:
import uuid
from langchain_core.documents import Document

# Input data
qa_list = [
    {"question": "What is the capital of France?", "answer": "Paris"},
    {"question": "What is 2 + 2?", "answer": "4"},
    {"question": "Who wrote 'Harry Potter'?", "answer": "J.K. Rowling"},
    {"question": "What is the largest planet in our solar system?", "answer": "Jupiter"},
    {"question": "How many continents are there on Earth?", "answer": "7"}
]

# Create fake_whole_documents and docs
whole_docs = []
vector_store_docs = []

for qa in qa_list:
    unique_id = str(uuid.uuid4())
    whole_docs.append((unique_id, Document(page_content=str(qa))))
    vector_store_docs.append(Document(page_content=qa["question"], metadata={"doc_id": unique_id}))

docstore = InMemoryStore()
docstore.mset(whole_docs)    

In [29]:
# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization
# os.environ['FAISS_NO_AVX2'] = '1'

from langchain_community.vectorstores import FAISS

db = FAISS.from_documents(vector_store_docs, embed)
# print(db.index.ntotal)

In [31]:
db.similarity_search_with_score("What is the capital of France?", search_type="mmr", k=3)

[(Document(id='52baf48e-9e85-427a-a919-5a4ac5d11a5c', metadata={'doc_id': 'b3841765-ce7c-4681-a1d0-be9b5e57f53a'}, page_content='What is the capital of France?'),
  2.3202433e-13),
 (Document(id='09250829-6932-4c2f-8455-4e051917e529', metadata={'doc_id': '5a4b82ad-e27a-4c11-82fa-8125d9dc5ac8'}, page_content='How many continents are there on Earth?'),
  1.4522676),
 (Document(id='68b79531-61e9-466e-a537-2f61016b7cbe', metadata={'doc_id': 'edf589d4-cc09-4404-8c6a-4404d87c14db'}, page_content='What is the largest planet in our solar system?'),
  1.6004306)]

In [32]:
chrm_b = Chroma.from_documents(documents=vector_store_docs, embedding=embed, collection_metadata={"hnsw:space": "cosine"})

In [37]:
sntnc_trnsfrmr_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [38]:
chrm_b_senc = Chroma.from_documents(documents=vector_store_docs, embedding=sntnc_trnsfrmr_embeddings, collection_metadata={"hnsw:space": "cosine"})

In [41]:
chrm_b_senc.similarity_search_with_score("france capital")

[(Document(id='05701885-a5c2-44e6-9330-8794fa303ba7', metadata={'doc_id': 'b3841765-ce7c-4681-a1d0-be9b5e57f53a'}, page_content='What is the capital of France?'),
  0.09321856498718262),
 (Document(id='c6f3e2e9-c50a-4596-8589-f0a54eee3919', metadata={'doc_id': 'b3841765-ce7c-4681-a1d0-be9b5e57f53a'}, page_content='What is the capital of France?'),
  0.09321862459182739),
 (Document(id='b4576440-f3fb-47dc-ae81-c9b16e46c676', metadata={'doc_id': '5a4b82ad-e27a-4c11-82fa-8125d9dc5ac8'}, page_content='How many continents are there on Earth?'),
  0.881251871585846),
 (Document(id='c9c9244b-20b0-4c9f-94c4-d7f7f2f00e10', metadata={'doc_id': '5a4b82ad-e27a-4c11-82fa-8125d9dc5ac8'}, page_content='How many continents are there on Earth?'),
  0.8812519311904907)]

In [8]:
retriever = CustomMultiVectorRetriever(vectorstore=db, docstore=docstore)
retriever.invoke("capital of france")

: 

In [6]:
sntnc_trnsfrmr_embeddings = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-xsmall-v1")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [7]:
# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization
# os.environ['FAISS_NO_AVX2'] = '1'

from langchain_community.vectorstores import FAISS

db = FAISS.from_documents(vector_store_docs, sntnc_trnsfrmr_embeddings)
# print(db.index.ntotal)

In [18]:
db.similarity_search_with_score("france capital")

[(Document(id='ea108920-fd9a-4d38-9543-8f311b3d8a90', metadata={'doc_id': 'b3841765-ce7c-4681-a1d0-be9b5e57f53a'}, page_content='What is the capital of France?'),
  9.476099),
 (Document(id='0164f811-a39f-4004-ba62-bd1069e0cf72', metadata={'doc_id': '5a4b82ad-e27a-4c11-82fa-8125d9dc5ac8'}, page_content='How many continents are there on Earth?'),
  70.07296),
 (Document(id='089e32db-af29-4133-8247-25d5f747be79', metadata={'doc_id': 'edf589d4-cc09-4404-8c6a-4404d87c14db'}, page_content='What is the largest planet in our solar system?'),
  76.16902),
 (Document(id='61b036d1-5bfc-4bdd-8895-bca7545133d3', metadata={'doc_id': '948677c9-39af-4c37-b8e2-673631957a4a'}, page_content="Who wrote 'Harry Potter'?"),
  80.85421)]

In [11]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

# 1. Specify preferred dimensions
dimensions = 384

# 2. Load model
model = SentenceTransformer("mixedbread-ai/mxbai-embed-xsmall-v1", truncate_dim=dimensions)

query = 'fance capital'

docs = [
    query,
    "what is the capital of france?",
    "A man is eating pasta.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
]


# 3. Encode
embeddings = model.encode(docs)

similarities = cos_sim(embeddings[0], embeddings[1:])
print('similarities:', similarities)


similarities: tensor([[ 0.4704, -0.0075,  0.0422,  0.0438]])


In [19]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

# Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]  # Get token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')

# Sentences for embedding
sentences = ['This is an example sentence', 'Each sentence is converted']

# Tokenize sentences
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

# Get token embeddings
with torch.no_grad():
    model_output = model(**encoded_input)

# Apply mean pooling to get sentence embeddings
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

# Normalize the embeddings
sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

# Output the embeddings
print(sentence_embeddings)

tensor([[ 6.7657e-02,  6.3496e-02,  4.8713e-02,  7.9305e-02,  3.7448e-02,
          2.6528e-03,  3.9375e-02, -7.0985e-03,  5.9361e-02,  3.1537e-02,
          6.0098e-02, -5.2905e-02,  4.0607e-02, -2.5931e-02,  2.9843e-02,
          1.1269e-03,  7.3515e-02, -5.0382e-02, -1.2239e-01,  2.3703e-02,
          2.9727e-02,  4.2477e-02,  2.5634e-02,  1.9952e-03, -5.6919e-02,
         -2.7160e-02, -3.2904e-02,  6.6025e-02,  1.1901e-01, -4.5879e-02,
         -7.2621e-02, -3.2584e-02,  5.2341e-02,  4.5055e-02,  8.2530e-03,
          3.6702e-02, -1.3942e-02,  6.5392e-02, -2.6427e-02,  2.0641e-04,
         -1.3664e-02, -3.6281e-02, -1.9504e-02, -2.8974e-02,  3.9427e-02,
         -8.8409e-02,  2.6242e-03,  1.3671e-02,  4.8306e-02, -3.1157e-02,
         -1.1733e-01, -5.1169e-02, -8.8529e-02, -2.1896e-02,  1.4299e-02,
          4.4417e-02, -1.3482e-02,  7.4339e-02,  2.6638e-02, -1.9876e-02,
          1.7919e-02, -1.0605e-02, -9.0426e-02,  2.1327e-02,  1.4120e-01,
         -6.4718e-03, -1.4038e-03, -1.

In [25]:
torch.sum(sentence_embeddings**2, dim=1)

tensor([1.0000, 1.0000])

In [23]:
type(sentence_embeddings[0])

torch.Tensor

In [28]:
se = torch.tensor(sentence_embeddings)
torch.sum(se**2, dim=1)

tensor([1.0000, 1.0000])

In [3]:
from langchain_ollama import ChatOllama, OllamaLLM
from pathlib import Path
import time 

llm_llama_gaurd3 = ChatOllama(
    model="llama-guard3:1b",
    temperature=0,
    # other params...
)

def responsible_ai_validation(question):
    llama_gaurd_labels = '''S1: Violent Crimes
    S3: Sex-Related Crimes
    S5: Defamation
    S7: Privacy
    S9: Indiscriminate Weapons
    S11: Suicide & Self-Harm
    S13: Elections
    S2: Non-Violent Crimes
    S4: Child Sexual Exploitation
    S6: Specialized Advice
    S8: Intellectual Property
    S10: Hate
    S12: Sexual Content
    S14: Code Interpreter Abuse'''.splitlines()
    llama_gaurd_label_dict = dict([vl.strip().split(": ") for vl in llama_gaurd_labels])
    grd_rslt = llm_llama_gaurd3.invoke(question).content.splitlines()
    if len(grd_rslt) > 1:
        llama_gaurd_rslt = llama_gaurd_label_dict[grd_rslt.content.splitlines()[-1]]
        return f'Your prompt includes {grd_rslt[-1]} request. Please rephrase your query'
    else:
        return "safe"

In [32]:
grd_rslt

AIMessage(content='unsafe\nS11', additional_kwargs={}, response_metadata={'model': 'llama-guard3:1b', 'created_at': '2025-01-15T14:11:19.2400129Z', 'done': True, 'done_reason': 'stop', 'total_duration': 2376143700, 'load_duration': 28341900, 'prompt_eval_count': 198, 'prompt_eval_duration': 783000000, 'eval_count': 5, 'eval_duration': 1562000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-ca9f900a-704c-40dc-b4f3-5df2411b560f-0', usage_metadata={'input_tokens': 198, 'output_tokens': 5, 'total_tokens': 203})

In [35]:
llm_llama_gaurd3.invoke("how are you?").content.splitlines()

['safe']