## Hybrid Search Introduction and ChromaDB
  - Keyword search and Vector Search

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [None]:
# Sample corpus: four short sentences used by the keyword-search (TF-IDF) demo below
documents = [
    "This is a list which is containing sample documents",
    "Keywords are important for keyword-based search",
    "Document analysis involves extracting keywords",
    "Keyword-Based search relies on sparse embeddings"
]

In [None]:
query="Keyword-based search"

In [None]:
import re
def preprocess_text(text):
  """Normalize text for keyword matching.

  Lowercases the input and removes every character that is neither a word
  character nor whitespace, so "Keyword-based" becomes "keywordbased".

  Args:
    text: The string to normalize.

  Returns:
    The lowercased string with punctuation removed.
  """
  lowered = text.lower()
  return re.sub(r'[^\w\s]', '', lowered)

In [None]:
# Normalize every document with the same function that is later applied to the query
preprocess_documents = list(map(preprocess_text, documents))

In [None]:
preprocess_documents

['this is a list which is containing sample documents',
 'keywords are important for keywordbased search',
 'document analysis involves extracting keywords',
 'keywordbased search relies on sparse embeddings']

In [None]:
# Header followed by one normalized document per line
print("Preprocessed Documents:", *preprocess_documents, sep="\n")

Preprocessed Documents:
this is a list which is containing sample documents
keywords are important for keywordbased search
document analysis involves extracting keywords
keywordbased search relies on sparse embeddings


In [None]:
# Show the raw query; it has not been normalized yet at this point
# (preprocess_text is applied to it in the following cell), so label it accordingly.
print("Original Query:")
print(query)

Preprocessed Query:
Keyword-based search


In [None]:
# Apply the same normalization as the documents so the query tokens can match
# the vocabulary the vectorizer learns from the corpus
preprocess_query=preprocess_text(query)

In [None]:
print("Preprocessed Query:")
print(preprocess_query)

Preprocessed Query:
keywordbased search


In [None]:
# TF-IDF vectorizer with default settings; it is fitted on the corpus below
# and then reused to encode the query
vector=TfidfVectorizer()

In [None]:
# Fit the vectorizer on the normalized corpus and encode it:
# the result is a sparse matrix with one TF-IDF row per document
vector1 = vector.fit_transform(preprocess_documents)

In [None]:
vector1

<4x21 sparse matrix of type '<class 'numpy.float64'>'
	with 24 stored elements in Compressed Sparse Row format>

In [None]:
vector1.toarray()

array([[0.        , 0.        , 0.31622777, 0.        , 0.31622777,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.63245553, 0.        , 0.        , 0.31622777, 0.        ,
        0.        , 0.31622777, 0.        , 0.        , 0.31622777,
        0.31622777],
       [0.        , 0.4533864 , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.4533864 , 0.4533864 , 0.        ,
        0.        , 0.35745504, 0.35745504, 0.        , 0.        ,
        0.        , 0.        , 0.35745504, 0.        , 0.        ,
        0.        ],
       [0.46516193, 0.        , 0.        , 0.46516193, 0.        ,
        0.        , 0.46516193, 0.        , 0.        , 0.46516193,
        0.        , 0.        , 0.36673901, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.43671931, 0.        , 0.        , 0.       

In [None]:
vector1.toarray()[0]

array([0.        , 0.        , 0.31622777, 0.        , 0.31622777,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.63245553, 0.        , 0.        , 0.31622777, 0.        ,
       0.        , 0.31622777, 0.        , 0.        , 0.31622777,
       0.31622777])

In [None]:
# transform (not fit_transform): reuse the vocabulary and IDF weights learned
# from the documents so the query lands in the same feature space.
# The query is wrapped in a list because the vectorizer expects an iterable of texts.
query1=vector.transform([preprocess_query])

In [None]:
query1

<1x21 sparse matrix of type '<class 'numpy.float64'>'
	with 2 stored elements in Compressed Sparse Row format>

In [None]:
query1.toarray()

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.70710678, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.70710678, 0.        , 0.        ,
        0.        ]])

In [None]:
# Cosine similarity of every document row against the single query row;
# the result has one row per document and a single column of scores
similarities = cosine_similarity(vector1,query1)

In [None]:
similarities

array([[0.        ],
       [0.50551777],
       [0.        ],
       [0.48693426]])

In [None]:
# Ranking: argsort orders the scores ascending, so reverse the result to put the
# most similar document first, then flatten the column into a 1-D index array
rank_indices=np.argsort(similarities,axis=0)[::-1].flatten()

In [None]:
rank_indices

array([1, 3, 2, 0])

In [None]:
# Map the ranked indices back to the original (un-normalized) documents for display
ranked_documents=[documents[i] for i in rank_indices]

In [None]:
ranked_documents

['Keywords are important for keyword-based search',
 'Keyword-Based search relies on sparse embeddings',
 'Document analysis involves extracting keywords',
 'This is a list which is containing sample documents']

In [None]:
# Print the documents from best to worst match; i+1 turns the 0-based
# enumerate counter into a 1-based rank label
for i,doc in enumerate(ranked_documents):
  print(f"Rank{i+1}:{doc}")

Rank1:Keywords are important for keyword-based search
Rank2:Keyword-Based search relies on sparse embeddings
Rank3:Document analysis involves extracting keywords
Rank4:This is a list which is containing sample documents


In [None]:
preprocess_query

'keywordbased search'

In [None]:
# Vector-search demo: three hand-written 5-dimensional vectors used as toy
# stand-ins for document embeddings (one row per document)
documents_embeddings=np.array([
    [0.634,0.234,0.867,0.042,0.249],
    [0.123,0.456,0.789,0.321,0.654],
    [0.987,0.654,0.321,0.123,0.456]
])

In [None]:
# Toy query vector; the double brackets make it a 2-D array with a single row,
# the same layout as documents_embeddings, which is what cosine_similarity is given below
query_embedding=np.array([[0.789,0.321,0.654,0.987,0.123]])

In [None]:
# Score each toy document vector against the query vector.
# NOTE: this rebinds `similarities`, replacing the TF-IDF scores computed earlier.
similarities = cosine_similarity(documents_embeddings,query_embedding)
similarities

array([[0.73558979],
       [0.67357898],
       [0.71517305]])

In [None]:
rank_indices=np.argsort(similarities,axis=0)[::-1].flatten()

In [None]:
rank_indices

array([0, 2, 1])

In [None]:
# Report the ranking; both i+1 and idx+1 convert 0-based positions into
# 1-based rank and document numbers
for i,idx in enumerate(rank_indices):
  print(f"Rank{i+1}:Document:{idx+1}")

Rank1:Document:1
Rank2:Document:3
Rank3:Document:2


In [None]:
doc_path="/content/Nuacem.pdf"

In [None]:
!pip install pypdf langchain_community

Collecting pypdf
  Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.5-py3-none-any.whl.metadata (2.9 kB)
Collecting SQLAlchemy<2.0.36,>=1.4 (from langchain_community)
  Downloading SQLAlchemy-2.0.35-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.6 (from langchain_community)
  Downloading langchain-0.3.7-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.15 (from langchain_community)
  Downloading langchain_core-0.3.15-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.6.1-py3-no

In [None]:
from langchain_community.document_loaders import PyPDFLoader

In [None]:
loader = PyPDFLoader(doc_path)

In [None]:
loader.load()

[Document(metadata={'source': '/content/Nuacem.pdf', 'page': 0}, page_content="Nuacem Pla tf orm Ov er vie w \n NuacemAIisaConversationalAI-PoweredCustomerEngagementPlatformthatcanamplifycustomerexperienceonText,Voice&Videochannels.WithourIntelligentCXtechnologysolutions,enterprisescandeliveranexceptionalconversationalexperiencetocustomers&automatebusinesscommunications.WeareexpertsincreatingabettercustomerexperienceforanyenterpriseviaConversationalAI.Weprovidecustomerswithend-to-endsupport–fromtheinitialrequirementstotheoperationoftheinnovativesolution–andensuretheycangeteverything“fromasinglesource.”Ourcustomersbenefitfromauniquecombinationofin-depthAIknowledgeandextensiveexperienceincustomerengagement,support,andexperienceforbusinessapplications.\nConnectwithyourclientsattheirmomentofneedwithNuacemAI’sCustomerEngagementSolution.OurConversationalAItechnologyimprovesyourcustomers’experiencebystreamliningserviceandengagementinreal-time …atanytime!\nN e x t G e n e r a t i o n C o n v e

In [None]:
# Parse the PDF into LangChain Document objects (the recorded output above shows
# each Document carrying 'source' and 'page' metadata)
docs=loader.load()

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Split into chunks of at most 200 characters with 30 characters of overlap.
# NOTE(review): the extracted PDF text contains very few spaces (see the loader
# output), so chunks are cut mid-word; retrieval quality is limited by that.
text_splitter=RecursiveCharacterTextSplitter(chunk_size=200,chunk_overlap=30)

In [None]:
# Chunk the loaded pages; these chunks feed both the vector store and the BM25 retriever
chunks = text_splitter.split_documents(docs)

In [None]:
chunks

[Document(metadata={'source': '/content/Nuacem.pdf', 'page': 0}, page_content='Nuacem Pla tf orm Ov er vie w'),
 Document(metadata={'source': '/content/Nuacem.pdf', 'page': 0}, page_content='NuacemAIisaConversationalAI-PoweredCustomerEngagementPlatformthatcanamplifycustomerexperienceonText,Voice&Videochannels.WithourIntelligentCXtechnologysolutions,enterprisescandeliveranexceptionalconve'),
 Document(metadata={'source': '/content/Nuacem.pdf', 'page': 0}, page_content='escandeliveranexceptionalconversationalexperiencetocustomers&automatebusinesscommunications.WeareexpertsincreatingabettercustomerexperienceforanyenterpriseviaConversationalAI.Weprovidecustomerswithend'),
 Document(metadata={'source': '/content/Nuacem.pdf', 'page': 0}, page_content='alAI.Weprovidecustomerswithend-to-endsupport–fromtheinitialrequirementstotheoperationoftheinnovativesolution–andensuretheycangeteverything“fromasinglesource.”Ourcustomersbenefitfromauniquecombinationo'),
 Document(metadata={'source': '/content/

In [None]:
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings

In [None]:
from google.colab import userdata

In [None]:
# Read the Hugging Face API token from Colab's userdata (secrets) store
# instead of hard-coding it in the notebook
HF_token=userdata.get('HF_TOKEN')

In [None]:
# Embedding client for the dense side of the hybrid search; it is configured with
# the token above and the BAAI/bge-base-en-v1.5 model
embeddings = HuggingFaceInferenceAPIEmbeddings(api_key=HF_token,model_name="BAAI/bge-base-en-v1.5")

In [None]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-0.5.18-py3-none-any.whl.metadata (6.8 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.115.4-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.32.0-py3-none-any.whl.metadata (6.6 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.7.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.20.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.28.1-py3

In [None]:
from langchain.vectorstores import Chroma

In [None]:
# Embed every chunk with `embeddings` and index the vectors in a Chroma collection.
# NOTE(review): no persist_directory is passed, so the collection presumably lives
# only for this runtime session — confirm against the Chroma wrapper docs.
vector_store=Chroma.from_documents(chunks,embeddings)

In [None]:
# Dense retriever over the Chroma store; k=3 asks for the three closest chunks per query
vectorstore_retriever=vector_store.as_retriever(search_kwargs={"k":3})

In [None]:
vectorstore_retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceInferenceAPIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7dadcea94a30>, search_kwargs={'k': 3})

In [None]:
!pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2


In [None]:
from langchain.retrievers import BM25Retriever,EnsembleRetriever

In [None]:
# Sparse keyword retriever (BM25) built over the same chunks that were indexed
# in the vector store, so both retrievers search identical content
keyword_retriever=BM25Retriever.from_documents(chunks)

In [None]:
# Return the top 3 keyword hits, matching k=3 on the vector retriever
keyword_retriever.k=3

In [None]:
# Hybrid retriever: fuses the dense (Chroma) and sparse (BM25) rankings.
# The keyword is `weights` (plural), one weight per retriever in the same order:
# 0.3 for the vector retriever, 0.7 for the BM25 retriever. The previous spelling
# `weight=` is not a field of EnsembleRetriever, so the intended weighting was
# never applied.
ble_retriever=EnsembleRetriever(retrievers=[vectorstore_retriever,keyword_retriever],weights=[0.3,0.7])

In [None]:
model_name="HuggingFaceH4/zephyr-7b-beta"

In [None]:
!pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl (122.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.44.1


In [None]:
!pip install accelerate



In [None]:
import torch
from transformers import (AutoModelForCausalLM,AutoTokenizer,BitsAndBytesConfig,pipeline)
from langchain import HuggingFacePipeline

In [None]:
# Load the causal LM with 4-bit quantization
def load_quantize_model(model_name:str):
  """Load `model_name` as a 4-bit quantized causal language model.

  Args:
    model_name: Identifier passed to AutoModelForCausalLM.from_pretrained.

  Returns:
    The model loaded with a BitsAndBytesConfig that enables 4-bit loading,
    the "nf4" quantization type, double quantization, and bfloat16 as the
    compute dtype; bfloat16 is also passed as torch_dtype.
  """
  quantization = BitsAndBytesConfig(
      bnb_4bit_compute_dtype=torch.bfloat16,
      bnb_4bit_use_double_quant=True,
      bnb_4bit_quant_type="nf4",
      load_in_4bit=True,
  )
  return AutoModelForCausalLM.from_pretrained(
      model_name,
      quantization_config=quantization,
      torch_dtype=torch.bfloat16,
  )

In [None]:
# Build the tokenizer that goes with the model checkpoint
def initialize_tokenizer(model_name:str):
  """Create the tokenizer for `model_name`.

  Args:
    model_name: Identifier passed to AutoTokenizer.from_pretrained.

  Returns:
    The tokenizer, loaded with return_token_type_ids=False and with its
    bos_token_id set to 1.
  """
  tok = AutoTokenizer.from_pretrained(model_name, return_token_type_ids=False)
  # NOTE(review): the BOS id is hard-coded to 1 — presumably correct for the model
  # used in this notebook; confirm if model_name changes.
  tok.bos_token_id = 1
  return tok

In [None]:
tokenizer=initialize_tokenizer(model_name)

tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

In [None]:
model = load_quantize_model(model_name)

config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [None]:
# Build the text-generation pipeline around the already-loaded quantized model.
# The factory is called through the module (transformers.pipeline) rather than the
# bare `pipeline` name imported earlier: the assignment below rebinds `pipeline`
# to the resulting Pipeline object, so `pipeline = pipeline(...)` would call that
# object instead of the factory when this cell is run a second time.
# The variable name is kept because the following cell passes `pipeline` on.
import transformers

pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache=True,
    device_map="auto",
    max_length=2048,      # cap on total sequence length (prompt + generated tokens)
    do_sample=True,       # sample instead of greedy decoding ...
    top_k=5,              # ... restricted to the 5 most likely next tokens
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id
)

In [None]:
llm=HuggingFacePipeline(pipeline=pipeline)

  llm=HuggingFacePipeline(pipeline=pipeline)


In [None]:
from langchain.chains import RetrievalQA

In [None]:
# Baseline QA chain: context comes from the dense (vector-store) retriever only.
# chain_type="stuff" puts all retrieved chunks into a single prompt for the LLM.
normal_chain=RetrievalQA.from_chain_type(
    llm=llm,chain_type="stuff",retriever=vectorstore_retriever
)

In [None]:
# Hybrid QA chain: same setup as normal_chain, but the context comes from the
# ensemble (vector + BM25) retriever.
# NOTE(review): `hubrid_chain` looks like a typo for `hybrid_chain`; the name is
# left unchanged because a later cell invokes it.
hubrid_chain=RetrievalQA.from_chain_type(
    llm=llm,chain_type="stuff",retriever=ble_retriever
)

In [None]:
response=normal_chain.invoke("What is the summary?")

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [None]:
response

{'query': 'What is the summary?',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\ntimentandConversationalAnalytics❖IntegratedPlatform–OnePlatformforTextandVoice\n\n1 . C o n v e r s a t i o n a l A u t o m a t i o n\n\n2 . A u g m e n t e d H u m a n I n t e l l i g e n c e\n\nQuestion: What is the summary?\nHelpful Answer: The first text provides a brief overview of Timent and Conversational Analytics, highlighting the key features of their integrated platform for text and voice analysis. The second text touches on the topic of automated intelligence for humans, implying that this technology can enhance human intelligence and decision-making capabilities."}

In [None]:
print(response.get('result'))

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

timentandConversationalAnalytics❖IntegratedPlatform–OnePlatformforTextandVoice

1 . C o n v e r s a t i o n a l A u t o m a t i o n

2 . A u g m e n t e d H u m a n I n t e l l i g e n c e

Question: What is the summary?
Helpful Answer: The first text provides a brief overview of Timent and Conversational Analytics, highlighting the key features of their integrated platform for text and voice analysis. The second text touches on the topic of automated intelligence for humans, implying that this technology can enhance human intelligence and decision-making capabilities.


In [None]:
response1=hubrid_chain.invoke("What is the summary?")

In [None]:
print(response1.get('result'))

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

timentandConversationalAnalytics❖IntegratedPlatform–OnePlatformforTextandVoice

C h a n n e l s s u p p o r t e d : 
 1. WhatsApp2. Website3. Facebook4. Instagram
C h a n n e l s N o t S u p p o r t e d : 1. MobileApps2. Viber

1 . C o n v e r s a t i o n a l A u t o m a t i o n

ionTheplatformprovidesoutoftheboxcapabilitiestoconnectwithcustomersacrossmultiplechannelslikeSocialMedia,Web,MessagingChannels,etc.viatext,voiceandvideowithplatformsupportingmorethan56nativelanguages.

2 . A u g m e n t e d H u m a n I n t e l l i g e n c e

r,design,training,testing,andimplementationofConversationalAutomationforanychannelinanyenvironmenttosuittheaudience.c.

Question: What is the summary?
Helpful Answer: The given context discusses different channels for customer interaction, including WhatsApp, website, Facebook, Instagram, mobile