# **Hybrid Search and Re-Ranking in RAG:**

In [None]:
# install libaries:
!pip -q install sentence-transformers==2.2.2
!pip -q install langchain
!pip -q install chromadb
!pip -q install langchain_community
!pip -q install langchain_google_genai
!pip -q install weaviate-client

In [None]:
import weaviate
import os

In [None]:
# Get the Weaviate API Key, URL, etc.

# Read the connection settings from environment variables so that secrets are
# never typed into (and saved with) the notebook. Each value falls back to an
# empty string when the variable is not set.
WEAVIATE_CLUSTER_URL = os.environ.get("WEAVIATE_CLUSTER_URL", "")
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY", "")
HF_TOKEN = os.environ.get("HF_TOKEN", "")

In [None]:
# Create Client to connect to Weaviate Cluster:

# API-key credentials for the cluster, plus the Hugging Face token that is
# sent along as an extra request header.
api_key_auth = weaviate.AuthApiKey(WEAVIATE_API_KEY)
extra_headers = {"X-HuggingFace-Api-Key": HF_TOKEN}

client = weaviate.Client(
    url=WEAVIATE_CLUSTER_URL,
    auth_client_secret=api_key_auth,
    additional_headers=extra_headers,
)

In [None]:
client.is_ready(), client.is_live()

(True, True)

In [None]:
# Check if any Schema is present or not:

client.schema.get()

{'classes': []}

In [None]:
# Create Schema:

# Schema with a single class, "RAG", vectorized by the `text2vec-huggingface`
# module and holding one text property, "content".
schema = {
    "classes": [
        {
            "class": "RAG",
            "description": "Documents for RAG",
            "vectorizer": "text2vec-huggingface",
            "moduleConfig": {"text2vec-huggingface": {"model": "sentence-transformers/all-MiniLM-L6-v2", "type": "text"}},
            "properties": [
                {
                    "dataType": ["text"],
                    "description": "The content of the paragraph",
                    "moduleConfig": {
                        "text2vec-huggingface": {
                            "skip": False,
                            "vectorizePropertyName": False,
                        }
                    },
                    "name": "content",
                },
            ],
        },
    ]
}

# Only create the schema when the cluster does not already contain it, so that
# re-running this cell against an existing cluster does not raise.
if not client.schema.contains(schema):
    client.schema.create(schema)

In [None]:
# Create Hybrid Search Retriever:

from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever


In [None]:
# initialize the weaviate-retriever for hybrid search

retriever = WeaviateHybridSearchRetriever(
    # Which cluster / class / property to search.
    client=client,
    index_name="RAG",
    text_key="content",
    attributes=[],
    create_schema_if_missing=True,
    # Search settings: hybrid weighting (`alpha`) and result count (`k`).
    # NOTE(review): exact meaning of `alpha` is defined by Weaviate — confirm in its docs.
    alpha=0.6,
    k=10,
)

In [None]:
# Load LLM:

from google.colab import userdata
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain import HuggingFaceHub



# API keys are read from the Colab secrets store (`userdata`).
GEMINI_API_KEY = userdata.get("GEMINI_API_KEY")

# Gemini chat model; this is the LLM passed to the RetrievalQA chains in this notebook.
llm1 = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=GEMINI_API_KEY,
    temperature=0.5,
    # `max_output_tokens` is the documented setting for capping the response
    # length; it replaces the former `max_tokens` / `max_length` pair.
    max_output_tokens=1024,
)

# Alternative LLM served through the Hugging Face Hub.
# It is not referenced by any other cell visible in this notebook.
HuggingFace_API_Token = userdata.get("HF_TOKEN")
llm2 = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2",
                     huggingfacehub_api_token=HuggingFace_API_Token,
                     model_kwargs={"temperature":0.1, "max_length":512})

In [None]:
!pip install requests beautifulsoup4

In [None]:
# Extracting Urls from a specific website:

import requests
from bs4 import BeautifulSoup
import urllib.parse

def retrieve_urls(website_url, timeout=10):
    """Collect the targets of every `<a href=...>` link on a web page.

    Args:
        website_url: Address of the page to download. It is also used as the
            base against which relative links are resolved.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests.get` can block indefinitely on an unresponsive
            host.

    Returns:
        A set of absolute URL strings. An empty set is returned (and the error
        is printed) when the page cannot be fetched or the server answers with
        an HTTP error status.
    """
    try:
        response = requests.get(website_url, timeout=timeout)
        response.raise_for_status()  # Turn HTTP error statuses into exceptions
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the website: {e}")
        return set()

    soup = BeautifulSoup(response.content, 'html.parser')

    # `href=True` keeps only anchors that actually carry an href attribute;
    # urljoin leaves absolute links untouched and resolves relative ones.
    return {
        urllib.parse.urljoin(website_url, tag['href'])
        for tag in soup.find_all('a', href=True)
    }

# Example usage
website_url = "https://edzlms.com/"
# `retrieve_urls` returns a set, whose iteration order is arbitrary; sorting
# gives a list that is displayed in the same order on every run.
urls = sorted(retrieve_urls(website_url))
urls

['https://edzlms.com/index.php/sales-training/',
 'https://edzlms.com/index.php/portals/',
 'https://edzlms.com/wp-content/uploads/2024/06/CREATIVE-7-1.png',
 'https://edzlms.com/index.php/employee-training/',
 'https://edzlms.com/index.php/lms-features/',
 'https://edzlms.com/index.php/ai-powered-learning/',
 'https://edzlms.com/index.php/security-2/',
 'https://edzlms.com/index.php/detailed-feature-list/',
 'https://edzlms.com/index.php/user-management/',
 'https://edzlms.com/index.php/customer-support/',
 'https://favdevs.com/demos/wp/zubaz/contact-us/',
 'https://www.facebook.com/Mylmsofindia',
 'https://edzlms.com/index.php/super-fast-management-of-user/',
 'https://edzlms.com/index.php/training-delivery-methodologies/',
 'https://edzlms.com/',
 'https://edzlms.com/index.php/gamification-and-learners-engagement/',
 'https://edzlms.com/index.php/contact/',
 'https://edzlms.com/index.php/reporting-and-tracking/',
 'https://edzlms.com',
 'https://edzlms.com/index.php/insurance-traini

In [None]:
!pip install unstructured
# !pip install numpy==1.24.4

In [None]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Load the documents from given url:

# Pages to load into the knowledge base. Image links (.png) and a few other
# entries are commented out so they are not passed to the loader.
# NOTE(review): 'https://edzlms.com' and 'https://edzlms.com/#primary', as well as
# 'university-lms/' with and without the trailing slash, look like the same
# pages listed twice — confirm whether the duplicates are intended.
# NOTE(review): the list also contains links outside edzlms.com (favdevs.com,
# facebook.com, calendly.com, linkedin.com) — confirm they belong in the corpus.
URLs = [
    'https://edzlms.com/index.php/sales-training/',
    'https://edzlms.com/index.php/portals/',
    # 'https://edzlms.com/wp-content/uploads/2024/06/CREATIVE-7-1.png',
    'https://edzlms.com/index.php/employee-training/',
    'https://edzlms.com/index.php/lms-features/',
    'https://edzlms.com/index.php/ai-powered-learning/',
    'https://edzlms.com/index.php/security-2/',
    'https://edzlms.com/index.php/detailed-feature-list/',
    'https://edzlms.com/index.php/user-management/',
    'https://edzlms.com/index.php/customer-support/',
    'https://favdevs.com/demos/wp/zubaz/contact-us/',
    'https://www.facebook.com/Mylmsofindia',
    'https://edzlms.com/index.php/super-fast-management-of-user/',
    'https://edzlms.com/index.php/training-delivery-methodologies/',
    # 'https://edzlms.com/',
    'https://edzlms.com/index.php/gamification-and-learners-engagement/',
    'https://edzlms.com/index.php/contact/',
    'https://edzlms.com/index.php/reporting-and-tracking/',
    'https://edzlms.com',
    'https://edzlms.com/index.php/insurance-training/',
    'https://edzlms.com/index.php/extended-enterprise/',
    'https://edzlms.com/index.php/hospital-and-medical/',
    'https://edzlms.com/index.php/lms-with-ecommerce/',
    'https://edzlms.com/index.php/other-industry/',
    # 'https://edzlms.com/wp-content/uploads/2024/06/CREATIVE-9-2.png',
    'https://edzlms.com/index.php/terms-of-use/',
    'https://edzlms.com/index.php/training-delivery/',
    'https://calendly.com/edzlms/30min',
    'https://calendly.com/edzlms/30min?month=2024-06',
    # 'https://www.youtube.com/',
    'https://edzlms.com/index.php/customer-experience/',
    'https://edzlms.com/index.php/integrations-partnership/',
    'https://edzlms.com/index.php/lms-security/',
    'https://edzlms.com/index.php/plans/',
    'https://edzlms.com/index.php/blogs/',
    'https://edzlms.com/index.php/learner-engagement/',
    'https://edzlms.com/index.php/about-the-team/',
    'https://edzlms.com/#primary',
    'https://edzlms.com/index.php/healthcare/',
    'https://edzlms.com/index.php/continuous-learning-for-employee/',
    'https://edzlms.com/index.php/reporting/',
    'https://edzlms.com/index.php/multiple-environment-portal/',
    'https://edzlms.com/index.php/train-your-customers/',
    'https://edzlms.com/index.php/university-lms/',
    'https://edzlms.com/index.php/ebooks/',
    'https://www.linkedin.com/company/lmsofindia/',
    'https://edzlms.com/index.php/customer-training/',
    'https://edzlms.com/index.php/monetise-content-with-ecommerce/',
    # 'https://edzlms.com/wp-content/uploads/2024/06/CREATIVE-5-2.png',
    'https://edzlms.com/index.php/detailed-features-list/',
    'https://edzlms.com/index.php/our-team/',
    'https://edzlms.com/index.php/integration/',
    'https://edzlms.com/index.php/course-creation/',
    'https://edzlms.com/index.php/course-content-creation/',
    'https://edzlms.com/index.php/insurance-sectors/',
    'https://edzlms.com/index.php/extended-training-portal/',
    # 'https://edzlms.com/wp-content/uploads/2024/06/CREATIVE-8-1.png',
    'https://edzlms.com/index.php/university-lms',
    'https://edzlms.com/index.php/case-study/',
    # 'https://edzlms.com/wp-content/uploads/2024/06/CREATIVE-6-1.png'
]

# Fetch every listed page; `documents` holds what the loader returns.
loader = UnstructuredURLLoader(urls = URLs)
documents = loader.load()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [None]:
len(documents)

53

In [None]:
documents[0].page_content

In [None]:
# Perform Chunkings:

# Split every loaded document into chunks of size 1500 with an overlap of 300
# between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=300,
    chunk_size=1500,
)
chunks = splitter.split_documents(documents)

In [None]:
len(chunks)

615

In [None]:
chunks[30]

Document(page_content='Resources   \n                \t\t\n\t\t\t\t\n\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\tRead, watch, learn\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\tBlogs\t\t\t\t\t\t\t\t\n                               \n\t\t\t\t\t\t\t                            \n\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\n\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\t\tRead our latest blogs.\t\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\tCase Study\t\t\t\t\t\t\t\t\n         

In [None]:
# Upload or add documents to Weaviate Cluster:

# Keep the returned object IDs in a variable and display only how many were
# stored, instead of printing every UUID into the cell output.
# NOTE(review): re-running this cell presumably uploads the chunks a second
# time — confirm before executing it again on a populated cluster.
document_ids = retriever.add_documents(chunks)
len(document_ids)

['f8e3c2e9-5431-4829-a933-2a3b0e26b06f',
 '03b6f99f-58f7-429b-8d8d-190bd1328e58',
 '389a686e-4747-4086-8570-06c9931151c6',
 '5e5837bb-a1b1-4a0f-ac58-d3ba252232be',
 '0d043f94-1cb5-4334-8eac-1232ea2c0f13',
 '634e2ef2-da00-4db3-bcb1-55421c03329f',
 'b37c0a80-6385-4ce4-88a5-cb178dc1925f',
 '2b282a19-701e-40b9-b464-174edf6e02c0',
 '72707112-68bf-4a60-ab5a-bdead4191f82',
 '5cdfb3e7-4001-4fba-898f-14e1069c2b38',
 '98b76b09-471b-4cc6-abce-a2cab02a284e',
 '6f1c7190-061a-4e37-b6ef-2d7886563e62',
 'c0c449e6-94a0-4da4-afff-1b64c85680cb',
 'b6328df1-88ec-4700-a4d9-fe2cf1120c05',
 '56830572-4939-41bc-a8f7-ee07a297984e',
 '2b4e8b0d-9be5-40fb-b153-dfb81ec5728e',
 'b11fdc0e-e329-49a3-8926-16dfd87fbf4d',
 '20702e0c-953d-432f-a875-7aaaeaedb78e',
 'a4b5f522-d49b-42f6-ab04-75b4ec0a62ee',
 'a7469723-4160-4212-86c0-caf92591381b',
 '27dc2a26-20d1-4ad9-9bcc-751a778ad245',
 'dc0778ff-6a42-4d52-8c92-14849412d0f2',
 'd446b7ff-eed5-4039-b6b7-4181d611610a',
 '2386c5b2-f8f3-4fe1-b2b3-3ed2b7d4ae62',
 '92be611e-7e3c-

In [None]:
# ask question and get result from Weaviate:

retriever.invoke('What is EdzLearn?')

In [None]:
retriever.invoke(
    'What is EdzLearn?',
    score=True
)

[Document(page_content='14. Notices\n\nAll notices to EdzLearn should be sent to our designated addresses:\n\n48, silicon suncity phase 1, Heelalige, Chandapura, Bangalore. India. 560099\n\n15. Credits and Links\n\nYou grant EdzLearn the right to use your company name and logo in our marketing materials and website’s “Our Clients” section. If you do not want your name or logo to be used, please contact us at sales@edzlms.com to request removal.\n\n16. Location of Services\n\nOur Services are operated from various locations. We may use contractors and third-party service providers to perform our obligations and provide the Services.\n\n17. Dispute Resolution\n\nWe aim to resolve any disputes amicably. If a resolution cannot be reached, you agree that any legal actions will be governed by the laws of Karnataka, India, without regard to its conflict of law provisions. You agree to submit to the jurisdiction of the courts located in Karnataka, India for the resolution of any disputes arisi

In [None]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

In [None]:
# Instruction text given to the LLM. `{context}` and `{question}` are the two
# placeholders declared as input variables on the template below.
prompt = """
You are an AI-powered virtual assistant, your name is 'EdzLms', designed by EdzLearn Service Private Limited.
Your task is to answer based on user's query in detailed way.
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that 'I don't have enough information to answer this question'.
Provide only the helpful answer. Do not include any other information.

Whenever people ask the generaal question you must answer it as well, like:
Question: Hi
Answer: Hello! How can I assist you with your studies today?

Question: What is your name?
Answer: I am EdzLms, your virtual assistant designed by EdzLearn Service Private Limited.

Context: `{context}`
Question: `{question}`
"""

# Wrap the raw text in a PromptTemplate that declares its two placeholders.
prompt_template = PromptTemplate(
    input_variables=['context', 'question'],
    template=prompt,
)

# Question-answering chain: documents come from the hybrid-search `retriever`
# and are passed to `llm1` together with the custom prompt ("stuff" chain type).
chain = RetrievalQA.from_chain_type(
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt_template},
    retriever=retriever,
    llm=llm1,
)

In [None]:
from IPython.display import display, Markdown

def generate_response(query, chain=chain):
    """Run `query` through a QA chain and render the answer as Markdown.

    Args:
        query: The question passed to `chain.invoke`.
        chain: The chain to use. The default is the `chain` object that exists
            at the moment this function is defined.

    Returns:
        Whatever `display(...)` returns for the rendered answer.
    """
    answer = chain.invoke(query)['result']
    return display(Markdown(answer))

In [None]:
generate_response(query="What is LMS?")

I don't have enough information to answer this question.

In [None]:
generate_response(query="how are you?")

I am an AI-powered virtual assistant, I do not have feelings or emotions.

In [None]:
generate_response(query="Hi")

Hello! How can I assist you with your studies today?

In [None]:
generate_response(query="What is your name?")

I am EdzLms, your virtual assistant designed by EdzLearn Service Private Limited.

In [None]:
generate_response(query="What are the services provided by EdzLearn?")

EdzLearn provides educational services, including:
- Sales Training
- Continuous Learning for Employee
- Train Your Customers
- Insurance Sectors
- Monetise Content with eCommerce
- Extended Training Portal
- Healthcare
- Other Industry

In [None]:
generate_response(query="What is AI?")

I don't have enough information to answer this question.

In [None]:
generate_response(query="What is EdzLms?")

I am EdzLms, your virtual assistant designed by EdzLearn Service Private Limited.

In [None]:
generate_response(query="Give some key features of EdzLearn")

Some of the key features of EdzLearn are:

- Fully customised
- Learning path
- Scorm 1.2, Tincan
- Virtual lab
- Multi-lingual
- Gamification
- Live class
- E-commerce
- ILT and VLT
- Mobile App
- Interactive Content
- Automation
- Attendance
- 360 Feedback
- Mentoring
- AI Learning

In [None]:
generate_response(query="Tell me something about you.")

I am EdzLms, your virtual assistant designed by EdzLearn Service Private Limited.

In [None]:
generate_response(query="Tell me about 'Sales Training' in detail.")

EdzLMS is specialized software designed to help FMCG companies create, manage, and deliver eLearning content and training programs to sales teams. As online learning continues to grow, leading LMS platforms like EdzLMS have gained global recognition.
A top-notch LMS simplifies teaching and learning, fosters collaboration, and builds engaging learning environments.

**Benefits of using EdzLMS for Sales Training:**

* **Sales Team Onboarding:** Efficiently onboard your sales team, ensuring they are ready to hit the ground running on day one with access to sales brochures, collateral, materials, and presentations.
* **Comprehensive Product Knowledge:** Utilize the EdzLMS training platform to empower every salesperson with access to detailed product presentations anytime, anywhere. This accessibility facilitates easy content review and addresses prospective client queries effectively.
* **Skill Gap Assessment:** Conduct skill gap analyses for your sales team to tailor learning opportunities and enhance their performance, enabling them to achieve sales targets promptly and effectively.
* **BITE LEARNING:** Microlearning Even while on the move, the short, video-based microlearning features assist your team in pinpointing key points crucial for the sales process.
* **Blended learning:** Utilizing tools like live classes, video feedback, and audio feedback, you can deliver a blended learning experience that will be highly valued by your global sales team.

In [None]:
generate_response(query="What kind of solutions do you provide?")

I don't have enough information to answer this question.

In [None]:
retriever.invoke("What kind of solutions do you provide?")

## **Perform Re-Ranking (Using Cohere API):**

In [None]:
!pip install cohere

In [None]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

In [None]:
# Read the Cohere key from the environment instead of pasting it into the
# notebook; falls back to an empty string when the variable is not set.
COHERE_API = os.environ.get("COHERE_API_KEY", "")

In [None]:
# Create the compressor, which re-ranks retrieved documents using the Cohere API.
# `top_n` is the number of highest-scoring documents kept after re-ranking; it
# is set explicitly here rather than left at the library default.
compressor = CohereRerank(cohere_api_key=COHERE_API, top_n=5)

  warn_deprecated(


In [None]:
# Create the compression retriever: documents are fetched by the hybrid-search
# `retriever` and then re-ranked by `compressor`.
# ContextualCompressionRetriever only has the `base_compressor` and
# `base_retriever` settings. It has no `k` or `return_score` option, so none is
# passed here; how many documents come back is decided by the compressor
# (CohereRerank's `top_n`).
compressor_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever,
)

In [None]:
# Use `invoke`, the same retriever entry point the other retrieval cells in
# this notebook call, instead of the deprecated `get_relevant_documents`.
re_ranked_docs = compressor_retriever.invoke("What is LMS?")

In [None]:
re_ranked_docs

[Document(page_content='Create branded different portals for different set of learners and manage.\t\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\tReporting and Tracking\t\t\t\t\t\t\t\t\n                               \n\t\t\t\t\t\t\t                            \n\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\n\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\t\tCheck visual reports at one place and download.\t\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\tLMS Overview\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\t\tImmerse yourself in our innovative e-learning platform, designed with customizable solutions and powered by AI-driven learning management technology.\t\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\

In [None]:
# Create a LLM Chain:

# Same QA setup as before ("stuff" chain, `llm1`, custom prompt), but the
# documents now come from the re-ranking `compressor_retriever`.
new_chain = RetrievalQA.from_chain_type(
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt_template},
    retriever=compressor_retriever,
    llm=llm1,
)

In [None]:
generate_response(query="What is LMS?", chain=new_chain)

I don't have enough information to answer this question.

In [None]:
generate_response(query="Hi", chain=new_chain)

Hello! How can I assist you with your studies today?

In [None]:
generate_response(query="What kind of services provided by EdzLms?", chain=new_chain)

EdzLMS provides on-demand access to the latest research and continuing education courses, ensuring physicians stay updated and can easily manage their certifications.

Our platform offers flexible online training modules on new medical procedures and patient care techniques, accommodating the demanding schedules of nurses.

Our platform provides resources on new medications, drug interactions, and regulatory updates, ensuring pharmacists stay informed and provide accurate patient consultations.

We provide training on the latest regulations, EHR systems, and office management techniques, ensuring administrative staff work efficiently and compliantly

EdzLMS offers tools for tracking staff training and managing compliance records, ensuring healthcare managers can maintain a well-trained, compliant team.

We provide comprehensive training on patient care techniques and procedures, ensuring patient care technicians deliver high-quality care

In [None]:
generate_response(query="What is EdzLearn?", chain=new_chain)

Edzlearn Services Private Limited is at the forefront of delivering innovative LMS solutions, tailored for various industries and educational institutions worldwide. We specialize in empowering organizations to develop captivating and interactive learning experiences that resonate with learners.