<a href="https://colab.research.google.com/github/darkwingpatil/Ml_hackethons/blob/main/RAG_LCEL_Updated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **RAG : Retrieval Augmented Generation**

OBJECTIVES:

1. Load the Documents
2. Splitting the documents into chunks
3. Embedding the chunks and storing them in vector db
4. Retrieving the relevant chunks to the query
 * Addressing Diversity
 * Addressing Specificity
5. Connecting with LLM to get a final grounded answer
6. Re-ranking example with open source model

In [2]:
!pip install openai
!pip install langchain-core
!pip install langchain-openai
!pip install langchain-community
!pip install chromadb
!pip install pypdf
!pip install transformers

Collecting langchain-openai
  Downloading langchain_openai-0.2.5-py3-none-any.whl.metadata (2.6 kB)
Collecting langchain-core<0.4.0,>=0.3.15 (from langchain-openai)
  Downloading langchain_core-0.3.15-py3-none-any.whl.metadata (6.3 kB)
Collecting tiktoken<1,>=0.7 (from langchain-openai)
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading langchain_openai-0.2.5-py3-none-any.whl (50 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_core-0.3.15-py3-none-any.whl (408 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m408.7/408.7 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m49.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collec

In [2]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [3]:
!pip install sentence-transformers
!pip install  faiss-cpu
!pip install text-generation

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0
Collecting text-generation
  Downloading text_generation-0.7.0-py3-none-any.whl.metadata (8.5 kB)
Downloading text_generation-0.7.0-py3-none-any.whl (12 kB)
Installing collected packages: text-generation
Successfully installed text-generation-0.7.0


In [4]:
import openai
import os
from datasets import load_dataset

In [None]:
f = open('/content/ts_openapi_key.txt')
api_key = f.read()
os.environ['OPENAI_API_KEY'] = api_key
openai.api_key= os.getenv('OPENAI_API_KEY')

In [None]:
from langchain_openai import ChatOpenAI

### **Loading the documents**

[PDF Loader](https://python.langchain.com/docs/how_to/document_loader_pdf/)

In [None]:
from langchain_community.document_loaders import PyPDFLoader
# Load PDF
loaders = [
    # Duplicate documents on purpose
    PyPDFLoader("/content/pca_d1.pdf"),
    PyPDFLoader("/content/ens_d2.pdf"),
    PyPDFLoader("/content/ens_d2.pdf"),
]
docs = []
for loader in loaders:
    docs.extend(loader.load())

In [None]:
print(docs[0].page_content)

### **Splitting of document**

[Recursively split by character](https://python.langchain.com/docs/how_to/recursive_text_splitter/)

[Split by character](https://python.langchain.com/docs/how_to/character_text_splitter/)

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# Split
#from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 50
)

In [None]:
splits = text_splitter.split_documents(docs)
print(len(splits))
print(len(splits[0].page_content) )
splits[0].page_content

In [None]:
splits

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import HuggingFaceDatasetLoader
from datasets import load_dataset
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFDirectoryLoader


# Load your dataset from Hugging Face
# dataset = load_dataset("WingPatil/guanaco-dark-mat-lat-1")  # Replace with the actual dataset name
# dataset
# # Initialize your text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50
)
loader = HuggingFaceDatasetLoader(
    'WingPatil/guanaco-dark-mat-lat-1',
    'text'
)

documents  = loader.load()
data_chunks = text_splitter.split_documents(documents)
data_chunks

# # Extract and split data for each item
# data_chunks = []
# for item in dataset['train']:  # Access the desired split, e.g., 'train'
#     text = item['text']  # Replace 'text' with the actual column name containing text
#     chunks = text_splitter.split_text(text)
#     data_chunks.extend(chunks)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/273 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/494k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1078 [00:00<?, ? examples/s]

[Document(metadata={}, page_content='"<s>[INST] Hello, I was wondering if a user should have the same content libraries as the plan they are on. [/INST] If I\'m understanding you correctly, yes and no. A user on a plan may or may not have a product license assigned to them. The potential licenses they can have are assigned to the plan, but provisioned to the user. And the content libraries a user can access are tied to the product license(s) the user has been assigned. </s>"'),
 Document(metadata={}, page_content='"<s>[INST] <@U05JG26Q5CL> has joined the channel [/INST] [] </s>"'),
 Document(metadata={}, page_content='"<s>[INST] Hi team. Plan `morneau-shepell-ltd-c4ba9` is showing two base products under their People Directory, and I\\u2019m not sure why:\\n\\u2022 <https://app.pluralsight.com/subscription/plans/morneau-shepell-ltd-c4ba9|Plan Subscriptions page> shows the plan should have 205 Business ENT licenses + 205 Labs add-ons\\n\\u2022 <https://app.pluralsight.com/plans/morneau-

### **Embeddings**

Let's take our splits and embed them.

In [8]:
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                       model_kwargs={'device': 'cuda'})

### **Understanding similarity search with a toy example**

In [9]:
sentence1 = " Hi team. Question from a customer. A customer that uses SSO sign in wants the invite link to open directly to their SSO landing [<https://app.pluralsight.com/sso/protivitius>] instead of the PS login page as it is causing confusion for some users. Is this possible?"
sentence2 = "Hi team i want to know is there any way to sign in via sso rather then conventional login method"
sentence3 = "Hi team, I am hoping to add a new user permission/enabled feature for users making requests against this endpoint `${PS_AUTH_ENDPOINT}/api/v5/users/${userHandle}` - is that something you can help with?"

In [10]:
embedding1 = embeddings.embed_query(sentence1)
embedding2 = embeddings.embed_query(sentence2)
embedding3 = embeddings.embed_query(sentence3)

In [11]:
len(embedding1), len(embedding2), len(embedding3)

(384, 384, 384)

In [None]:
embedding1

[-0.0036041128914803267,
 -0.01474357582628727,
 -0.06543663144111633,
 0.015418014489114285,
 0.05502630025148392,
 0.024719147011637688,
 0.03700454533100128,
 0.003921367693692446,
 -0.008054926991462708,
 0.0037865315098315477,
 -0.008618907071650028,
 0.05158481374382973,
 0.010317721404135227,
 0.05656490847468376,
 0.061862554401159286,
 -0.00800695363432169,
 0.01192796602845192,
 0.002263538772240281,
 0.05864851549267769,
 0.1035747230052948,
 0.048002611845731735,
 -0.08128052949905396,
 -0.07789265364408493,
 0.03867329657077789,
 -0.00623359065502882,
 -0.12954512238502502,
 0.06765981018543243,
 0.12560631334781647,
 -0.015625260770320892,
 0.07704520970582962,
 -0.02008511871099472,
 0.08120304346084595,
 0.009682216681540012,
 0.005478506907820702,
 0.044970668852329254,
 -0.031552620232105255,
 -0.025945883244276047,
 0.01354120671749115,
 -0.08013932406902313,
 -0.02994975447654724,
 0.026267152279615402,
 -0.02670617401599884,
 0.00022525839449372143,
 0.026792427524

In [12]:
import numpy as np

def cosine_similarity(vector1, vector2):
    # Ensure that the vectors are numpy arrays
    vector1 = np.array(vector1)
    vector2 = np.array(vector2)

    # Calculate the dot product of the vectors
    dot_product = np.dot(vector1, vector2)

    # Calculate the magnitude (norm) of the vectors
    norm_vector1 = np.linalg.norm(vector1)
    norm_vector2 = np.linalg.norm(vector2)

    # Compute cosine similarity
    if norm_vector1 == 0 or norm_vector2 == 0:
        return 0  # Avoid division by zero
    return dot_product / (norm_vector1 * norm_vector2)

In [13]:
cosine_similarity(embedding1, embedding2), cosine_similarity(embedding1, embedding3), cosine_similarity(embedding2, embedding3)

(0.6183150949823757, 0.25260460743872987, 0.22902608876257838)

### **Vectorstores**

In [14]:
from langchain_community.vectorstores import Chroma # Light-weight and in memory

In [15]:
persist_directory = 'docs/chroma/'
!rm -rf ./docs/chroma  # remove old database files if any


In [16]:
vectordb = Chroma.from_documents(
    documents=data_chunks, # splits we created earlier
    embedding=embeddings,
    persist_directory=persist_directory, # save the directory
)

In [17]:
print(vectordb._collection.count()) # same as number of splites

3723


### **Similarity Search**

In [22]:
question = "A customer that uses SSO sign in wants the invite link to open directly to their SSO landing?"

In [19]:
docs = vectordb.similarity_search(question,k=6) # k --> No. of doc as return
print(len(docs))
for i in range(len(docs)):
  print(docs[i].page_content,'\n','####################')

6
"<s>[INST] Hi team. Question from a customer. A customer that uses SSO sign in wants the invite link to open directly to their SSO landing [<https://app.pluralsight.com/sso/protivitius>] instead of the PS login page as it is causing confusion for some users. Is this possible? I could not find anything definitive. Would you be the team that handles customizing the link on an invite (if it's possible) [/INST] I don\u2019t think this has been done before. The point of the invite flow is that a user 
 ####################
"<s>[INST] Hi team. Question from a customer. A customer that uses SSO sign in wants the invite link to open directly to their SSO landing [<https://app.pluralsight.com/sso/protivitius>] instead of the PS login page as it is causing confusion for some users. Is this possible? I could not find anything definitive. Would you be the team that handles customizing the link on an invite (if it's possible) [/INST] I don\u2019t think this has been done before. The point of the 

### **Edge case where failure may happen**

1. Lack of Diversity : Semantic search fetches all similar documents, but does not enforce diversity.

    - Notice that we're getting duplicate chunks (because of the duplicate `ens_d2.pdf` in the index). `docs[0]` and `docs[1]` are indentical.

  **Addressing Diversity - MMR-Maximum Marginal Relevance**

2. Lack of spefificity:  The question may be from a particular doc but answer may contain information from other doc.

  **Addressing Specificity: Working with metadata - Manually**

  **Working with metadata using self-query retriever -Automatically**

In [20]:
question= 'how ensemble method works?'
docs = vectordb.similarity_search(question,k=3) # Without MMR
for i in range(len(docs)):
  print(docs[i].page_content,'\n','####################')

a user in V1 LearnerHistory. They don\u2019t show up in the raw data for V1 LearnerHistory. The `learner` topic(s) contain more detailed information about learners while `learnerHistory` contains only the information about when major changes were made to a learner.  (note that in our context a `learner`  is a plan user who has been provisioned at least one content license, if you need info about people without content licenses you'll want one of our other topics).\nCan you give me an example of 
 ####################
between these topics, but we intentionally made independent topics for each of these since these are optional relationships and represent specific concepts in our codebase. Logically speaking, a `PlanUser` may have 0 or 1 `Leader` records, and 0 or 1 `Learner` records, and each of these 3 concepts, though connected, can be considered logically distinct.\n\n\u2022 Yes this is possible, and one of the main reasons we constructed these topics this way\n\u2022 Yes, and most of

**Example 1. Addressing Diversity - MMR-Maximum Marginal Relevance**

In [23]:
docs_with_mmr=vectordb.max_marginal_relevance_search(question, k=3, fetch_k=6) # With MMR
for i in range(len(docs_with_mmr)):
  print(docs_with_mmr[i].page_content,'\n','####################')

"<s>[INST] Hi team. Question from a customer. A customer that uses SSO sign in wants the invite link to open directly to their SSO landing [<https://app.pluralsight.com/sso/protivitius>] instead of the PS login page as it is causing confusion for some users. Is this possible? I could not find anything definitive. Would you be the team that handles customizing the link on an invite (if it's possible) [/INST] I don\u2019t think this has been done before. The point of the invite flow is that a user 
 ####################
keys and they have a workday connector API key setup so we were also guessing it could be that <@U0439T42QQ1> When checking for pending invites after the user signs in via SSO, do you check if a license got assigned to them successfully via SSO autoprovisioning?\n\nThere is a small edge case where this user <mailto:Erika.Wilkinson@cellularsales.com|Erika.Wilkinson@cellularsales.com> got an invitation issued for them, for the last license of the plan.\n\nPlan ID: 
 #######

**Example 2. Addressing Specificity: Working with metadata - Manually**

In [None]:
# Without metadata information
question = "what is the role of variance in pca?"
docs = vectordb.similarity_search(question,k=7)
for doc in docs:
    print(doc.metadata) # metadata contains information about from which doc the answer has been fetched

Notice above, the 2nd last information is from 'ens_d2' doc.

In [None]:
# With metadata information
question = "what is the role of variance in pca?"
docs = vectordb.similarity_search(
    question,
    k=7,
    filter={"source":'/content/pca_d1.pdf'} # manually passing metadata, using metadata filter.
)

for doc in docs:
    print(doc.metadata)

[**Addressing Specificity -Automatically: Working with metadata using self-query retriever**](https://python.langchain.com/docs/how_to/self_query/)

### **Additional tricks: Compression**

Another approach for improving the quality of retrieved docs is compression. Information most relevant to a query may be buried in a document with a lot of irrelevant text. Passing that full document through your application can lead to more expensive LLM calls and poorer responses.

[Contextual compression](https://python.langchain.com/docs/how_to/contextual_compression/) is meant to fix this.

### **Better Approach**

**[Vectorstore as a retriever](https://python.langchain.com/docs/how_to/vectorstore_retriever/)**

In [None]:
# Without MMR
# question = "What is principal component analysis?"
retriever = vectordb.as_retriever(search_kwargs={"k": 3})
docs = retriever.invoke(question)
docs

[Document(metadata={}, page_content='"<s>[INST] Hi team. Question from a customer. A customer that uses SSO sign in wants the invite link to open directly to their SSO landing [<https://app.pluralsight.com/sso/protivitius>] instead of the PS login page as it is causing confusion for some users. Is this possible? I could not find anything definitive. Would you be the team that handles customizing the link on an invite (if it\'s possible) [/INST] I don\\u2019t think this has been done before. The point of the invite flow is that a user'),
 Document(metadata={}, page_content='"<s>[INST] Hi team. Question from a customer. A customer that uses SSO sign in wants the invite link to open directly to their SSO landing [<https://app.pluralsight.com/sso/protivitius>] instead of the PS login page as it is causing confusion for some users. Is this possible? I could not find anything definitive. Would you be the team that handles customizing the link on an invite (if it\'s possible) [/INST] I don\\u

In [24]:
# With MMR
retriever = vectordb.as_retriever(search_type="mmr",search_kwargs={"k": 5, "fetch_k":10})
docs = retriever.invoke(question)
docs

[Document(metadata={}, page_content='"<s>[INST] Hi team. Question from a customer. A customer that uses SSO sign in wants the invite link to open directly to their SSO landing [<https://app.pluralsight.com/sso/protivitius>] instead of the PS login page as it is causing confusion for some users. Is this possible? I could not find anything definitive. Would you be the team that handles customizing the link on an invite (if it\'s possible) [/INST] I don\\u2019t think this has been done before. The point of the invite flow is that a user'),
 Document(metadata={}, page_content='The point of the invite flow is that a user isn\\u2019t expected to go through SSO for license redemption (two separate flows), so why would the invite link redirect to the SSO sign in page. Not that it\\u2019 s impossible to do it.\\n\\nI think what I\\u2019ve seen most customers do in this case is just send an email themselves with the SSO sign in instructions, rather than depend on the invite function </s>"'),
 Do

### **Retrieval + Question Answering :  Connecting with LLMs**

In [25]:
retriever = vectordb.as_retriever(search_type="mmr",search_kwargs={"k": 7, "fetch_k":15})

In [26]:
llm_name = "llama3.1-70b"
print(llm_name)

llama3.1-70b


In [54]:
!pip install langchain_huggingface

Collecting langchain_huggingface
  Downloading langchain_huggingface-0.1.2-py3-none-any.whl.metadata (1.3 kB)
Downloading langchain_huggingface-0.1.2-py3-none-any.whl (21 kB)
Installing collected packages: langchain_huggingface
Successfully installed langchain_huggingface-0.1.2


In [102]:
!pip install huggingface_hub
from huggingface_hub import login

# Log in to Hugging Face


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [55]:
from langchain_huggingface import HuggingFaceEndpoint

In [1]:
# from langchain_openai import ChatOpenAI
from langchain.llms import OpenAI


llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    task="text-generation",
    max_new_tokens = 512,
    top_k = 30,
    temperature = 0.1,
    repetition_penalty = 1.03,
)
# HuggingFaceH4/zephyr-7b-beta
# mistralai/Mistral-7B-Instruct-v0.2    too good
#  meta-llama/Llama-3.2-3B-Instruct

ModuleNotFoundError: No module named 'langchain_community'

In [76]:
from langchain_core.prompts import PromptTemplate     #
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

In [84]:
# Build prompt
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template)

### **Creating final RAG Chain**

In [69]:
from langchain_core.runnables import RunnableLambda

In [104]:
rag_chain= {"context":RunnableLambda(lambda x:x["question"])| retriever,
         "question": lambda x:x["question"]}|QA_PROMPT | llm |StrOutputParser()

In [105]:
# Question 1
try:
  response=rag_chain.invoke({"question" :"A customer that uses SSO sign in wants the invite link to open directly to their SSO landing?"})
  print(response)
except Exception as e:
  print(e)

 (Request ID: SPaiLnHBAP0otouIDDvlA)

Bad request:
Model requires a Pro subscription; check out hf.co/pricing to learn more. Make sure to include your HF token in your query.


* Checking what is retreived from the retriever:

In [None]:
(RunnableLambda(lambda x:x["question"])| retriever).invoke({"question" :"What is principal component analysis?"})

[Document(metadata={'page': 1, 'source': '/content/pca_d1.pdf'}, page_content='2 \n \n \n \nSo, what does Principal Component Analysis (PCA) do? \nPCA finds a new set of dimensions (or a set of basis of views) such that all the dimensions are  \northogonal (and hence linearly independent) and ranked according to the variance of data along  \nthem. It means more important principle axis occurs first. (more important = more variance/more  \nspread out data) \n \nHow does PCA work? \n• Calculate the covariance matrix X of data points.'),
 Document(metadata={'page': 0, 'source': '/content/pca_d1.pdf'}, page_content='1 \n \n \nN \n \n1 Principal Component Analysis \nIn real world data analysis tasks we analyze complex data i.e. multi dimensional data. We plot the  \ndata and find various patterns in it or use it to train some machine learning models.  One way to  \nthink about dimensions is that suppose you have an data point x , if we consider this data point as \na physical object then di

In [None]:
# Question 2
response=rag_chain.invoke({"question" :"how ensemble method works?"})
print(response)

* Checking what is retreived from the retriever:

In [None]:
(RunnableLambda(lambda x:x["question"])| retriever).invoke({"question" :"how ensemble method works?"})

In [None]:
# Question 3
response=rag_chain.invoke({"question" :"What is Linear Regression? "})
print(response)

* Checking what is retreived from the retriever:

In [None]:
(RunnableLambda(lambda x:x["question"])| retriever).invoke({"question" :"What is Linear Regression? "})

[**Details of Chroma through LangChain**](https://python.langchain.com/docs/integrations/vectorstores/chroma/)

### **Download the vector DB**

In [None]:
# Zip the entire folder
!zip -r /content/docs.zip /content/docs

In [None]:
from google.colab import files
files.download("/content/docs.zip")

### **Upload the vector db from previous step and unzip**

In [None]:
!unzip /content/docs.zip  -d /

In [None]:
embedding = OpenAIEmbeddings()

vectordb = Chroma(persist_directory = 'docs/chroma/',
                  embedding_function = embedding
                  )

### **Re-ranking example wiht Open Source model**

* [Retrieve & Re-Rank](https://www.sbert.net/examples/applications/retrieve_rerank/README.html)
* [MS MARCO Cross-Encoders](https://www.sbert.net/docs/pretrained-models/ce-msmarco.html) for Re-ranking
  * Usage with **SentenceTransformers
Pre-trained models** can be used like this:

In [None]:
!pip install sentence-transformers



In [None]:
import torch

In [None]:
# Define a query and some candidate sentences
query = "I love programming in Python."

# Some toy data representing candidate sentences/documents
candidates = [
    "Python is a great programming language.",
    "I enjoy long walks on the beach.",
    "Machine learning can be used to build models.",
    "I like writing code in Python.",
    "Artificial intelligence is fascinating."
]

In [None]:
Paragraph1=candidates[0]
Paragraph2=candidates[1]
Paragraph3=candidates[2]

In [None]:
from sentence_transformers import CrossEncoder
model_name='cross-encoder/ms-marco-TinyBERT-L-2-v2'
model = CrossEncoder(model_name, max_length=512)
scores = model.predict([(query, Paragraph1), (query, Paragraph2), (query, Paragraph3)])
print(scores)

* **Usage with Transformers**

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

features = tokenizer([query, query, query], [Paragraph1, Paragraph2, Paragraph3], padding=True, truncation=True, return_tensors="pt")

model.eval()
with torch.no_grad():
    scores = model(**features).logits
    print(scores)