# **Simple RAG Application** using LangChain and OpenAI

In [1]:
# Import necessary libraries
import os
from dotenv import load_dotenv

# Load environment variables from the .env file one directory above the notebook.
# The call is the cell's last expression, so its boolean return value is displayed.
DOTENV_PATH = "../.env"
load_dotenv(DOTENV_PATH)

True

### Initialize OpenAI LLM

In [2]:
from langchain_openai import ChatOpenAI

# Read the OpenAI API key from the environment (expected to come from the .env
# file loaded earlier) and fail fast with a clear message if it is missing,
# rather than hitting an authentication error on the first model call.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to ../.env or export it before running this notebook."
    )

# Initialize the ChatOpenAI model, passing the validated key explicitly so the
# value read above is the one actually used.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    api_key=OPENAI_API_KEY,
)

### Initialize Embedding Model

In [3]:
from langchain_openai import OpenAIEmbeddings
# Embedding model shared by the notebook: it is handed to the vector store for
# the documents and is also called directly to embed query text.
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

### Create and Embed Documents

In [4]:
from langchain_core.documents import Document

# Demo corpus as (source label, article text) pairs. Each pair becomes one
# Document whose metadata stores the label under the "source" key.
news_items = [
    (
        "cricket news",
        "The T20 World Cup 2024 is in full swing, bringing excitement and drama to cricket fans worldwide.India's team, captained by Rohit Sharma, is preparing for a crucial match against Ireland, with standout player Jasprit Bumrah expected to play a pivotal role in their campaign.The tournament has already seen controversy, particularly concerning the pitch conditions at Nassau County International Cricket Stadium in New York, which came under fire after a low-scoring game between Sri Lanka and South Africa.",
    ),
    (
        "football news",
        "The world of football is buzzing with excitement as major tournaments and league matches continue to captivate fans globally.In the UEFA Champions League, the semi-final matchups have been set, with defending champions Real Madrid set to face Manchester City, while Bayern Munich will take on Paris Saint-Germain.Both ties promise thrilling encounters, featuring some of the best talents in world football.",
    ),
    (
        "election news",
        "As election season heats up, the latest developments reveal a highly competitive atmosphere across several key races.The presidential election has seen intense campaigning from all major candidates, with recent polls indicating a tight race.Incumbent President Jane Doe is seeking re-election on a platform of economic stability and healthcare reform, while her main rival, Senator John Smith, focuses on education and climate change initiatives.",
    ),
    (
        "ai revolution news",
        "The AI revolution continues to transform industries and reshape the global economy.Significant advancements in artificial intelligence have led to breakthroughs in healthcare, with AI-driven diagnostics improving patient outcomes and reducing costs.Autonomous systems are becoming increasingly prevalent in logistics and transportation, enhancing efficiency and safety.",
    ),
]

documents = [
    Document(page_content=text, metadata={"source": source})
    for source, text in news_items
]

# The data is passed into LangChain `Document` objects.

In [5]:
# Create a vector store using the documents and embedding model
from langchain_chroma import Chroma

# Give every document a stable, explicit id. Without ids the store assigns new
# random ones on each call, so re-running this cell in the same kernel can add
# duplicate copies of the documents and skew later similarity searches.
document_ids = [f"doc-{index}" for index in range(len(documents))]

# Embed the documents with the embedding model and index them in Chroma.
vectorstore = Chroma.from_documents(
    documents,
    embedding=embedding_model,
    ids=document_ids,
)

# The documents are embedded and the resulting vectors are stored in the vector store.

### Perform Similarity Search

In [8]:
# Fetch the stored documents most similar to the query text
results = vectorstore.similarity_search("test match")

# One divider line per hit, followed by its text and its metadata
for match in results:
    print("------------------------", match.page_content, match.metadata, sep="\n")

------------------------
The T20 World Cup 2024 is in full swing, bringing excitement and drama to cricket fans worldwide.India's team, captained by Rohit Sharma, is preparing for a crucial match against Ireland, with standout player Jasprit Bumrah expected to play a pivotal role in their campaign.The tournament has already seen controversy, particularly concerning the pitch conditions at Nassau County International Cricket Stadium in New York, which came under fire after a low-scoring game between Sri Lanka and South Africa.
{'source': 'cricket news'}
------------------------
The world of football is buzzing with excitement as major tournaments and league matches continue to captivate fans globally.In the UEFA Champions League, the semi-final matchups have been set, with defending champions Real Madrid set to face Manchester City, while Bayern Munich will take on Paris Saint-Germain.Both ties promise thrilling encounters, featuring some of the best talents in world football.
{'source'

In [9]:
# Same search as before, with a query that is closest to the AI article
results = vectorstore.similarity_search("machine learning")

# Print a divider, the document text, and the metadata for every hit
for hit in results:
    for line in ("------------------------", hit.page_content, hit.metadata):
        print(line)

------------------------
The AI revolution continues to transform industries and reshape the global economy.Significant advancements in artificial intelligence have led to breakthroughs in healthcare, with AI-driven diagnostics improving patient outcomes and reducing costs.Autonomous systems are becoming increasingly prevalent in logistics and transportation, enhancing efficiency and safety.
{'source': 'ai revolution news'}
------------------------
The world of football is buzzing with excitement as major tournaments and league matches continue to captivate fans globally.In the UEFA Champions League, the semi-final matchups have been set, with defending champions Real Madrid set to face Manchester City, while Bayern Munich will take on Paris Saint-Germain.Both ties promise thrilling encounters, featuring some of the best talents in world football.
{'source': 'football news'}
------------------------
As election season heats up, the latest developments reveal a highly competitive atmosp

### Embed Query and Perform Similarity Search by Vector

In [10]:
# Convert the query text into an embedding vector with the embedding model
QUERY_TEXT = "machine learning"
query_embedding = embedding_model.embed_query(QUERY_TEXT)

# Preview only the first ten components instead of the whole vector
query_embedding[0:10]

[-0.012126085348427296,
 -0.011351445689797401,
 0.002754438668489456,
 -0.047455597668886185,
 0.03367893397808075,
 0.004483969882130623,
 0.004346918314695358,
 0.03286854177713394,
 -0.019079962745308876,
 0.022833984345197678]

In [11]:
# Number of components in the query embedding; as the cell's last expression
# the value is displayed as the cell output rather than printed.
len(query_embedding)

1536

In [12]:
# Search with the precomputed query vector instead of the raw query text
results = vectorstore.similarity_search_by_vector(query_embedding)

# Each hit is printed as: divider, document text, metadata
for hit in results:
    print(f"------------------------\n{hit.page_content}\n{hit.metadata}")

------------------------
The AI revolution continues to transform industries and reshape the global economy.Significant advancements in artificial intelligence have led to breakthroughs in healthcare, with AI-driven diagnostics improving patient outcomes and reducing costs.Autonomous systems are becoming increasingly prevalent in logistics and transportation, enhancing efficiency and safety.
{'source': 'ai revolution news'}
------------------------
The world of football is buzzing with excitement as major tournaments and league matches continue to captivate fans globally.In the UEFA Champions League, the semi-final matchups have been set, with defending champions Real Madrid set to face Manchester City, while Bayern Munich will take on Paris Saint-Germain.Both ties promise thrilling encounters, featuring some of the best talents in world football.
{'source': 'football news'}
------------------------
As election season heats up, the latest developments reveal a highly competitive atmosp

### Create Retriever

In [15]:
# Wrap the vector store in a retriever that performs similarity search.
# "k" controls how many documents come back per query:
#   k=1 -> only the single best match
#   k=2 -> the two best matches
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=1),
)

# batch() takes a list of queries and returns one list of documents per query
batch_results = retriever.batch(["machine learning"])

for docs_for_query in batch_results:
    print("------------------------")
    for doc in docs_for_query:
        print(doc.page_content, doc.metadata, sep="\n")

------------------------
The AI revolution continues to transform industries and reshape the global economy.Significant advancements in artificial intelligence have led to breakthroughs in healthcare, with AI-driven diagnostics improving patient outcomes and reducing costs.Autonomous systems are becoming increasingly prevalent in logistics and transportation, enhancing efficiency and safety.
{'source': 'ai revolution news'}


In [16]:
# Reuse the `retriever` created in the previous cell (k=1) and send it two
# queries at once; the result holds one list of documents per query.
batch_results = retriever.batch(["machine learning", "test match"])

for hits in batch_results:
    # Divider between the results of consecutive queries
    print("------------------------")
    for doc in hits:
        print(doc.page_content)
        print(doc.metadata)

------------------------
The AI revolution continues to transform industries and reshape the global economy.Significant advancements in artificial intelligence have led to breakthroughs in healthcare, with AI-driven diagnostics improving patient outcomes and reducing costs.Autonomous systems are becoming increasingly prevalent in logistics and transportation, enhancing efficiency and safety.
{'source': 'ai revolution news'}
------------------------
The T20 World Cup 2024 is in full swing, bringing excitement and drama to cricket fans worldwide.India's team, captained by Rohit Sharma, is preparing for a crucial match against Ireland, with standout player Jasprit Bumrah expected to play a pivotal role in their campaign.The tournament has already seen controversy, particularly concerning the pitch conditions at Nassau County International Cricket Stadium in New York, which came under fire after a low-scoring game between Sri Lanka and South Africa.
{'source': 'cricket news'}


### Create Prompt Template

In [17]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Prompt text for the single human message. {question} and {context} are the
# template variables filled in when the prompt is invoked.
message = (
    "\n"
    "Answer this question using the provided context only.\n"
    "\n"
    "{question}\n"
    "\n"
    "Context:\n"
    "{context}\n"
)

# Build the chat prompt from one ("human", template) pair
human_turn = ("human", message)
prompt = ChatPromptTemplate.from_messages([human_turn])

### Chain Retriever and Prompt Template with LLM

In [None]:
def format_docs(docs):
    """Join the page content of the retrieved documents into one plain-text string.

    Without this step the prompt's {context} slot receives the Python repr of a
    list of Document objects (metadata and repr syntax included) instead of the
    article text alone.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The dict sends the input question to both prompt variables:
#   "context"  -> documents found by the retriever, flattened to text by format_docs
#   "question" -> the original input, passed through unchanged
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)

# RunnablePassthrough -> forwards the text passed in when the chain is invoked (e.g. "current state of 2024 t20 world cup") unchanged as the question.

In [19]:
# Ask a question that the indexed cricket article covers, then show only the
# text of the model's reply.
question = "current state of 2024 t20 world cup"
response = chain.invoke(question)
print(response.content)

The current state of the 2024 T20 World Cup is that it is in full swing, with India's team led by captain Rohit Sharma preparing for a crucial match against Ireland. Jasprit Bumrah is expected to play a key role in their campaign. There has been controversy surrounding the pitch conditions at Nassau County International Cricket Stadium in New York, particularly after a low-scoring game between Sri Lanka and South Africa.


In [20]:
# Ask something unrelated to the indexed documents to see how the chain responds
question = "How are you?"
response = chain.invoke(question)
print(response.content)

I am doing well, thank you.
