In [1]:
import os 
from dotenv import load_dotenv
load_dotenv()

# load_dotenv() has already copied the .env values into os.environ, so
# re-assigning os.environ[k] = os.getenv(k) does nothing when the key exists
# and raises "TypeError: str expected, not NoneType" when it does not.
# Check for the keys explicitly and report what is missing instead.
_REQUIRED_ENV_VARS = ("OPENAI_API_KEY", "LANGCHAIN_API_KEY")
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if not os.getenv(name)]
if _missing_env_vars:
    print(f"Warning: missing environment variables: {', '.join(_missing_env_vars)}")

# LangSmith tracing is enabled through LANGCHAIN_TRACING_V2; the earlier
# "TRACKING" spelling is not a recognised variable, so tracing never turned on.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

Calling the LLM

In [2]:
from langchain_openai import ChatOpenAI

# Chat model client; later cells in this notebook reuse this `llm` object.
llm = ChatOpenAI(model="gpt-4o-mini")
# A plain string is sent as the prompt; the reply comes back as a message object.
llm_response = llm.invoke("tell me a joke")

# Bare last expression so the notebook displays the full message (content + metadata).
llm_response

AIMessage(content='Why did the scarecrow win an award? \n\nBecause he was outstanding in his field!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 11, 'total_tokens': 29, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_bba3c8e70b', 'finish_reason': 'stop', 'logprobs': None}, id='run-962b93ef-d4d4-4295-921a-5c0c163a694b-0', usage_metadata={'input_tokens': 11, 'output_tokens': 18, 'total_tokens': 29, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

Parsing the output

In [3]:
from langchain_core.output_parsers import StrOutputParser

# Parser that reduces a chat message to its text content.
output = StrOutputParser()

# Applied to the message returned by the previous cell; displays the plain string.
output.invoke(llm_response)

'Why did the scarecrow win an award? \n\nBecause he was outstanding in his field!'

Chaining the model and the parser

In [8]:
# `|` composes the two steps: the model's message is fed straight into the parser.
chain = llm | output

# Invoking the chain therefore yields a plain string rather than a message object.
chain.invoke("tell me a joke")

"Why don't scientists trust atoms?\n\nBecause they make up everything!"

Structured Output

In [9]:
from typing import List
from pydantic import BaseModel, Field

# Target schema for the structured-output example: this class is handed to
# llm.with_structured_output(...) and the parsed reply is an instance of it.
# NOTE(review): the Field descriptions are presumably forwarded to the model as
# part of the schema, so treat them as prompt text when editing — confirm.
class MobileReview(BaseModel):
    phone_model: str = Field(description="Name and model of the phone")
    rating: float = Field(description="Overall rating out of 5")
    pros: List[str] = Field(description="List of positive aspects")
    cons: List[str] = Field(description="List of negative aspects")
    summary: str = Field(description="Brief summary of the review")

# Free-form review used as the input for the structured-output example.
review_text = """
    Just got my hands on the new Galaxy S21 and wow, this thing is slick! The screen is gorgeous,
    colors pop like crazy. Camera's insane too, especially at night - my Insta game's never been
    stronger. Battery life's solid, lasts me all day no problem.
    Not gonna lie though, it's pretty pricey. And what's with ditching the charger? C'mon Samsung.
    Also, still getting used to the new button layout, keep hitting Bixby by mistake.
    Overall, I'd say it's a solid 4 out of 5. Great phone, but a few annoying quirks keep it from
    being perfect. If you're due for an upgrade, definitely worth checking out!
    """

# Bind the MobileReview schema so the model's reply is parsed into that class.
structured_llm = llm.with_structured_output(MobileReview)
# Store the result under its own name. `output` already refers to the
# StrOutputParser and is piped into a chain again further down; overwriting it
# here would break that cell on a top-to-bottom run.
review = structured_llm.invoke(review_text)
print(review)
print(review.pros)


phone_model='Samsung Galaxy S21' rating=4.0 pros=['Gorgeous screen', 'Vibrant colors', 'Insane camera performance, especially at night', 'Strong battery life'] cons=['Pricey', 'No charger included', 'New button layout takes getting used to'] summary='Overall, a solid phone with a few annoying quirks that keep it from being perfect.'
['Gorgeous screen', 'Vibrant colors', 'Insane camera performance, especially at night', 'Strong battery life']


Prompt Template

In [10]:
from langchain_core.prompts import ChatPromptTemplate
# Template with a single placeholder, {topic}, filled in at invoke time.
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
# Invoking the template alone shows the rendered prompt value; no model call happens here.
prompt.invoke({"topic":"programming"})


ChatPromptValue(messages=[HumanMessage(content='Tell me a short joke about programming', additional_kwargs={}, response_metadata={})])

In [4]:
# Dedicated string parser instance for the prompt-based chains.
output_parser = StrOutputParser()

In [14]:
# prompt -> model -> text. Pipe into the parser instance created for this
# purpose (`output_parser`) so the chain does not depend on whatever the
# generic name `output` happens to refer to when this cell runs.
chain = prompt | llm | output_parser
chain.invoke({"topic": "programming"})

'Why do programmers prefer dark mode?\n\nBecause light attracts bugs!'

LLM Messages

In [15]:
from langchain_core.messages import HumanMessage, SystemMessage

# Variant 1: build the conversation by hand from message objects.
messages = [
    SystemMessage(content="You are a helpful assistant that tells jokes."),
    HumanMessage(content="Tell me about programming")
]
response = llm.invoke(messages)
print(response)

# Variant 2: the same two-message conversation expressed as a template with
# (role, text) tuples, so the topic can be supplied at invoke time.
template = ChatPromptTemplate([
    ("system", "You are a helpful assistant that tells jokes."),
    ("human", "Tell me about {topic}")
])
# No output parser in this chain, so `response` is the raw message object.
chain = template | llm
response = chain.invoke({"topic": "programming"})
print(response)


content='Sure! Programming is the process of creating instructions for computers to follow. These instructions are written in programming languages, which are designed to communicate with computers and create software applications, websites, games, and more.\n\nNow, for a little programming humor:\n\nWhy do programmers prefer dark mode?\n\nBecause light attracts bugs! 🐛💻\n\nIf you have any specific questions about programming or want to hear more jokes, feel free to ask!' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 87, 'prompt_tokens': 24, 'total_tokens': 111, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_bba3c8e70b', 'finish_reason': 'stop', 'logprobs': None} id='run-fb1e0b6b-f8bd-4ecb-8007-9daf82f06098-0' usage_met

Loading documents 

In [5]:
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing import List
from langchain_core.documents import Document
import os

def load_documents(folder_path: str) -> List[Document]:
    """Load every PDF and DOCX file located directly inside ``folder_path``.

    Files are visited in sorted filename order so repeated runs produce the
    documents (and therefore the chunks) in the same order. Extensions are
    matched case-insensitively. Entries that are not regular files are skipped.

    Args:
        folder_path: Directory to scan; sub-directories are not descended into.

    Returns:
        The concatenated ``loader.load()`` results of all supported files.

    Raises:
        OSError: If ``folder_path`` cannot be listed (propagated from os.listdir).
    """
    documents = []
    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        if not os.path.isfile(file_path):
            # A directory (even one named like "x.pdf") cannot be handed to a loader.
            print(f"Skipping non-file entry: {filename}")
            continue
        lowered = filename.lower()  # accept ".PDF" / ".Docx" as well
        if lowered.endswith('.pdf'):
            loader = PyPDFLoader(file_path)
        elif lowered.endswith('.docx'):
            loader = Docx2txtLoader(file_path)
        else:
            print(f"Unsupported file type: {filename}")
            continue
        documents.extend(loader.load())
    return documents



# Folder holding the source PDF/DOCX files. The original Windows path remains
# the default; set the RAG_DOCS_DIR environment variable to run the notebook
# against another location without editing this cell.
folder_path = os.getenv("RAG_DOCS_DIR", r"d:\Langchain\dataDocs")
documents = load_documents(folder_path)

# Split into chunks of at most 1000 with an overlap of 200 between neighbours,
# so text cut at a chunk boundary still appears whole in one of the chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = splitter.split_documents(documents)

print(f"Loaded {len(documents)} raw documents from the folder.")
print(f"After splitting, there are {len(split_docs)} document chunks.")


Loaded 31 raw documents from the folder.
After splitting, there are 106 document chunks.


In [6]:
# The OpenAIEmbeddings class in langchain_community is deprecated (the stray
# "embeddings = OpenAIEmbeddings()" echo under this cell appears to be that
# warning). Import the maintained class from langchain_openai, the package
# this notebook already uses for ChatOpenAI.
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

# One embedding vector per chunk, in the same order as split_docs.
doc_embedings = embeddings.embed_documents([split.page_content for split in split_docs])

print(f"created embeddings for {len(doc_embedings)} document chunk")

  embeddings = OpenAIEmbeddings()


created embeddings for 106 document chunk


In [7]:
# Displays the embedding of the first chunk.
# NOTE(review): this prints the entire vector; slice it (e.g. [:5]) if only a preview is wanted.
doc_embedings[0]

[-0.0022305974689207225,
 -0.004921450789510548,
 0.03303322658173616,
 -0.02427521326375081,
 0.014820241031448104,
 0.0059767518662071336,
 -0.04818222259122434,
 -0.014820241031448104,
 -0.026721143669352763,
 -0.023170599592273963,
 0.01867324140195716,
 0.016253609680965023,
 0.009882352399903304,
 0.0044710576430619965,
 0.0007553128460853622,
 -0.006831513132730823,
 0.021250674543833135,
 -0.026602793069351132,
 0.007350944750061989,
 -0.007160267179448397,
 -0.016832216132635757,
 0.015411998688068737,
 -0.009481271848235016,
 -0.015438299235323541,
 -0.0043033930512963595,
 0.010480685193337636,
 0.03108700098604053,
 -0.03450604605213529,
 0.0016076082616809457,
 -0.008251731508620339,
 0.00783750068332465,
 0.011552424112068475,
 -0.006916988979986443,
 0.003997651517765492,
 -0.011907478706040853,
 0.005812374842848346,
 0.030455793439860185,
 -0.018226135823915458,
 0.010395209346082016,
 -0.045788891417487,
 0.027510154499061936,
 0.02698414727925582,
 0.0036195841777757

Create the Chroma vector store (in memory; no persist directory is configured)

In [8]:
from langchain_community.vectorstores import Chroma

# Embedding client handed to Chroma; Chroma calls it to embed the chunks and, later, the queries.
embeding_function = OpenAIEmbeddings()
collection_name = "my_collection"
# No persist_directory is passed here.
# NOTE(review): without it the collection presumably lives only for this kernel
# session; pass persist_directory=... to keep the index on disk — confirm.
vectorstore = Chroma.from_documents(collection_name=collection_name, documents=split_docs, embedding=embeding_function)


getting the matching document

In [None]:
# Ask the vector store directly for the k=2 chunks closest to the query.
query = "what is the contribution of IBM in quantum computing?"
search_results = vectorstore.similarity_search(query, k=2)

# Show only the text of the best match.
print(search_results[0].page_content)

'environmental modeling, and discovering new materials. Daimler Mercedes -Benz \nhas been using a quantum computer to develop a new battery for electric vehicles. \nVolkswagen has investigated the use of qu antum computers to find a solution for \nthe optimization of traffic flows in Beijing, China.  \nIBM, which owns 53-qubit gate-based quantum computers, has been collaborating \nwith more than 100 organizations, including the companies mentioned above, \nacross industries and made their 5-qubit and 20-qubit quantum computers available \nvia their cloud service called “IBM Q Experience” (IBM, n.d.). More than 200 third \nparty research paper s on practical applications have been published with IBM \nquantum computers. This cloud service provides a graphical user interface in a \nbrowser to build quantum circuits on IBM’s simulators or real quantum computers \nby dragging and dropping the icons, whic h represent quantum logic gates (e.g.,'

Now wrap the vector store as a retriever and query it

In [11]:
# Retriever view of the vector store, configured to return 2 documents per query;
# it can be invoked directly and piped into chains (as the later cells do).
retriever = vectorstore.as_retriever(search_kwargs={"k":2})
retriever.invoke("what is the contribution of IBM in quantum computing?")

[Document(metadata={'page': 2, 'source': 'd:\\Langchain\\dataDocs\\Quantum Computing- Principles and Applications.pdf'}, page_content='environmental modeling, and discovering new materials. Daimler Mercedes -Benz \nhas been using a quantum computer to develop a new battery for electric vehicles. \nVolkswagen has investigated the use of qu antum computers to find a solution for \nthe optimization of traffic flows in Beijing, China.  \nIBM, which owns 53-qubit gate-based quantum computers, has been collaborating \nwith more than 100 organizations, including the companies mentioned above, \nacross industries and made their 5-qubit and 20-qubit quantum computers available \nvia their cloud service called “IBM Q Experience” (IBM, n.d.). More than 200 third \nparty research paper s on practical applications have been published with IBM \nquantum computers. This cloud service provides a graphical user interface in a \nbrowser to build quantum circuits on IBM’s simulators or real quantum compu

# Building the RAG Chain

In [12]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# RAG prompt with two placeholders: {context} (retrieved text) and {question}.
# Note: this rebinds `template` and `prompt`, which earlier cells used for the joke examples.
template = """Answer the question based only on the following context:
{context}
Question: {question}
Answer: """

prompt = ChatPromptTemplate.from_template(template)

def docs2str(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string; empty when ``docs`` is empty.
    """
    paragraphs = [item.page_content for item in docs]
    separator = "\n\n"
    return separator.join(paragraphs)

# The leading dict fans the input question out to two branches:
#   "context"  -> retriever output, flattened to one string by docs2str
#   "question" -> the input passed through unchanged
# The resulting mapping fills the prompt, which goes to the model and then the string parser.
rag_chain = (
    {"context": retriever | docs2str, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


In [13]:
# The chain takes the bare question string as its input.
question = "what is the contribution of IBM in quantum computing?"

# `response` is a plain string because the chain ends in StrOutputParser.
response = rag_chain.invoke(question)
print(f"Question: {question}")
print(f"Answer: {response}")


Question: what is the contribution of IBM in quantum computing?
Answer: IBM has made significant contributions to quantum computing by developing 53-qubit gate-based quantum computers and collaborating with over 100 organizations across various industries. They have made their quantum computers available through the cloud service called "IBM Q Experience," which allows users to build quantum circuits using a graphical user interface. Additionally, more than 200 third-party research papers on practical applications of IBM's quantum computers have been published, showcasing the impact of their technology in the field.


# Conversational RAG

# Creating a History-Aware Retriever

In [None]:
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains import create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain

# System instruction for the rewriting step: turn a follow-up question into a
# standalone one, without answering it.
contextualize_q_system_prompt = """
Given a chat history and the latest user question
which might reference context in the chat history,
formulate a standalone question which can be understood
without the chat history. Do NOT answer the question,
just reformulate it if needed and otherwise return it as is.
"""

# Prompt layout: instruction, then the prior turns (supplied under the
# "chat_history" key), then the new user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Quick check of the rewriting step on its own, with an empty history.
contextualize_chain = contextualize_q_prompt | llm | StrOutputParser()
print(contextualize_chain.invoke({"input": "how many qubit did they done?", "chat_history": []}))


How many qubits have been created or developed?


In [16]:
from langchain.chains import create_retrieval_chain

# Retriever that is given the model and the contextualize prompt along with
# the base retriever, so the chat history can be taken into account when retrieving.
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

# Answering prompt: instructions, the retrieved documents ({context}), the
# prior turns, and finally the user's input.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant. Use the following context to answer the user's question."),
    ("system", "Context: {context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}")
])

# Note: this rebinds `rag_chain`. From here on it expects a dict with "input"
# and "chat_history" and returns a dict (the later cells read its 'answer' key).
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


In [19]:
# First turn of a conversation: no prior messages to pass along.
result = rag_chain.invoke({
    "input": "where is IBM located?",  # User's question
    "chat_history": []  # Chat history should be an empty list or populated with past messages
})

# Prints the whole result dict (input, chat_history, retrieved context, ...).
print(result)


{'input': 'where is IBM located?', 'chat_history': [], 'context': [Document(metadata={'page': 2, 'source': 'd:\\Langchain\\dataDocs\\Quantum Computing- Principles and Applications.pdf'}, page_content='environmental modeling, and discovering new materials. Daimler Mercedes -Benz \nhas been using a quantum computer to develop a new battery for electric vehicles. \nVolkswagen has investigated the use of qu antum computers to find a solution for \nthe optimization of traffic flows in Beijing, China.  \nIBM, which owns 53-qubit gate-based quantum computers, has been collaborating \nwith more than 100 organizations, including the companies mentioned above, \nacross industries and made their 5-qubit and 20-qubit quantum computers available \nvia their cloud service called “IBM Q Experience” (IBM, n.d.). More than 200 third \nparty research paper s on practical applications have been published with IBM \nquantum computers. This cloud service provides a graphical user interface in a \nbrowser t

# Building a Multi-User Chatbot with SQLite Storage

## Setting Up the SQLite Database


In [20]:
import sqlite3
from datetime import datetime
import uuid

# SQLite database file used by the helpers below (relative path, so it resolves against the working directory).
DB_NAME = "rag_app.db"

def get_db_connection():
    """Open a new connection to ``DB_NAME`` whose rows can be read by column name.

    Returns:
        A ``sqlite3.Connection``; the caller is responsible for closing it.
    """
    connection = sqlite3.connect(DB_NAME)
    # sqlite3.Row allows row['column'] access in addition to positional indexes.
    connection.row_factory = sqlite3.Row
    return connection

def create_application_logs():
    """Create the ``application_logs`` table if it does not already exist.

    Each row stores one question/answer exchange together with the session it
    belongs to, the model name, and an automatically assigned creation timestamp.
    """
    conn = get_db_connection()
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS application_logs
    (id INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id TEXT,
    user_query TEXT,
    gpt_response TEXT,
    model TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')
        # Commit explicitly rather than relying on the driver's implicit handling of DDL.
        conn.commit()
    finally:
        # Always release the connection, even if the statement fails.
        conn.close()

def insert_application_logs(session_id, user_query, gpt_response, model):
    """Append one question/answer exchange to ``application_logs``.

    Args:
        session_id: Identifier of the conversation the exchange belongs to.
        user_query: The user's question text.
        gpt_response: The model's answer text.
        model: Name of the model that produced the answer.
    """
    conn = get_db_connection()
    try:
        # `with conn` commits on success and rolls back if the INSERT raises;
        # it does not close the connection, hence the surrounding try/finally.
        with conn:
            conn.execute('INSERT INTO application_logs (session_id, user_query, gpt_response, model) VALUES (?, ?, ?, ?)',
                         (session_id, user_query, gpt_response, model))
    finally:
        conn.close()

def get_chat_history(session_id):
    """Return the stored conversation for ``session_id`` as role/content dicts.

    Args:
        session_id: Identifier of the conversation to load.

    Returns:
        A list alternating ``{"role": "human", ...}`` and ``{"role": "ai", ...}``
        entries, oldest exchange first; empty when the session has no rows.
    """
    conn = get_db_connection()
    try:
        # CURRENT_TIMESTAMP has one-second resolution, so exchanges logged within
        # the same second tie on created_at; the autoincrement id breaks the tie
        # in insertion order.
        rows = conn.execute(
            'SELECT user_query, gpt_response FROM application_logs WHERE session_id = ? ORDER BY created_at, id',
            (session_id,),
        ).fetchall()
    finally:
        # Release the connection even if the query fails.
        conn.close()

    messages = []
    for row in rows:
        messages.extend([
            {"role": "human", "content": row['user_query']},
            {"role": "ai", "content": row['gpt_response']}
        ])
    return messages

# Initialize the database: safe to re-run, the table is created only if it is missing.
create_application_logs()


In [21]:
# Example usage for a new user
# Log the model that is actually configured on `llm` instead of a hard-coded
# name, so the `model` column reflects what produced each answer (the rows were
# previously tagged "gpt-3.5-turbo" regardless of the model in use).
logged_model = llm.model_name

# Example usage for a new user
session_id = str(uuid.uuid4())
question = "What are some common mistakes in research paper format"
# A fresh session id has no rows yet, so this history is empty.
chat_history = get_chat_history(session_id)
answer = rag_chain.invoke({"input": question, "chat_history": chat_history})['answer']
insert_application_logs(session_id, question, answer, logged_model)
print(f"Human: {question}")
print(f"AI: {answer}\n")

# Example of a follow-up question
question2 = "What were the improper uses??"
# Reload the history so it now contains the first exchange.
chat_history = get_chat_history(session_id)
answer2 = rag_chain.invoke({"input": question2, "chat_history": chat_history})['answer']
insert_application_logs(session_id, question2, answer2, logged_model)
print(f"Human: {question2}")
print(f"AI: {answer2}")

Human: What are some common mistakes in research paper format
AI: Some common mistakes in research paper format include:

1. **Incorrect File Format:** Submitting the document in a format other than .docx or .odt.

2. **Locked or Protected Regions:** Having sections of the document that cannot be edited.

3. **Improper Page Size:** Not setting the paper/page size to A4.

4. **Inconsistent Font Usage:** Using fonts other than "Times New Roman" for the main text or not applying a monospaced font for programming code.

5. **Alignment Issues:** Failing to justify normal paragraphs or not centering figures and tables.

6. **Excessive Blank Spaces:** Including two or more consecutive spaces or blank lines in the document.

7. **Use of Hard Tabs:** Using hard tabs instead of proper indentation for paragraphs.

8. **Title and Keywords Formatting:** Not writing the title and keywords in Title Case or using special characters unnecessarily.

9. **Replicating Content:** Repeating information from