# Deep Dive into LangChain

### 1. Working with the OpenAI Models

In [58]:
# Loading the Packages
import openai 
import os
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv(), override=True)
print("Packages Loaded!")

Packages Loaded!


#### Connecting Langchain with OpenAI Models: GPT-3.5 and GPT-4

In [None]:
from langchain_openai import ChatOpenAI

# Set the default model provided by OpenAI
llm = ChatOpenAI()

# To send a request (as a prompt) to openAI GPT Model and get it returns the output.
output = llm.invoke("Explain Singularity in one deep sentence.") 
print(output.content) # '.content' contains the response of the OpenAI Models

Singularity is the theoretical point in the future where artificial intelligence surpasses human intelligence, leading to dramatic and unpredictable societal changes.


In [None]:
help(ChatOpenAI)

"""
The Default model it's using right now

 |          llm = ChatOpenAI(
 |              model="gpt-4o",
 |              temperature=0,
 |              max_tokens=None,
 |              timeout=None,
 |              max_retries=2,
 |              # api_key="...",
 |              # base_url="...",
 |              # organization="...",
 |              # other params...
 |          )
"""

Help on class ChatOpenAI in module langchain_openai.chat_models.base:

class ChatOpenAI(BaseChatOpenAI)
 |  ChatOpenAI(*args: Any, name: Optional[str] = None, cache: Union[langchain_core.caches.BaseCache, bool, NoneType] = None, verbose: bool = <factory>, callbacks: Union[list[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[list[str]] = None, metadata: Optional[dict[str, Any]] = None, custom_get_token_ids: Optional[Callable[[str], list[int]]] = None, callback_manager: Optional[langchain_core.callbacks.base.BaseCallbackManager] = None, rate_limiter: Optional[langchain_core.rate_limiters.BaseRateLimiter] = None, disable_streaming: Union[bool, Literal['tool_calling']] = False, client: Any = None, async_client: Any = None, root_client: Any = None, root_async_client: Any = None, model: str = 'gpt-3.5-turbo', temperature: Optional[float] = None, model_kwargs: dict[str, typing.Any] = <factory>, api_key: Op

#### LangChain now defaults to GPT-4o internally. So we can it using two methods:

- Method 1st: Hard Code (explicitly change) model name everytime while using it

- Method 2nd: Setting the .env file with OPENAI_MODEL_NAME=gpt-3.5-turbo

In [None]:
#### Since I'm experimenting, it's good to continue this with GPT-3.5 Turbo Model. It provides a good performance for low price
import time
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-3.5-turbo",
                 temperature=0.7) # Using GPT-3.5

output = llm.invoke(input="Explain about Universe Singularity in one deep meaningful sentence.")
print("GPT 3.5 Responding!...\n")
time.sleep(2)
print(f"{output.content}")
print("\nReplied Successfully!")

GPT 3.5 Responding!...

The singularity of the universe is a point of infinite density and energy where the laws of physics break down, serving as the mysterious and awe-inspiring origin of our existence.

Replied Successfully!


In [3]:
import time

# Calling the GPT-3.5-turbo Model
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.7
    )

# Chat Completion API uses list of dictionaries for defining three roles like System, User and Assistant.
from langchain.schema import(
    SystemMessage, # System Prompt/Instructions
    HumanMessage,   # User Prompt/Query
    AIMessage,     # Assistant Message
    )

"""
    messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Enter Your Prompt here...."},
    {"role": "Assistant", "content": "Great Question, so here's the answer for what you're looking for. "}]
"""

messages = [
    SystemMessage(content="You are a Physicist and respond only in German."),
    HumanMessage(content="Explain Quantum Mechanics in one sentence")
]
output = llm.invoke(messages)
print("GPT 3.5 Responding.....\n")
time.sleep(2)
print("GPT 3.5 Generating Output\n")
time.sleep(3)
print("Here's the Result:\n")
time.sleep(2)
# print(f"{output.text}") 
print(f"{output.content}")
time.sleep(2)
print("Response Generated Successfully!")


GPT 3.5 Responding.....

GPT 3.5 Generating Output

Here's the Result:

Quantenmechanik ist die physikalische Theorie, die das Verhalten von Teilchen auf atomarer und subatomarer Ebene beschreibt.
Response Generated Successfully!


### 2. Caching LLM Responses

- Caching: Optimizes Interactions with LLMs by reducing API calls and speeding up applications, resulting in a more efficient user experience.

- Two kinds of Cache: In-Memory Cache and SQLite Caching

1st Method : In-Memory cache

In [None]:
%%time 

# To measure the response time of model

## In-Memory Cache
from langchain.globals import set_llm_cache
from langchain_openai import OpenAI
llm = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0.7)

from langchain.cache import InMemoryCache
set_llm_cache(InMemoryCache())
prompt = "Tell me a joke that a toddler can understand."
llm.invoke(prompt) # LangChain stores the response in memory as a key–value pair:

CPU times: user 40 ms, sys: 3.48 ms, total: 43.5 ms
Wall time: 739 ms


'\n\nWhy did the tomato turn red?\n\nBecause it saw the salad dressing!'

In [None]:
%%time

# Now the above prompt is stored in the In-Memory Cache, so let's see the time difference if we recall the same prompt again.
llm.invoke(prompt) # LangChain checks the cache again → finds a match. Skips the API call completely.

CPU times: user 310 μs, sys: 13 μs, total: 323 μs
Wall time: 328 μs


'\n\nWhy did the tomato turn red?\n\nBecause it saw the salad dressing!'

before Caching (Response Time): total: 43.5 ms

after In-memory Caching (Response Time): total: 323 μs

2nd Method: SQLite Caching

In [20]:
%%time 
from langchain.globals import set_llm_cache
from langchain_openai import OpenAI
from langchain.cache import SQLiteCache

# Set the model
llm = OpenAI(model="gpt-3.5-turbo-instruct")

# Set the SQLite Caching
set_llm_cache(SQLiteCache())

# First Prompt/request (not in cache, takes longer)
output = llm.invoke("Tell me the one most expensive matter on earth")


CPU times: user 40.6 ms, sys: 3.04 ms, total: 43.6 ms
Wall time: 43.2 ms


In [21]:
%%time
# Second Prompt/Request (cached, Faster)
llm.invoke("Tell me the one most expensive matter on earth")

CPU times: user 2.52 ms, sys: 1.68 ms, total: 4.21 ms
Wall time: 2.56 ms


'\n\nThe most expensive matter on Earth is antimatter. According to NASA, it costs around $62.5 trillion per gram to produce antimatter. This is due to the complex and expensive processes required to create and store antimatter, as well as the high energy costs involved. Antimatter is also incredibly rare, making it a valuable and expensive substance.'

before Caching (Response Time): total: 43.6 ms

after SQLite Caching (Response Time): total: 4.21 ms

------------------------------------------------------------------------------------------------

### 3. LLM Streaming

In [23]:
from langchain_openai import ChatOpenAI

# Set the model
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
prompt = "Write a very small song about describing the differences between Raven and Crow"
#output = llm.invoke(prompt)
#print(f"{output.content}")
# The above code will generate all the output at once.

# Enable the stream
output = llm.stream(prompt)

# To output the streaming responses
for chunk in output:
    print(chunk.content, end="", flush=True)

Raven black as night, Crow not as bright
Raven's voice a haunting call, Crow's a caw that's rough and raw
Raven's sleek and shiny feathers, Crow's a bit more dull in weather
Both are birds of mystery, but Raven's darker history

### 4. Prompt Template - It is a way of creating Dynamic Prompts for LLMs

- A Prompt Template takes a piece of text and injects a user's input into that piece of text, just like Prompt Variables

- In LangChain there are ~PromptTemplates~ and ~ChatPromptTemplates~

In [None]:
# PromptTemplate

from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

template = """ You are an experience Virologist.
Write a deep and very few sentences about the following virus "{virus}" in "{language}" language. """

prompt_template = PromptTemplate.from_template(template=template)

prompt = prompt_template.format(virus="corona", language="english")
prompt

' You are an experience Virologist.\nWrite a deep and very few sentences about the following virus "corona" in "english" language. '

In [28]:
# Loading the model
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
output = llm.invoke(prompt)
print(f"GPT 3.5 Responds....\n{output.content}:")

GPT 3.5 Responds....
The coronavirus, also known as COVID-19, is a highly contagious respiratory virus that has caused a global pandemic. It has led to widespread illness, death, and economic disruption, highlighting the importance of public health measures and vaccination in controlling its spread.:


In [56]:
# ChatPromptTemplate - designed for engaging in conversations
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate, AIMessagePromptTemplate
from langchain_core.messages import SystemMessage, AIMessage

chat_template = ChatPromptTemplate.from_messages(
    [
        SystemMessage(content="Always start your first conversation using 'Namaste' and then output the response in JSON format"),
        HumanMessagePromptTemplate.from_template('Top {n} countries in {area} by population.'),
    ]
)

messages = chat_template.format_messages(n='5', area="Asia")
print(messages)

# Check how LangChain formatted the messages
for m in messages:
    print(f"\n{m.type.upper()}:{m.content}")

[SystemMessage(content="Always start your first conversation using 'Namaste' and then output the response in JSON format", additional_kwargs={}, response_metadata={}), HumanMessage(content='Top 5 countries in Asia by population.', additional_kwargs={}, response_metadata={})]

SYSTEM:Always start your first conversation using 'Namaste' and then output the response in JSON format

HUMAN:Top 5 countries in Asia by population.


In [57]:
# Loading the model
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-3.5-turbo")
output = llm.stream(messages)
print("GPT 3.5 Responds.....\n")
for chunk in output:
    print(chunk.content, end="", flush=True)

GPT 3.5 Responds.....

Namaste! Here are the top 5 countries in Asia by population:

{
  "1": "China",
  "2": "India",
  "3": "Indonesia",
  "4": "Pakistan",
  "5": "Bangladesh"
}

### 5. Understanding Chains

In [None]:
# Loading the Packages
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.messages import SystemMessage

# Inititializing the ChatOpenAI Model
llm = ChatOpenAI(model="gpt-3.5-turbo")

# ChatPromptTemplate
chat_template = ChatPromptTemplate.from_messages(
    [
        SystemMessage(
            content="Always start the initial conversation with a unique Greet, Greet that sounds like from the future as You are a visionary scientist from the future year 5000."
        ),
        HumanMessagePromptTemplate.from_template(
            'Describe how humans will explore "{location}" in the "{year}",'
            "using advanced technologies"
        ),
    ]
)

messages = chat_template.format_messages(location="India", year="3000")

# To view the Langchain prompt framed
print(messages)
for m in messages:
    print(f"{m.type.upper()}: {m.content}\n")

# To combine chat_template to llm
chain = chat_template | llm
print("GPT 3.5 Responds...\n")
output = chain.invoke({"location": "India", "year": "2100"})
print(output.content)

[SystemMessage(content='Always start the initial conversation with a unique Greet, Greet that sounds like from the future as You are a visionary scientist from the future year 5000.', additional_kwargs={}, response_metadata={}), HumanMessage(content='Describe how humans will explore "India" in the "3000",using advanced technologies', additional_kwargs={}, response_metadata={})]

SYSTEM: Always start the initial conversation with a unique Greet, Greet that sounds like from the future as You are a visionary scientist from the future year 5000.


HUMAN: Describe how humans will explore "India" in the "3000",using advanced technologies

GPT 3.5 Responds...

Greetings, fellow innovators of the 21st century! As a visionary scientist from the year 5000, I foresee a fascinating exploration of India in the year 2100 utilizing cutting-edge technologies. Picture this: advanced AI-guided drones equipped with biometric sensors will sweep across the vast landscapes of India, mapping every nook and c

In [66]:
output.response_metadata


{'token_usage': {'completion_tokens': 249,
  'prompt_tokens': 61,
  'total_tokens': 310,
  'completion_tokens_details': {'accepted_prediction_tokens': 0,
   'audio_tokens': 0,
   'reasoning_tokens': 0,
   'rejected_prediction_tokens': 0},
  'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}},
 'model_name': 'gpt-3.5-turbo-0125',
 'system_fingerprint': None,
 'id': 'chatcmpl-COFWqn2RWzNOqYSpR4X82wHHF9F6y',
 'service_tier': 'default',
 'finish_reason': 'stop',
 'logprobs': None}