# Deep Dive into LangChain
## LLMs, Prompt Templates, Caching, Streaming

In [1]:
pip install -r ./requirements.txt -q

Note: you may need to restart the kernel to use updated packages.


Download [requirements.txt](https://drive.google.com/file/d/1DxmberiV7YbuJt_RU0VK3P8qfazUSPH3/view?usp=sharing)

In [2]:
# pip install --upgrade  -q langchain langchain-community

In [3]:
# pip install --upgrade -q openai

In [4]:
# pip show openai

In [5]:
# pip show langchain

### Python-dotenv

In [6]:
import os
from dotenv import load_dotenv, find_dotenv

# Load the API keys from the .env file; the path lookup and the load are
# written as two steps so the located file can be inspected if needed.
env_file = find_dotenv()
load_dotenv(env_file, override=True)

# Optional check that the key is present (avoid saving its value in the output):
# os.environ.get('OPENAI_API_KEY')

True

## OpenAI Chat Models

In [7]:
from langchain_openai import ChatOpenAI

# Chat model instance used by the cells in this section.
llm = ChatOpenAI(model='gpt-4o')

# Run the prompt through the model. `model` and `temperature` are supplied as
# per-call keyword arguments here rather than on the constructor.
question = 'Explain quantum mechanics in one sentence.'
output = llm.invoke(question, model='gpt-4o-mini', temperature=0.1)
print(output.content)

Quantum mechanics is a fundamental theory in physics that describes the behavior of matter and energy at the smallest scales, where particles exhibit wave-particle duality, uncertainty, and entanglement.


In [8]:
# Reference lookup only: the help text is long, so clear this cell's output
# before sharing the notebook.
help(ChatOpenAI)  # see the llm constructor arguments with their defaults

Help on class ChatOpenAI in module langchain_openai.chat_models.base:

class ChatOpenAI(BaseChatOpenAI)
 |  ChatOpenAI(*args: Any, name: Optional[str] = None, cache: Union[langchain_core.caches.BaseCache, bool, NoneType] = None, verbose: bool = None, callbacks: Union[list[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[list[str]] = None, metadata: Optional[dict[str, Any]] = None, custom_get_token_ids: Optional[Callable[[str], list[int]]] = None, callback_manager: Optional[langchain_core.callbacks.base.BaseCallbackManager] = None, rate_limiter: Optional[langchain_core.rate_limiters.BaseRateLimiter] = None, disable_streaming: Union[bool, Literal['tool_calling']] = False, client: Any = None, async_client: Any = None, root_client: Any = None, root_async_client: Any = None, model: str = 'gpt-3.5-turbo', temperature: Optional[float] = None, model_kwargs: Dict[str, Any] = None, api_key: Optional[pydantic.t

In [9]:
# using Chat Completions API Messages: System, Assistant and Human
# Using Chat Completions API messages: System, Assistant (AI) and Human.
# The message classes are imported from langchain_core.messages, the module
# this notebook already uses for SystemMessage in the ChatPromptTemplates section,
# instead of going through the legacy langchain_classic.schema re-export.
from langchain_core.messages import (
    SystemMessage,
    AIMessage,
    HumanMessage,
)

# The system message sets the persona and the reply language;
# the human message carries the actual question.
messages = [
    SystemMessage(content='You are a physicist and respond only in German.'),
    HumanMessage(content='Explain quantum mechanics in one sentence.')
]

# `llm` is the ChatOpenAI instance created in an earlier cell.
output = llm.invoke(messages)
print(output.content)

Quantenmechanik ist eine grundlegende Theorie in der Physik, die das Verhalten von Materie und Energie auf kleinsten Skalen, wie denen von Atomen und subatomaren Teilchen, beschreibt und dabei Prinzipien der Unschärfe und der Wahrscheinlichkeitsverteilung verwendet.


## Caching LLM Responses

### 1. In-Memory Cache

In [14]:
from langchain_core.globals import set_llm_cache
from langchain_openai import ChatOpenAI
# Model instance used by the caching examples that follow.
llm = ChatOpenAI(model='gpt-4o-mini')

In [15]:
# InMemoryCache is imported from langchain_core.caches (the same package that
# provides set_llm_cache) rather than through the legacy langchain_classic shim.
from langchain_core.caches import InMemoryCache

# Register a fresh in-memory cache as the global LLM cache; it lives only in
# this kernel process.
set_llm_cache(InMemoryCache())

In [16]:
%%time
# First call with this prompt after the in-memory cache was installed, so the
# timing is expected to cover a real API request.
# NOTE(review): the prompt text contains a typo ("Tell a me"); it is left
# unchanged here because it is the text actually sent to the model.
prompt = 'Tell a me a joke that a toddler can understand.'
llm.invoke(prompt)

CPU times: user 12.5 ms, sys: 318 µs, total: 12.8 ms
Wall time: 920 ms


AIMessage(content='Why did the teddy bear say no to dessert?  \n\nBecause it was already stuffed!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 18, 'total_tokens': 36, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-dce19a3c-0e01-4dd0-8182-c2cc48e6e794-0', usage_metadata={'input_tokens': 18, 'output_tokens': 18, 'total_tokens': 36})

In [17]:
%%time
# Same `prompt` as the previous cell: this call is expected to be answered
# from the in-memory cache, which is why the wall time drops sharply.
llm.invoke(prompt)

CPU times: user 1.35 ms, sys: 0 ns, total: 1.35 ms
Wall time: 1.34 ms


AIMessage(content='Why did the teddy bear say no to dessert?  \n\nBecause it was already stuffed!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 18, 'total_tokens': 36, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-dce19a3c-0e01-4dd0-8182-c2cc48e6e794-0', usage_metadata={'input_tokens': 18, 'output_tokens': 18, 'total_tokens': 36})

### 2. SQLite Caching

In [18]:
from langchain_classic.cache import SQLiteCache

# Replace the global LLM cache with one backed by a local SQLite database file.
sqlite_cache = SQLiteCache(database_path=".langchain.db")
set_llm_cache(sqlite_cache)

In [19]:
%%time
# First request for this prompt (not in cache, takes longer).
# NOTE(review): the cache is stored in the .langchain.db file, so it presumably
# survives kernel restarts; on a re-run this call may already be cached unless
# that file is deleted first. Confirm before relying on the timing.
llm.invoke("Tell me a joke")

CPU times: user 20 ms, sys: 937 µs, total: 20.9 ms
Wall time: 881 ms


AIMessage(content="Why don't skeletons fight each other? \n\nThey don't have the guts!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 11, 'total_tokens': 27, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-a35dd7e0-fe6f-4027-8b38-b593a30e25c4-0', usage_metadata={'input_tokens': 11, 'output_tokens': 16, 'total_tokens': 27})

In [20]:
%%time
# Second request with the identical prompt (cached, faster): expected to be
# served from the SQLite cache instead of calling the API again.
llm.invoke("Tell me a joke")

CPU times: user 3.11 ms, sys: 57 µs, total: 3.16 ms
Wall time: 2.96 ms


AIMessage(content="Why don't skeletons fight each other? \n\nThey don't have the guts!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 11, 'total_tokens': 27, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-a35dd7e0-fe6f-4027-8b38-b593a30e25c4-0', usage_metadata={'input_tokens': 11, 'output_tokens': 16, 'total_tokens': 27})

## LLM Streaming

In [17]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model='gpt-4o-mini')
prompt = 'Write a hip hop song about life in Camden.'

# Non-streaming baseline: wait for the complete response, then print its text.
response = llm.invoke(prompt)
print(response.content)

**Verse 1**  
Yo, rising with the sun, Camden streets awake,  
Concrete jungle where the dreams can shake,  
Graffiti on the walls tell tales untold,  
History deep, man, this city’s bold.  
From the docks to the markets, the hustle is real,  
Got the sound of the trains, that steel-on-steel feel.  
Neighbors in the alleys, we share the same fate,  
Bonds forged in struggle, we navigating fate.  

**Chorus**  
Life in Camden, we rise and grind,  
Through the pain and the joy, we redefine.  
From the past to the present, we make our own way,  
Camden’s heart beats, never fades away.  

**Verse 2**  
Got the Camden Lock dreaming, boats in the flow,  
Artists and the visionaries, watch how we grow.  
Street performers spitting wisdom like a prophet,  
Rhymes cutting sharp, can’t nobody stop it.  
Cafés buzzing, aromas on the breeze,  
Unity in diversity, we do as we please.  
From reggae to punk, got that vibe so eclectic,  
Culture in the air, man, it’s all interconnected.  

**Chorus** 

In [18]:
# Streaming variant of the same prompt: print each piece as soon as it is
# yielded, without newlines, flushing so the text shows up immediately.
for piece in llm.stream(prompt):
    print(piece.content, end='', flush=True)

**Title: Camden Dreams**

**Verse 1:**
Yo, I wake up in the morning, city’s breathin’ hard,  
Echoes of the past, graffiti on the yard,  
In Camden where the hustle meets the hustle,  
Life’s like a puzzle, gotta flex that muscle.  
Streetlights flicker, keepin’ watch on the grind,  
Every corner got a story, gotta read between the lines.  
From the market to the docks, hear the vendors shout,  
“Fresh fish, hot vibes,” that’s what it’s about.

**Chorus:**
Camden dreams, in the heart of the scene,  
Where the river flows, and the hustle’s real mean.  
Livin’ for the moment, we ain’t holdin’ back,  
In the city full of stars, gotta stay on track.

**Verse 2:**
See my people shine, in the alleyways and more,  
Art in the shadows, knockin’ down every door.  
We got rhythm in our souls, soul food in our veins,  
Bass thumpin’ deep like the night trains’ refrains.  
In the summer heat, hear the laughter and cheer,  
While the world spins fast, we keep our dreams near.  
Unity in struggle, t

## PromptTemplates

In [19]:
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# Prompt text with two placeholders: {entity} and {language}.
template = (
    'You are an experienced Biologist.\n'
    'Write a few sentences about "{entity}" in {language}.'
)

# Wrap the text in a PromptTemplate object.
prompt_template = PromptTemplate.from_template(template=template)

# Substitute concrete values for both placeholders.
template_values = {'entity': 'Amoeba', 'language': 'Spanish'}
prompt = prompt_template.format(**template_values)
prompt  # display the generated prompt string


'You are an experienced Biologist.\nWrite a few sentences about "Amoeba" in Spanish.'

In [20]:
# New model instance with temperature=0 for this example.
llm = ChatOpenAI(model='gpt-4o-mini', temperature=0)
# `prompt` is the string produced by prompt_template.format(...) in the previous cell.
output = llm.invoke(prompt)
print(output.content)

La ameba es un organismo unicelular que pertenece al grupo de los protozoos. Se caracteriza por su forma irregular y su capacidad de moverse mediante pseudópodos, que son extensiones temporales de su citoplasma. Las amebas se encuentran en diversos hábitats, como agua dulce, suelos y ambientes marinos, y se alimentan principalmente de bacterias y otros microorganismos. Su estudio es importante en biología, ya que ayudan a entender procesos ecológicos y evolutivos.


## ChatPromptTemplates

In [21]:
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.messages import SystemMessage

# Chat template: a fixed system instruction followed by a human message
# containing the {n} and {area} placeholders.
system_message = SystemMessage(content='You respond only in the JSON format.')
human_template = HumanMessagePromptTemplate.from_template('Top {n} cities in {area} by population.')
chat_template = ChatPromptTemplate.from_messages([system_message, human_template])

# Substitute concrete values for n and area.
messages = chat_template.format_messages(n='7', area='Europe')
print(messages)  # shows the formatted chat messages


[SystemMessage(content='You respond only in the JSON format.', additional_kwargs={}, response_metadata={}), HumanMessage(content='Top 7 cities in Europe by population.', additional_kwargs={}, response_metadata={})]


In [22]:
from langchain_openai import ChatOpenAI

# Name the model explicitly, as the other cells in this notebook do, instead of
# relying on the library's default model, which can differ between versions.
llm = ChatOpenAI(model='gpt-4o-mini')

# `messages` is the list produced by chat_template.format_messages(...) in the
# previous cell.
output = llm.invoke(messages)
print(output.content)

{
    "cities": [
        {
            "rank": 1,
            "city": "Istanbul",
            "country": "Turkey",
            "population": 15460000
        },
        {
            "rank": 2,
            "city": "Moscow",
            "country": "Russia",
            "population": 12615000
        },
        {
            "rank": 3,
            "city": "London",
            "country": "United Kingdom",
            "population": 9304000
        },
        {
            "rank": 4,
            "city": "Saint Petersburg",
            "country": "Russia",
            "population": 5352000
        },
        {
            "rank": 5,
            "city": "Berlin",
            "country": "Germany",
            "population": 3748000
        },
        {
            "rank": 6,
            "city": "Madrid",
            "country": "Spain",
            "population": 3223000
        },
        {
            "rank": 7,
            "city": "Rome",
            "country": "Italy",
            "populati