In [4]:
from dotenv import load_dotenv
import os
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
# Subscript lookup (not .get) so a missing key fails right here with KeyError.
openapi_key = os.environ["OPENAI_API_KEY"]

In [5]:
from langchain_openai import ChatOpenAI

# Chat model client used by the caching cells that follow.
llm = ChatOpenAI(
    model="gpt-4o-mini",
)

In [6]:
from langchain.globals import set_llm_cache

In [7]:
%%time
from langchain.cache import InMemoryCache

set_llm_cache(InMemoryCache())

llm.predict("Tell me a joke")



CPU times: total: 375 ms
Wall time: 1.37 s


'Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!'

In [8]:
%%time

llm.predict("Tell me a joke")

CPU times: total: 0 ns
Wall time: 795 μs


'Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!'

In [9]:
# The same caching approach, this time backed by a SQLite database file
from langchain.cache import SQLiteCache

# Build the cache first, then register it as the global LLM cache
# (this replaces whichever cache was set before).
sqlite_cache = SQLiteCache(database_path=".langchain.db")
set_llm_cache(sqlite_cache)

In [10]:
%%time
# The first time, it is not yet in cache, so it should take longer
llm.predict("Tell me a joke")

CPU times: total: 31.2 ms
Wall time: 1.27 s


'Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!'

In [11]:
from langchain_core.messages import (
    AIMessage,
    BaseMessage,
    FunctionMessage,
    HumanMessage,
    SystemMessage,
    ToolMessage,
)

In [12]:
from langchain_core.messages import (
    AIMessageChunk,
    FunctionMessageChunk,
    HumanMessageChunk,
    SystemMessageChunk,
    ToolMessageChunk,
)

In [13]:
# Message chunks support `+`: adding two chunks merges them into one chunk
# whose content is the concatenation of both parts.
first_chunk = AIMessageChunk(content="Hello")
second_chunk = AIMessageChunk(content=" World!")
first_chunk + second_chunk

AIMessageChunk(content='Hello World!', additional_kwargs={}, response_metadata={})

In [14]:
from langchain_openai import OpenAI
# Rebind `llm` to a chat model that requests per-token log probabilities on every call.
llm = ChatOpenAI(model='gpt-4o-mini').bind(logprobs=True)
# The prompt must be a *list* of (role, content) pairs. A bare 2-tuple of
# strings is read as a sequence of two separate human messages
# ("Human" and "How are you today"), not as one role/content pair.
msg = llm.invoke([("human", "How are you today")])

In [15]:
# Per-token logprob records attached to the response metadata; show only the
# first five to keep the output short.
token_logprobs = msg.response_metadata["logprobs"]["content"]
token_logprobs[:5]

[{'token': "I'm",
  'bytes': [73, 39, 109],
  'logprob': -0.06198875,
  'top_logprobs': []},
 {'token': ' just',
  'bytes': [32, 106, 117, 115, 116],
  'logprob': -0.047015443,
  'top_logprobs': []},
 {'token': ' a',
  'bytes': [32, 97],
  'logprob': -4.406056e-05,
  'top_logprobs': []},
 {'token': ' program',
  'bytes': [32, 112, 114, 111, 103, 114, 97, 109],
  'logprob': -0.9774867,
  'top_logprobs': []},
 {'token': ',', 'bytes': [44], 'logprob': -1.0921943e-05, 'top_logprobs': []}]

In [16]:
# Stream the reply and print how the logprob records accumulate as the
# received chunks are merged, for the first five chunks only.
ct = 0
full = None
# The prompt must be a *list* of (role, content) pairs; a bare 2-tuple of
# strings would be sent as two separate human messages.
for chunk in llm.stream([("human", "how are you today")]):
    # Adding chunks merges them, so `full` always holds everything received so far.
    full = chunk if full is None else full + chunk
    if "logprobs" not in full.response_metadata:
        # No logprobs in the merged metadata: nothing to show, stop reading.
        break
    print(full.response_metadata["logprobs"]["content"])
    ct += 1
    if ct >= 5:
        # Stop as soon as five chunks were shown instead of silently
        # draining the rest of the stream.
        break

[]
[{'token': "I'm", 'bytes': [73, 39, 109], 'logprob': -0.062001865, 'top_logprobs': []}]
[{'token': "I'm", 'bytes': [73, 39, 109], 'logprob': -0.062001865, 'top_logprobs': []}, {'token': ' just', 'bytes': [32, 106, 117, 115, 116], 'logprob': -0.040235475, 'top_logprobs': []}]
[{'token': "I'm", 'bytes': [73, 39, 109], 'logprob': -0.062001865, 'top_logprobs': []}, {'token': ' just', 'bytes': [32, 106, 117, 115, 116], 'logprob': -0.040235475, 'top_logprobs': []}, {'token': ' a', 'bytes': [32, 97], 'logprob': -4.477578e-05, 'top_logprobs': []}]
[{'token': "I'm", 'bytes': [73, 39, 109], 'logprob': -0.062001865, 'top_logprobs': []}, {'token': ' just', 'bytes': [32, 106, 117, 115, 116], 'logprob': -0.040235475, 'top_logprobs': []}, {'token': ' a', 'bytes': [32, 97], 'logprob': -4.477578e-05, 'top_logprobs': []}, {'token': ' computer', 'bytes': [32, 99, 111, 109, 112, 117, 116, 101, 114], 'logprob': -0.35184705, 'top_logprobs': []}]
