# Test LLM calling
- test different ways of calling LLMS, native API, LangChain, sync/async


In [1]:
# to selectively re-import as needed
import sys
# del sys.modules['ainb_llm']
# del sys.modules['ainb_const']
# del sys.modules['ainb_utilities']
# del sys.modules['ainb_webscrape']
# del sys.modules['AInewsbot_langgraph']



In [2]:
import os
import shutil
# import dotenv
# import subprocess

from collections import Counter
import json
import uuid
import re
# import operator
import pickle
from datetime import datetime, timedelta

import langchain
from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_anthropic import ChatAnthropic

from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.prompts import (ChatPromptTemplate, PromptTemplate,
                                    SystemMessagePromptTemplate, HumanMessagePromptTemplate)
from langchain_core.output_parsers import StrOutputParser, PydanticOutputParser

from langgraph.checkpoint.memory import MemorySaver
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langgraph.errors import NodeInterrupt
from langchain.globals import set_debug

import numpy as np
import pandas as pd
import umap

import sklearn

import bs4

from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type
)

import asyncio
from asyncio import Semaphore

from IPython.display import HTML, Image, Markdown, display

# import pyperclip
# import shlex

import openai
from openai import OpenAI

from pydantic import BaseModel, Field
from typing import List, TypedDict, Annotated, Any

from google.cloud import aiplatform
from vertexai.language_models import TextGenerationModel
import google.generativeai as genai
import httpx

import trafilatura   # web scrape uses this to get clean news stories w/o a lot of js and boilerplate

from ainb_const import (
                        MODEL, LOWCOST_MODEL, HIGHCOST_MODEL, FINAL_SUMMARY_PROMPT,
                        REWRITE_PROMPT,
                        SCREENSHOT_DIR, SUMMARIZE_SYSTEM_PROMPT, SUMMARIZE_USER_PROMPT
                       )

from ainb_utilities import log

from AInewsbot_langgraph import (newscatcher_sources, fn_initialize, fn_download_sources, fn_extract_urls,
                                 fn_verify_download, fn_extract_newscatcher, fn_filter_urls, fn_topic_clusters,
                                 fn_topic_analysis, fn_download_pages, fn_summarize_pages, fn_propose_cats,
                                 fn_compose_summary, fn_rewrite_summary, fn_is_revision_complete, fn_send_mail
                                )


import podcastfy
from podcastfy.client import generate_podcast, process_content

from selenium.webdriver.support.ui import WebDriverWait
from IPython.display import Audio, display, Markdown

import pdb

# need this to run async in jupyter since it already has an asyncio event loop running
import asyncio
import nest_asyncio
nest_asyncio.apply()

# Activate global verbose logging
set_debug(False)


In [3]:
print(f"Python            {sys.version}")
print(f"LangChain         {langchain.__version__}")
print(f"OpenAI            {openai.__version__}")
# print(f"smtplib           {smtplib.sys.version}")
print(f"trafilatura       {trafilatura.__version__}")
# print(f"bs4               {bs4.__version__}")
print(f"numpy             {np.__version__}")
print(f"pandas            {pd.__version__}")
print(f"sklearn           {sklearn.__version__}")
print(f"umap              {umap.__version__}")
print(f"podcastfy         {podcastfy.__version__}")


Python            3.11.11 | packaged by conda-forge | (main, Dec  5 2024, 14:21:42) [Clang 18.1.8 ]
LangChain         0.3.18
OpenAI            1.63.1
trafilatura       2.0.0
numpy             1.26.4
pandas            2.2.3
sklearn           1.6.1
umap              0.5.7
podcastfy         0.4.1


In [4]:
# a basic LLM call with langchain\
openai_model = ChatOpenAI(model="gpt-4o-mini")

response = openai_model.invoke([
    SystemMessage(content="You are a translator. Translate the following from English into Italian"),
    HumanMessage(content='Listen to me. You are beautiful. You are perfect and I love you.'),
])

response

2025-02-23 16:24:59,365 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AIMessage(content='Ascoltami. Sei bellissima. Sei perfetta e ti amo.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 39, 'total_tokens': 56, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_7fcd609668', 'finish_reason': 'stop', 'logprobs': None}, id='run-3000ffc1-a993-4378-bfe1-a47f5ad4f55b-0', usage_metadata={'input_tokens': 39, 'output_tokens': 17, 'total_tokens': 56, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [5]:
response.response_metadata

{'token_usage': {'completion_tokens': 17,
  'prompt_tokens': 39,
  'total_tokens': 56,
  'completion_tokens_details': {'accepted_prediction_tokens': 0,
   'audio_tokens': 0,
   'reasoning_tokens': 0,
   'rejected_prediction_tokens': 0},
  'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}},
 'model_name': 'gpt-4o-mini-2024-07-18',
 'system_fingerprint': 'fp_7fcd609668',
 'finish_reason': 'stop',
 'logprobs': None}

In [6]:
response.usage_metadata
# no rate limit info like tokens remaining, available in headers

{'input_tokens': 39,
 'output_tokens': 17,
 'total_tokens': 56,
 'input_token_details': {'audio': 0, 'cache_read': 0},
 'output_token_details': {'audio': 0, 'reasoning': 0}}

In [7]:
client = openai.OpenAI()

# Retrieve the list of available models
models = client.models.list()

# Print out the model IDs
models = [model.id for model in models.data]
models.sort()
print("\n".join(models))
# yay, we got o3-mini API access

2025-02-23 16:25:00,120 - httpx - INFO - HTTP Request: GET https://api.openai.com/v1/models "HTTP/1.1 200 OK"


babbage-002
chatgpt-4o-latest
dall-e-2
dall-e-3
davinci-002
gpt-3.5-turbo
gpt-3.5-turbo-0125
gpt-3.5-turbo-1106
gpt-3.5-turbo-16k
gpt-3.5-turbo-16k-0613
gpt-3.5-turbo-instruct
gpt-3.5-turbo-instruct-0914
gpt-4
gpt-4-0125-preview
gpt-4-0613
gpt-4-1106-preview
gpt-4-turbo
gpt-4-turbo-2024-04-09
gpt-4-turbo-preview
gpt-4o
gpt-4o-2024-05-13
gpt-4o-2024-08-06
gpt-4o-2024-11-20
gpt-4o-audio-preview
gpt-4o-audio-preview-2024-10-01
gpt-4o-audio-preview-2024-12-17
gpt-4o-mini
gpt-4o-mini-2024-07-18
gpt-4o-mini-audio-preview
gpt-4o-mini-audio-preview-2024-12-17
gpt-4o-mini-realtime-preview
gpt-4o-mini-realtime-preview-2024-12-17
gpt-4o-realtime-preview
gpt-4o-realtime-preview-2024-10-01
gpt-4o-realtime-preview-2024-12-17
o1
o1-2024-12-17
o1-mini
o1-mini-2024-09-12
o1-preview
o1-preview-2024-09-12
o3-mini
o3-mini-2025-01-31
omni-moderation-2024-09-26
omni-moderation-latest
text-embedding-3-large
text-embedding-3-small
text-embedding-ada-002
tts-1
tts-1-1106
tts-1-hd
tts-1-hd-1106
whisper-1


In [8]:
def list_gemini_models():
    try:
        # Configure the library
        genai.configure()

        # List available models
        models = genai.list_models()

        print("Available Gemini Models:")
        print("-----------------------")
        for m in models:
            if "gemini" in m.name.lower():
                print(f"Name: {m.name}")
                print(f"Description: {m.description}")
                print(f"Generation Methods: {m.supported_generation_methods}")
                print("-----------------------")

    except Exception as e:
        print(f"An error occurred: {str(e)}")

list_gemini_models()


Available Gemini Models:
-----------------------
Name: models/gemini-1.0-pro-latest
Description: The original Gemini 1.0 Pro model. This model will be discontinued on February 15th, 2025. Move to a newer Gemini version.
Generation Methods: ['generateContent', 'countTokens']
-----------------------
Name: models/gemini-1.0-pro
Description: The best model for scaling across a wide range of tasks
Generation Methods: ['generateContent', 'countTokens']
-----------------------
Name: models/gemini-pro
Description: The best model for scaling across a wide range of tasks
Generation Methods: ['generateContent', 'countTokens']
-----------------------
Name: models/gemini-1.0-pro-001
Description: The original Gemini 1.0 Pro model version that supports tuning. Gemini 1.0 Pro will be discontinued on February 15th, 2025. Move to a newer Gemini version.
Generation Methods: ['generateContent', 'countTokens', 'createTunedModel']
-----------------------
Name: models/gemini-1.0-pro-vision-latest
Description

In [9]:
# try Gemini
os.environ["GOOGLE_API_KEY"]=os.environ["GEMINI_API_KEY"]

# GEMINI_MODEL = "gemini-1.5-pro"  # or "flash" depending on the desired model
GEMINI_MODEL = "gemini-2.0-flash-exp"

gmodel = ChatGoogleGenerativeAI(
    model=GEMINI_MODEL,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

messages = [
    SystemMessage(content="You are a translator. Translate the following from English into Italian without explanation or comment."),
    HumanMessage(content="Listen to me. You are beautiful. You are perfect and I love you."),
]

# Invoke the model
response = gmodel.invoke(messages)

# Print the response
print(response.content)

Ascoltami. Sei bellissima. Sei perfetto/a e ti amo.



In [10]:
# anthropic

# Initialize the ChatAnthropic model
claude_model = ChatAnthropic(
    model="claude-3-5-sonnet-20241022",
    anthropic_api_key=os.environ["CLAUDE_API_KEY"],
    temperature=0,
)

# Create the messages
messages = [
    SystemMessage(content="You are a translator. Translate the following from English into Italian without explanation or comment."),
    HumanMessage(content="Listen to me. You are beautiful. You are perfect and I love you."),
]

# Invoke the model
response = claude_model.invoke(messages)

# Print the response
print(response.content)

2025-02-23 16:25:01,418 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"


Ascoltami. Sei bellissima. Sei perfetta e ti amo.


In [11]:
# use a langchain template
system_template = "You are a translator. Translate the following from English into {language}:"
prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "{text}")]
)
parser = StrOutputParser()
chain = prompt_template | openai_model | parser
chain.invoke({"language": "italian", "text": "hi"})


2025-02-23 16:25:01,920 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


'Ciao! Come posso aiutarti oggi?'

In [12]:
# time multiple templates (single-threaded)
prompt_inputs = [
    {"language": "French", "adjective1": "flawless", "adjective2": "beautiful"},
    {"language": "German", "adjective1": "immaculate", "adjective2": "exquisite"},
    {"language": "Spanish", "adjective1": "perfect", "adjective2": "gorgeous"},
    {"language": "Italian", "adjective1": "amazing", "adjective2": "magnificent"},
    {"language": "Hungarian", "adjective1": "ravishing", "adjective2": "stunning"},
]
system_template = 'You are a translator. Translate the following text from English into {language}. Provide only the translation, no other information:'
user_template = 'Listen to me. You are {adjective1}. You are {adjective2} and I love you.'

prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template),
     ("user", user_template)]
)

parser = StrOutputParser()

ochain = prompt_template | openai_model | parser
gchain = prompt_template | gmodel | parser
cchain = prompt_template | claude_model | parser

start_time = datetime.now()
for tpl in prompt_inputs:
    for chain in [ochain, gchain, cchain]:
        response = ""
        #     print()
        #     print(prompt_template.format(**tpl))
        # stream tokens as they are generated
        for r in chain.stream(tpl):
            print(r, end="")
            response += r
        print()
end_time = datetime.now()

difference = end_time - start_time
total_seconds = difference.total_seconds()
print(f"\n\nElapsed seconds: {total_seconds:.6f}")


2025-02-23 16:25:02,332 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Écoute-moi. Tu es parfaite. Tu es belle et je t'aime.
Écoute-moi. Tu es parfait(e). Tu es magnifique et je t'aime.



2025-02-23 16:25:03,664 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"


Écoute-moi. Tu es parfait(e). Tu es magnifique et je t'aime.


2025-02-23 16:25:04,211 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Hör mir zu. Du bist makellos. Du bist exquisit und ich liebe dich.
Hör mir zu. Du bist makellos. Du bist exquisit und ich liebe dich.



2025-02-23 16:25:05,266 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"


Hör mir zu. Du bist makellos. Du bist wunderschön und ich liebe dich.


2025-02-23 16:25:06,019 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Escúchame. Eres perfecto. Eres hermosa y te amo.
Escúchame. Eres perfecto/perfecta. Eres precioso/preciosa y te amo/quiero.



2025-02-23 16:25:07,048 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"


Escúchame. Eres perfecta. Eres preciosa y te amo.


2025-02-23 16:25:07,759 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Ascoltami. Sei incredibile. Sei magnifico e ti amo.
Ascoltami. Sei fantastico/a. Sei magnifico/a e ti amo.



2025-02-23 16:25:08,578 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"


Ascoltami. Sei fantastico/a. Sei magnifico/a e ti amo.


2025-02-23 16:25:09,293 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Hallgass rám. Elbűvölő vagy. Pompás vagy, és szeretlek.
Hallgass rám. Elbűvölő vagy. Lenyűgöző vagy, és szeretlek.



2025-02-23 16:25:10,507 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"


Figyelj rám. Elragadó vagy. Gyönyörű vagy és szeretlek.


Elapsed seconds: 8.862191


In [13]:
# Retry decorator with exponential backoff
# if I run google too fast I get a 429 error, need to update something in gcp probably

def should_retry_exception(exception):
    """Determine if the exception should trigger a retry. (always retry)"""
    print(type(exception))
    print(exception)
    return True


# @retry(
#     stop=stop_after_attempt(8),  # Maximum 8 attempts
#     wait=wait_exponential(multiplier=1, min=2, max=128),  # Wait 2^x * multiplier seconds between retries
#     retry=retry_if_exception_type(should_retry_exception),
#     before_sleep=lambda retry_state: print(f"Retrying after {retry_state.outcome.exception()}, attempt {retry_state.attempt_number}")
# )
async def process_translation(chain, inputs, name):
    response = ""
    async for chunk in chain.astream(inputs):
        response += chunk
    return response, name


async def main():
    prompt_inputs = [
        {"language": "French", "adjective1": "flawless", "adjective2": "beautiful"},
        {"language": "German", "adjective1": "immaculate", "adjective2": "exquisite"},
        {"language": "Spanish", "adjective1": "perfect", "adjective2": "gorgeous"},
        {"language": "Italian", "adjective1": "amazing", "adjective2": "magnificent"},
        {"language": "Hungarian", "adjective1": "ravishing", "adjective2": "stunning"},
    ]

    system_template = 'You are a translator. Translate the following text into {language}. Provide only the translation, no other information:'
    user_template = 'Listen to me. You are {adjective1}. You are {adjective2} and I love you.'

    prompt_template = ChatPromptTemplate.from_messages([
        ("system", system_template),
        ("user", user_template)
    ])

    parser = StrOutputParser()
    ochain = prompt_template | openai_model | parser
    gchain = prompt_template | gmodel | parser
    cchain = prompt_template | claude_model | parser
    chains = {'openai': ochain, 'google': gchain, 'claude': cchain}

    start_time = datetime.now()

    tasks = []
    for tpl in prompt_inputs:
        print(f"Queuing {tpl['language']} translations...")
        for name, chain in chains.items():
            task = asyncio.create_task(process_translation(chain, tpl, name))
            tasks.append(task)

    try:
        responses = await asyncio.gather(*tasks)
        for response, name in responses:
            print(f"{name}: {response}\n")
    except Exception as e:
        print(f"Error during translation: {str(e)}")

    end_time = datetime.now()
    difference = end_time - start_time
    total_seconds = difference.total_seconds()
    print(f"\nElapsed seconds: {total_seconds:.6f}")

asyncio.run(main())

Queuing French translations...
Queuing German translations...
Queuing Spanish translations...
Queuing Italian translations...
Queuing Hungarian translations...


2025-02-23 16:25:11,147 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-02-23 16:25:11,182 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-02-23 16:25:11,200 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-02-23 16:25:11,239 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-02-23 16:25:11,271 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-02-23 16:25:11,285 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-02-23 16:25:11,312 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-02-23 16:25:11,364 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-02-23 16:25:11,763 - httpx - INFO - HTTP Request: PO

openai: Écoute-moi. Tu es parfaite. Tu es belle et je t'aime.

google: Écoute-moi. Tu es parfait(e). Tu es magnifique et je t'aime.


claude: Écoute-moi. Tu es parfait(e). Tu es magnifique et je t'aime.

openai: Hör mir zu. Du bist makellos. Du bist exquisit und ich liebe dich.

google: Hör mir zu. Du bist makellos. Du bist exquisit und ich liebe dich.


claude: Hör mir zu. Du bist makellos. Du bist wunderschön und ich liebe dich.

openai: Escúchame. Eres perfecto. Eres hermosa y te amo.

google: Escúchame. Eres perfecto/a. Eres precioso/a y te amo.


claude: Escúchame. Eres perfecta. Eres preciosa y te amo.

openai: Ascoltami. Sei straordinario. Sei magnifico e ti amo.

google: Ascoltami. Sei fantastico/a. Sei magnifico/a e ti amo.


claude: Ascoltami. Sei fantastico. Sei magnifico e ti amo.

openai: Figyelj rám. Elbűvölő vagy. Lenygőző vagy, és szeretlek.

google: Hallgass rám. Elbűvölő vagy. Lenyűgöző vagy, és szeretlek.


claude: Figyelj rám. Elragadó vagy. Gyönyörű vagy és szeretl

In [14]:
# same but use ainvoke, no stream
# Rate limit settings
# CALLS_PER_MINUTE = 60  # Adjust based on your quota
# MAX_CONCURRENT = 5     # Maximum concurrent requests
# sem = Semaphore(MAX_CONCURRENT) # semaphore for controlling concurrent requests

# @sleep_and_retry
# @limits(calls=CALLS_PER_MINUTE, period=60)
@retry(
    stop=stop_after_attempt(8),  # Maximum 8 attempts
    wait=wait_exponential(multiplier=1, min=2, max=128),  # Wait 2^x * multiplier seconds between retries
    retry=retry_if_exception_type(should_retry_exception),
    before_sleep=lambda retry_state: print(f"Retrying after {retry_state.outcome.exception()}, attempt {retry_state.attempt_number}")
)
async def async_langchain(chain, input_dict, name=""):
#     async with sem:
        response = await chain.ainvoke(input_dict)
        return response, name

prompt_templates = [
    {"language": "French", "adjective1": "flawless", "adjective2": "beautiful"},
    {"language": "German", "adjective1": "immaculate", "adjective2": "exquisite"},
    {"language": "Spanish", "adjective1": "perfect", "adjective2": "gorgeous"},
    {"language": "Italian", "adjective1": "amazing", "adjective2": "magnificent"},
    {"language": "Hungarian", "adjective1": "ravishing", "adjective2": "stunning"},
]

chains = {'openai': ochain, 
          'google': gchain, 
          'claude': cchain}

start_time = datetime.now()
tasks = []
for d in prompt_templates:
    for name, chain in chains.items():
        task = asyncio.create_task(async_langchain(chain, d, name))
        tasks.append(task)
responses = await asyncio.gather(*tasks)
end_time = datetime.now()

difference = end_time - start_time
total_seconds = difference.total_seconds()
for response, name in responses:
    print(f"{name}: {response}")
print(f"\n\nElapsed seconds: {total_seconds:.6f}")


2025-02-23 16:25:15,941 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-02-23 16:25:15,971 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-02-23 16:25:15,982 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-02-23 16:25:15,984 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-02-23 16:25:15,995 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-02-23 16:25:16,030 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-02-23 16:25:16,064 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-02-23 16:25:16,106 - httpx - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-02-23 16:25:16,257 - httpx - INFO - HTTP Request: PO

TypeError: isinstance() arg 2 must be a type, a tuple of types, or a union

In [None]:
# test o3-mini
client = OpenAI()
response = client.chat.completions.create(
    model="o3-mini",
    reasoning_effort = "low",
    messages=[
        {
            "role": "system",
            "content": "You will act as an expert Python developer."
        },
        {
            "role": "user",
            "content": "Write a Python script that takes a matrix represented as a string with format '[1,2],[3,4],[5,6]' and prints the transpose in the same format."
        }
    ]
)
print(response.choices[0].message.content)


In [None]:
prompt_template = ChatPromptTemplate.from_messages(
    [("system", "You will act as an expert Python developer."),
     ("user", "{input}")]
)

parser = StrOutputParser()

openai_model = ChatOpenAI(model="o3-mini", reasoning_effort="low")

ochain = prompt_template | openai_model | parser

response = ochain.invoke("Write a Python script that takes a matrix represented as a string with format '[1,2],[3,4],[5,6]' and prints the transpose in the same format.")

print(response)


In [None]:
# could use the metadata to saturate the OpenAI API and use as many tokens per second as available
# but not supported by langchain across multiple models so exponential backoff seems to be the best alternative

apikey = os.environ.get("OPENAI_API_KEY")
url = "https://api.openai.com/v1/chat/completions"
headers = {
    "OpenAI-Beta": "assistants=v2",
    "Authorization": f"Bearer {apikey}"
}
body = {
    "model": "gpt-4o-2024-08-06", "max_tokens": 25, "top_p": 0.8,
    "messages": [
        {"role": "user",
         "content": [{"type": "text", "text": "What is the airspeed velocity of an unladen swallow"}]
        }
    ]}

try:
    response = httpx.post(url, headers=headers, json=body)
    response_json = response.json()
    print(json.dumps(response_json, indent=3))

    # Extract headers starting with 'x-' and load them into a dictionary
    x_headers = {k: v for k, v in response.headers.items() if k.lower().startswith('x-')}

    # Convert time values into seconds
    time_multipliers = {'h': 3600, 'm': 60, 's': 1, 'ms': 0.001}
    rate_headers = ['x-ratelimit-limit-requests', 'x-ratelimit-limit-tokens',
                    'x-ratelimit-remaining-requests', 'x-ratelimit-remaining-tokens',
                    'x-ratelimit-reset-requests', 'x-ratelimit-reset-tokens']
    for key in rate_headers:
        if key in x_headers:
            if 'reset' in key:
                total_time = 0
                for time_part in re.findall(r'(\d+)([hms]+)', x_headers[key]):
                    total_time += int(time_part[0]) * time_multipliers[time_part[1]]
                x_headers[key] = total_time
            else:
                x_headers[key] = int(x_headers[key])

    # Print the headers
    print("\nHeaders starting with 'x-':")
    for key, value in x_headers.items():
        print(f"{key}: {value}")

except Exception as e:
    print(e)
    raise


In [None]:
# structured response from openai using pydantic and openai api
client = OpenAI()

class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]

completion = client.beta.chat.completions.parse(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "Extract the event information in JSON format."},
        {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."},
    ],
    response_format=CalendarEvent,
)

event = completion.choices[0].message.parsed

print(json.dumps(json.loads(event.json()), indent=2))

event

In [None]:
# using langchain and with_structured_output
formatted_date = datetime.now().strftime('%Y-%m-%d')

system_prompt = f"""You are a precise calendar event extractor. Your task is to extract event details from natural language and format them as structured data.

TASK REQUIREMENTS:
1. Extract exactly three pieces of information:
   - Event name (be specific and descriptive)
   - Event date (convert relative dates to YYYY-MM-DD format)
   - List of all participants mentioned

RULES:
- Convert relative dates using today's date ({formatted_date}) as reference
- Include a descriptive event name even if only implied
- List ALL participants mentioned, even in passing
- Never include participants who aren't explicitly mentioned
- If any required information is missing, make reasonable assumptions based on context

Example Input: "Alice and Bob are going to a science fair on Friday"

Example Output:
{{{{
    "name": "Science Fair",
    "date": "2024-02-02",
    "participants": ["Alice", "Bob"]
}}}}

FORMAT INSTRUCTIONS:
The output should be a JSON object with the following schema:
{{{{
    "name": string,       // The name or title of the event
    "date": string,       // The date in YYYY-MM-DD format
    "participants": [     // Array of participant names
        string,
        ...
    ]
}}}}
"""

class CalendarEvent(BaseModel):
    name: str = Field(description="The name or title of the event")
    date: str = Field(description="The date of the event in ISO format (YYYY-MM-DD)")
    participants: List[str] = Field(description="List of people participating in the event")

parser = PydanticOutputParser(pydantic_object=CalendarEvent)

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", "{input_text}")
])

# Create the chain
chain = prompt | openai_model.with_structured_output(CalendarEvent)

# Run the chain
response = chain.invoke({
    "input_text": "Alice and Bob are going to a science fair on Friday."
})

# Print the formatted result
print(json.dumps(response.dict(), indent=2))

# The response is a CalendarEvent object
response
