In [1]:
# Sample essay text. The streaming demo further down embeds it in an assistant
# message and asks the model to repeat it back.
ESSAY = """The Power of Curiosity: Fueling Human Progress

Curiosity is one of the most powerful driving forces behind human innovation and discovery. It is the innate desire to explore, learn, and understand the world around us. From the earliest days of human history, curiosity has propelled societies forward, leading to remarkable advancements in science, technology, and culture. It is often said that curiosity killed the cat, but in reality, curiosity has been the very thing that has allowed humans to evolve and adapt in an ever-changing world.

"""

# Setup and Basic Text Generation

In [2]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell runs so edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Put ../backend at the front of sys.path so the `shuscribe` imports in the
# following cells resolve. The path is relative to the kernel's working
# directory. The membership check keeps this cell idempotent: re-running it
# does not stack duplicate entries onto sys.path.
_backend_dir = str(Path('../backend').resolve())
if _backend_dir not in sys.path:
    sys.path.insert(0, _backend_dir)

In [3]:
# Import necessary modules
import asyncio
from dotenv import load_dotenv
import os
from shuscribe.services.llm.session import LLMSession
from shuscribe.services.llm.providers.provider import (
    Message, GenerationConfig
)
from shuscribe.schemas.llm import MessageRole
from shuscribe.services.llm.streaming import StreamStatus

# Load variables from a .env file into the process environment.
load_dotenv()

# Direct indexing (not .get) so a missing key raises KeyError in this cell.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]

# Model used for the plain generation demos, keyed by provider name.
TEST_MODELS = dict(
    openai="gpt-4o-mini",
    anthropic="claude-3-5-haiku-20241022",
    gemini="gemini-2.0-flash-001",
)

# Model used for the thinking demo, keyed by provider name.
TEST_THINKING_MODELS = dict(
    openai="o3-mini-2025-01-31",
    anthropic="claude-3-7-sonnet-20250219",
    gemini="gemini-2.0-flash-thinking-exp",
)

# Thin wrapper the notebook cells use to await a coroutine
async def run_async(coro):
    """Await ``coro`` and return whatever it produces."""
    outcome = await coro
    return outcome

## Basic Text Generation with Different Providers

In [16]:
# Set Provider Name.
# Used by the cells below as the provider passed to the session and as the
# lookup key into TEST_MODELS / TEST_THINKING_MODELS, so it must be one of
# their keys ("openai", "anthropic", "gemini").
PROVIDER_NAME = "anthropic"

In [17]:
async def test_gen():
    """Make one non-streaming request to the selected provider.

    Returns:
        The ``text`` attribute of the provider's response.
    """
    async with LLMSession.session_scope() as llm_session:
        llm_provider = await llm_session.get_provider(PROVIDER_NAME)

        # A system message followed by a bare string for the question.
        conversation = [
            Message(
                role=MessageRole.SYSTEM,
                content="You are a helpful assistant that speaks in a concise manner.",
            ),
            "What is the capital of France?",
        ]
        reply = await llm_provider.generate(
            messages=conversation,
            model=TEST_MODELS[PROVIDER_NAME],
            config=GenerationConfig(temperature=0.7),
        )
        return reply.text

# Run the one-shot request and print the reply under the model name.
response = await run_async(test_gen())
print(f"{TEST_MODELS[PROVIDER_NAME]}:\n{response}")

claude-3-5-haiku-20241022:
Paris is the capital of France.


In [None]:

# Streaming response
async def test_streaming():
    """Stream a reply from the selected provider, printing each chunk as it arrives.

    The conversation consists of an assistant message containing ESSAY, a
    request to repeat that essay, and a trailing assistant message ("Sure!").

    Returns:
        The final chunk's ``accumulated_text`` when the stream ended with
        ``StreamStatus.COMPLETE``; otherwise ``None`` (including when the
        stream produced no chunks at all).
    """
    # Most recent chunk seen. Starts as None so that an empty stream does not
    # leave the post-loop check reading an unbound loop variable.
    last_chunk = None

    async with LLMSession.session_scope() as session:
        # Create a streaming config
        config = GenerationConfig(temperature=0.7)

        print(f"{TEST_MODELS[PROVIDER_NAME]}:")

        async for chunk in session.generate_stream(
            messages=[
                Message(role=MessageRole.ASSISTANT, content=f"Sure! Here is the essay:\n{ESSAY}"),
                "Can you repeat exactly this essay that you generated?",
                Message(role=MessageRole.ASSISTANT, content="Sure!"),
            ],
            provider_name=PROVIDER_NAME,
            model=TEST_MODELS[PROVIDER_NAME],
            config=config,
        ):
            last_chunk = chunk
            print(chunk.text, end="", flush=True)

    # Only the final chunk is inspected for completion status and usage.
    if last_chunk is not None and last_chunk.status == StreamStatus.COMPLETE:
        print("FINISHED")
        print(last_chunk.usage)
        return last_chunk.accumulated_text
    return None

# Run the streaming demo; the result is None unless the stream completed.
# NOTE: later cells assign to `streaming_response` again.
streaming_response = await run_async(test_streaming())


claude-3-5-haiku-20241022:


The Power of Curiosity: Fueling Human Progress

Curiosity is one of the most powerful driving forces behind human innovation and discovery. It is the innate desire to explore, learn, and understand the world around us. From the earliest days of human history, curiosity has propelled societies forward, leading to remarkable advancements in science, technology, and culture. It is often said that curiosity killed the cat, but in reality, curiosity has been the very thing that has allowed humans to evolve and adapt in an ever-changing world.

Database module not implemented. Skipping save.


FINISHED
prompt_tokens=158 completion_tokens=120


In [25]:
# Structured output via a response schema (uses the provider selected by PROVIDER_NAME)
from pydantic import BaseModel, Field

# Schema handed to GenerationConfig(response_schema=...) in the structured
# output demo: a free-text rationale plus a short final answer.
# NOTE(review): documented with comments rather than a class docstring because
# pydantic copies a model's docstring into its generated JSON schema, which
# would change what is sent to the provider. Confirm before adding one.
class QAResponse(BaseModel):
    reasoning: str = Field(description="reasoning about the answer")
    response: str = Field(description="concise answer to the question")

async def test_structured():
    """Stream a reply constrained by the QAResponse schema, printing each chunk.

    Returns:
        The final chunk's ``accumulated_text`` when the stream ended with
        ``StreamStatus.COMPLETE``; otherwise ``None`` (including when the
        stream produced no chunks at all).
    """
    # Most recent chunk seen. Starts as None so that an empty stream does not
    # leave the post-loop check reading an unbound loop variable.
    last_chunk = None

    async with LLMSession.session_scope() as session:
        print("Streaming response:")
        async for chunk in session.generate_stream(
            messages=[
                Message(role=MessageRole.SYSTEM, content="You are a helpful assistant"),
                "Does 1+1=3?"
            ],
            provider_name=PROVIDER_NAME,
            model=TEST_MODELS[PROVIDER_NAME], # or any model that supports response_format
            config=GenerationConfig(
                temperature=0.7,
                response_schema=QAResponse
            )
        ):
            last_chunk = chunk
            print(chunk.text, end="", flush=True)

    if last_chunk is not None and last_chunk.status == StreamStatus.COMPLETE:
        return last_chunk.accumulated_text
    return None

# Run the structured-output demo; the result is None unless the stream completed.
streaming_response = await run_async(test_structured())

Streaming response:
```json
{
    "reasoning": "In basic arithmetic, 1+1 equals 2, not 3. This is a fundamental mathematical fact that can be proven through simple counting or addition.",
    "response": "No, 1+1 does not equal 3. 1+1 equals 2."
}
```

Database module not implemented. Skipping save.


In [35]:
# Streaming with model thinking enabled (ThinkingConfig)
from shuscribe.schemas.llm import ThinkingConfig

async def test_thinking():
    """Stream a reply from a thinking model with thinking enabled, printing each chunk.

    Returns:
        The final chunk's ``accumulated_text`` when the stream ended with
        ``StreamStatus.COMPLETE``; otherwise ``None`` (including when the
        stream produced no chunks at all).
    """
    # Most recent chunk seen. Starts as None so that an empty stream does not
    # leave the post-loop check reading an unbound loop variable.
    last_chunk = None

    async with LLMSession.session_scope() as session:
        print("Streaming response:")
        async for chunk in session.generate_stream(
            messages=[
                Message(role=MessageRole.SYSTEM, content="You are a helpful assistant"),
                "Prove that 1+1=3"
            ],
            provider_name=PROVIDER_NAME,
            model=TEST_THINKING_MODELS[PROVIDER_NAME], # thinking model for the selected provider
            config=GenerationConfig(
                # NOTE: TEMPERATURE IS NOT AVAILABLE FOR THINKING MODELS
                # NOTE(review): the value is still passed here; presumably the
                # provider layer drops or overrides it. Confirm.
                temperature=0.7,
                # response_schema=QAResponse, # NOTE: Gemini does not support response_schema for thinking models
                thinking_config=ThinkingConfig(enabled=True, effort="low")
            )
        ):
            last_chunk = chunk
            print(chunk.text, end="", flush=True)

    if last_chunk is not None and last_chunk.status == StreamStatus.COMPLETE:
        return last_chunk.accumulated_text
    return None

# Run the thinking demo; the result is None unless the stream completed.
streaming_response = await run_async(test_thinking())


Streaming response:
<ANTHROPIC_THINKING>I need to be careful here. The statement "1 + 1 = 3" is mathematically false. In standard arithmetic, 1 + 1 = 2. I should explain that I cannot prove something that is false in standard mathematics.

However, I can explain a few things:
1. That this statement is false in standard arithmetic
2. That mathematics relies on axioms, and in standard number systems this equality doesn't hold
3. That there might be non-standard mathematical systems where different rules apply, but I should make it clear that these would be different from our usual understanding of numbers and addition

Let me provide a clear response that doesn't validate the false statement but provides helpful information.</ANTHROPIC_THINKING>I cannot prove that 1+1=3 because this statement is false in standard arithmetic. Using the conventional definitions of numbers and addition:

1+1=2

This is a fundamental fact in our number system based on the Peano axioms and the standard defini

Database module not implemented. Skipping save.


# Session Management and Provider Reuse

In [10]:
# Test session management
async def test_session_reuse():
    """Issue three sequential requests through one provider object.

    Returns:
        A list with the response text of each request, in request order.
    """
    # Obtain the session via get_instance() and fetch the provider once;
    # every request below goes through that same provider object.
    shared_session = await LLMSession.get_instance()
    shared_provider = await shared_session.get_provider(PROVIDER_NAME)

    async def ask(i):
        # One "count to N" prompt, where N is the zero-based index plus one.
        reply = await shared_provider.generate(
            messages=[
                Message(role=MessageRole.USER, content=f"Count to {i+1} briefly.")
            ],
            model=TEST_MODELS[PROVIDER_NAME],
        )
        return reply.text

    # Awaited one at a time, so the requests stay sequential.
    return [await ask(i) for i in range(3)]

# Run the reuse demo and print each reply with a 1-based query number.
session_results = await run_async(test_session_reuse())
for i, result in enumerate(session_results):
    print(f"Query {i+1}: {result}")

Query 1: 1

Query 2: 1, 2!

Query 3: 1, 2, 3!

