# 03: Prompt Caching

[docs](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching)

In [None]:
from anthropic import Anthropic
import time

In [None]:
import os


def ensure_api_key(env=None, key_path="api.txt"):
    """Make sure ``ANTHROPIC_API_KEY`` is set, falling back to a key file.

    Args:
        env: Mapping to read from and write to. Defaults to ``os.environ``.
        key_path: Path of the text file holding the key, used only when the
            variable is missing or empty in ``env``.

    Returns:
        True if a non-empty key is available in ``env`` afterwards, else False.
    """
    if env is None:
        env = os.environ

    # `.get()` rather than `[...]`: indexing raises KeyError when the variable
    # is unset, which is exactly the case the fallback is meant to handle.
    if env.get("ANTHROPIC_API_KEY"):
        print("environment variable ANTHROPIC_API_KEY found")
        return True

    print("environment variable ANTHROPIC_API_KEY not found, checking in api.txt")
    try:
        with open(key_path) as key_file:
            api = key_file.read().strip()
    except FileNotFoundError:
        # A missing file is treated the same as an empty one.
        api = ""

    if not api:
        print("nothing found in api.txt, create a key in console.anthropic.com and paste it there")
        return False

    env["ANTHROPIC_API_KEY"] = api
    print("key found in api.txt")
    return True


ensure_api_key()

In [None]:
# Model identifier shared by every request in this notebook.
MODEL_NAME = "claude-3-5-sonnet-20241022"

# No key is passed explicitly; the SDK is expected to pick up
# ANTHROPIC_API_KEY from the environment prepared in the previous cell.
client = Anthropic()

### Loading The Book

In [None]:
# Read the whole novel into a single string; it is sent as the large,
# cacheable part of the prompt in the cells below.
with open("files/frankenstein.txt") as book_file:
    book_content = book_file.read()

In [None]:
# Size of the loaded text in characters (not tokens).
len(book_content)

In [None]:
# Preview characters 1000-1999 as a quick sanity check that the file loaded.
print(book_content[1000:2000])

## Uncached Request

In [None]:
import time
def make_non_cached_api_call(question="What happens in chapter 3?"):
    """Ask a question about the book without any prompt caching.

    The full book text is sent as the first content block and the question as
    the second, with no ``cache_control`` marker on either.

    Args:
        question: The question to ask about the book. Defaults to the original
            chapter 3 question so existing calls behave the same.

    Returns:
        A ``(response, elapsed_seconds)`` tuple, where ``response`` is whatever
        ``client.messages.create`` returned and ``elapsed_seconds`` is the
        duration of that call.
    """
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "<book>" + book_content + "</book>"
                },
                {
                    "type": "text",
                    "text": question
                }
            ]
        }
    ]

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    response = client.messages.create(
        model=MODEL_NAME,
        max_tokens=500,
        messages=messages,
    )
    elapsed = time.perf_counter() - start_time

    return response, elapsed

In [None]:
# Run the baseline request once and report how long it took.
non_cached_response, non_cached_time = make_non_cached_api_call()
print(f"Non-cached time: {non_cached_time:.2f} seconds")

print("\nOutput (non-cached):")
# Print the text of the first content block (as the cached cells do) rather
# than the raw list repr of the content blocks.
print(non_cached_response.content[0].text)

In [None]:
# Inspect the usage information attached to the non-cached response.
non_cached_response.usage

## Cached Version

In [None]:
def make_cached_api_call(question="What happens in chapter 5?"):
    """Ask a question about the book with the book block marked for caching.

    The book text is sent as the first content block with an ephemeral
    ``cache_control`` marker; the question follows as a second, unmarked block.

    Args:
        question: The question to ask about the book. Defaults to the original
            chapter 5 question so existing calls behave the same.

    Returns:
        A ``(response, elapsed_seconds)`` tuple, where ``response`` is whatever
        ``client.messages.create`` returned and ``elapsed_seconds`` is the
        duration of that call.
    """
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "<book>" + book_content + "</book>",
                    # Marks the end of the prefix that should be cached.
                    "cache_control": {"type": "ephemeral"}
                },
                {
                    "type": "text",
                    "text": question
                }
            ]
        }
    ]

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    response = client.messages.create(
        model=MODEL_NAME,
        max_tokens=500,
        messages=messages,
    )
    elapsed = time.perf_counter() - start_time

    return response, elapsed

Note: sending the whole book in several back-to-back requests is likely to exceed the account's tokens-per-minute rate limit, so wait about a minute between the calls below.

In [None]:
# First cached request.
response1, duration1 = make_cached_api_call()

# `cache_creation_input_tokens` should have all the tokens!
print("response 1", f"duration: {duration1}", response1.usage, sep="\n")
print()

print(response1.content[0].text)

# wait 1 minute to prevent block
time.sleep(60)

In [None]:
# Second cached request. The call must actually run here: with it commented
# out, `response2` and `duration2` are undefined on a fresh kernel.
response2, duration2 = make_cached_api_call()

print("response 2")
print(f"duration: {duration2}")
print(response2.usage) # `cache_read_input_tokens` should have all the tokens!
print()

print(response2.content[0].text)

## Prompt Caching Pricing

* Cache write tokens are 25% more expensive than base input tokens
* Cache read tokens are 90% cheaper than base input tokens
* Regular input and output tokens are priced at standard rates

## Multi-Turn Caching

When implementing a chatbot, adding `"cache_control": {"type": "ephemeral"}` to the **last two user messages** will have the following effect:
1. The first (earlier) cache breakpoint attempts to read any cache that already exists up to that point in the conversation. If no cache is found, one is created.
2. The second (later) cache breakpoint writes a new cache entry covering the conversation up to the latest message, so that in the next round the earlier breakpoint can read pre-computed information for the whole conversation so far.

In [None]:
# Illustrative payload for one turn of a conversation: the two user messages
# shown here each carry a `cache_control` marker; the assistant message
# between them does not.
messages=[
    # ...long conversation so far
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Hello, can you tell me more about the solar system",
                # Earlier of the two cache markers.
                "cache_control": {"type": "ephemeral"}
            }
        ]
    },
    {
        "role": "assistant",
        "content": "Certainly! The solar system is the collection of celestial bodies that orbit our Sun. It consists of eight planets, numerous moons, asteroids, comets, and other objects. The planets, in order from closest to farthest from the Sun, are: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Each planet has its own unique characteristics and features. Is there a specific aspect of the solar system you'd like to know more about?"
    },
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Tell me more about Mars.",
                # Later of the two cache markers, on the newest user message.
                "cache_control": {"type": "ephemeral"}
            }
        ]
    }
]

In [None]:
# The same conversation one turn later: the first user message no longer
# carries a marker, and the two most recent user messages carry it instead.
messages=[
    # ...long conversation so far
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Hello, can you tell me more about the solar system",
            }
        ]
    },
    {
        "role": "assistant",
        "content": "Certainly! The solar system is the collection of celestial bodies that orbit our Sun. It consists of eight planets, numerous moons, asteroids, comets, and other objects. The planets, in order from closest to farthest from the Sun, are: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Each planet has its own unique characteristics and features. Is there a specific aspect of the solar system you'd like to know more about?"
    },
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Tell me more about Mars.",
                # Now the earlier of the two cache markers.
                "cache_control": {"type": "ephemeral"}
            }
        ]
    },
    {
        "role": "assistant",
        "content": "I'd love to tell you about Mars.  Mars is...."
    },
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "That's really neat.  Tell me about Pluto!",
                # Later of the two cache markers, on the newest user message.
                "cache_control": {"type": "ephemeral"}
            }
        ]
    },
]

## Chatbot with caching

In [None]:
def _strip_stale_cache_markers(history, keep_last=2):
    """Remove ``cache_control`` from all but the newest user messages.

    Walks the history from newest to oldest, leaves the ``keep_last`` most
    recent user messages untouched, and removes the ``cache_control`` key from
    every content item of older user messages. Selecting by role rather than
    by a fixed index keeps the pruning correct even if the history does not
    strictly alternate between user and assistant turns.

    Args:
        history: List of message dicts; modified in place.
        keep_last: Number of most recent user messages that keep their marker.
    """
    kept = 0
    for message in reversed(history):
        content = message.get("content")
        # Assistant turns are stored below as plain strings, so only
        # list-style user content can hold a marker.
        if message.get("role") != "user" or not isinstance(content, list):
            continue
        if kept < keep_last:
            kept += 1
            continue
        for content_item in content:
            content_item.pop("cache_control", None)


print("Simple Chatbot (type 'quit' to exit)")
# Store conversation history
messages = []
while True:
    # Get user input
    user_input = input("You: ")
    # Check for quit command
    if user_input.strip().lower() == 'quit':
        print("Goodbye!")
        break
    # Skip blank input instead of sending an empty text block
    # (NOTE(review): the API is expected to reject empty text content).
    if not user_input.strip():
        continue

    # Add user message to history, with cache
    messages.append(
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": user_input,
                    "cache_control": {"type": "ephemeral"}
                }
            ]
        }
    )

    # caching: keep the marker only on the two most recent user messages
    _strip_stale_cache_markers(messages)

    try:
        # Get response from Claude
        response = client.messages.create(
            model=MODEL_NAME,
            max_tokens=200,
            messages=messages
        )
        asst_message = response.content[0].text
    except Exception as e:
        print()
        print(f"An error occurred: {e}")
        print(messages)
        # Drop the unanswered user turn so the history keeps alternating
        # user/assistant and the next attempt starts from a valid state.
        messages.pop()
        continue

    # Print Claude's response and add it to history
    print("Assistant:", asst_message)
    messages.append({"role": "assistant", "content": asst_message})