In [None]:
!python -m pip install anthropic

In [6]:
import json
import requests
import streamlit as st
def search_web(query: str, num_results: int = 10) -> dict:
    """
    Perform a web search using Serper API
    
    Args:
        query (str): The search query
        num_results (int): Number of results to return (default: 5)
        
    Returns:
        dict: Search results containing organic results and other data
    """
    url = "https://google.serper.dev/search"
    payload = json.dumps({"q": query})
    headers = {
        'X-API-KEY': st.secrets["SERPER_API_KEY"],
        'Content-Type': 'application/json'
    }
    
    try:
        response = requests.post(url, headers=headers, data=payload)
        response.raise_for_status()
        search_results = response.json()
        
        return search_results
    
    except requests.exceptions.RequestException as e:
        error_message = f"Error calling Serper API: {str(e)}"
        if hasattr(e, 'response') and e.response is not None:
            error_message += f"\nResponse content: {e.response.text}"
        raise Exception(error_message)
    except json.JSONDecodeError as e:
        raise Exception(f"Error decoding JSON from Serper API: {str(e)}")
    
    
results = search_web("tesla stock price")
results

{'searchParameters': {'q': 'tesla stock price',
  'type': 'search',
  'engine': 'google'},
 'answerBox': {'title': 'Tesla Inc / Stock Price',
  'answer': '318.43 +7.25 (2.33%)',
  'source': 'Disclaimer'},
 'knowledgeGraph': {'title': 'Tesla',
  'type': 'Automotive company',
  'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQSDGLkBLN9Z7tmOvfutnh6BHdO21p4o4bYFHhapmi10yqlD2WYFvYi0w&s=0',
  'description': 'Tesla, Inc. is an American multinational automotive and clean energy company. Headquartered in Austin, Texas, it designs, manufactures and sells battery electric vehicles, stationary battery energy storage devices from home to grid-scale, solar...',
  'descriptionSource': 'Wikipedia',
  'descriptionLink': 'https://en.wikipedia.org/wiki/Tesla,_Inc.',
  'attributes': {'CEO': 'Elon Musk (Oct 2008–)',
   'Founded': 'July 1, 2003, San Carlos, CA',
   'Revenue': '81.46\xa0billion USD (2022)',
   'CFO': 'Vaibhav Taneja',
   'Subsidiaries': 'Tesla Energy, Tesla Insurance Ltd.,

In [8]:
def organize_search_results(search_data):
    """
    Organizes search results into a structured format for AI processing.
    Works with any search query by organizing the standard sections returned by the search API.
    
    Args:
        search_data (dict): Raw search results from search_web function
        
    Returns:
        dict: Organized and cleaned search data
    """
    organized_data = {
        "query_info": {},
        "direct_answer": None,
        "knowledge_panel": None,
        "search_results": [],
        "top_stories": [],
        "related_questions": [],
        "related_searches": []
    }
    
    # Extract search parameters
    if "searchParameters" in search_data:
        organized_data["query_info"] = {
            "query": search_data["searchParameters"].get("q"),
            "search_type": search_data["searchParameters"].get("type"),
            "engine": search_data["searchParameters"].get("engine")
        }
    
    # Extract direct answer if available (answerBox)
    if "answerBox" in search_data:
        organized_data["direct_answer"] = {
            "title": search_data["answerBox"].get("title"),
            "answer": search_data["answerBox"].get("answer"),
            "source": search_data["answerBox"].get("source")
        }
    
    # Extract knowledge graph/panel information
    if "knowledgeGraph" in search_data:
        kg = search_data["knowledgeGraph"]
        organized_data["knowledge_panel"] = {
            "title": kg.get("title"),
            "type": kg.get("type"),
            "description": kg.get("description"),
            "source": kg.get("descriptionSource"),
            "source_link": kg.get("descriptionLink"),
            "image_url": kg.get("imageUrl"),
            "attributes": kg.get("attributes", {})
        }
    
    # Extract organic search results
    if "organic" in search_data:
        organized_data["search_results"] = [
            {
                "title": result.get("title"),
                "link": result.get("link"),
                "snippet": result.get("snippet"),
                "position": result.get("position"),
                "sitelinks": result.get("sitelinks", [])
            }
            for result in search_data["organic"]
        ]
    
    # Extract top stories
    if "topStories" in search_data:
        organized_data["top_stories"] = [
            {
                "title": story.get("title"),
                "link": story.get("link"),
                "source": story.get("source"),
                "date": story.get("date"),
                "image_url": story.get("imageUrl")
            }
            for story in search_data["topStories"]
        ]
    
    # Extract "People Also Ask" questions
    if "peopleAlsoAsk" in search_data:
        organized_data["related_questions"] = [
            {
                "question": qa.get("question"),
                "snippet": qa.get("snippet"),
                "title": qa.get("title"),
                "link": qa.get("link")
            }
            for qa in search_data["peopleAlsoAsk"]
        ]
    
    # Extract related searches
    if "relatedSearches" in search_data:
        organized_data["related_searches"] = [
            search.get("query") for search in search_data["relatedSearches"]
        ]
    
    return organized_data

In [13]:
print(search_results)
len(search_results)

{'searchParameters': {'q': 'tesla stock price', 'type': 'search', 'engine': 'google'}, 'answerBox': {'title': 'Tesla Inc / Stock Price', 'answer': '317.98 +6.80 (2.19%)', 'source': 'Disclaimer'}, 'knowledgeGraph': {'title': 'Tesla', 'type': 'Automotive company', 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQSDGLkBLN9Z7tmOvfutnh6BHdO21p4o4bYFHhapmi10yqlD2WYFvYi0w&s=0', 'description': 'Tesla, Inc. is an American multinational automotive and clean energy company. Headquartered in Austin, Texas, it designs, manufactures and sells battery electric vehicles, stationary battery energy storage devices from home to grid-scale, solar...', 'descriptionSource': 'Wikipedia', 'descriptionLink': 'https://en.wikipedia.org/wiki/Tesla,_Inc.', 'attributes': {'CEO': 'Elon Musk (Oct 2008–)', 'Founded': 'July 1, 2003, San Carlos, CA', 'Revenue': '81.46\xa0billion USD (2022)', 'CFO': 'Vaibhav Taneja', 'Subsidiaries': 'Tesla Energy, Tesla Insurance Ltd., Tesla (Thailand), and more', 'Fou

8

In [16]:
#count how many words in the search results
def count_words_in_search_results(search_data):
    # Count words in the search results
    word_count = 0
    for result in search_data["organic"]:
        word_count += len(result["snippet"].split())
    return word_count

word_count = count_words_in_search_results(search_results)
word_count

223

In [None]:
from anthropic import AsyncAnthropic, Anthropic
import asyncio

# Non-streaming version
def call_claude_haiku(prompt: str, max_tokens: int = 1000) -> str:
    """
    Makes a synchronous call to Claude 3.5 Haiku
    Input: prompt string and optional max tokens
    Output: model response as string
    """
    client = Anthropic(api_key="your-api-key")
    
    response = client.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=max_tokens,
        temperature=0,
        messages=[
            {
                "role": "user", 
                "content": prompt
            }
        ]
    )
    return response.content[0].text

# Streaming version 
async def stream_claude_haiku(prompt: str, max_tokens: int = 1000):
    """
    Makes an async streaming call to Claude 3.5 Haiku
    Input: prompt string and optional max tokens
    Output: yields chunks of the response
    """
    async with AsyncAnthropic(api_key="your-api-key") as client:
        async with client.messages.stream(
            model="claude-3-haiku-20240307",
            max_tokens=max_tokens,
            temperature=0,
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        ) as stream:
            async for text in stream.text_stream:
                yield text

# Example usage of streaming version
async def main():
    prompt = "Write a haiku about coding"
    async for chunk in stream_claude_haiku(prompt):
        print(chunk, end="", flush=True)
    print()

if __name__ == "__main__":
    asyncio.run(main())

# anthropic pdf reader


In [None]:
!uv pip install --upgrade anthropic

In [3]:
import anthropic
import base64
from pathlib import Path

def analyze_pdf(pdf_path, question):
    """
    Input: Path to PDF file and question to ask about the PDF
    Process: Reads PDF, encodes it, and sends to Claude API
    Output: Claude's response about the PDF
    """
    # Read and encode PDF
    pdf_data = base64.b64encode(Path(pdf_path).read_bytes()).decode("utf-8")
    
    # Initialize Claude client
    client = anthropic.Anthropic()
    
    # Create message request
    message = client.beta.messages.create(
        model="claude-3-5-sonnet-20241022",
        betas=["pdfs-2024-09-25"],
        max_tokens=1024,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "document",
                        "source": {
                            "type": "base64",
                            "media_type": "application/pdf",
                            "data": pdf_data
                        }
                    },
                    {
                        "type": "text",
                        "text": question
                    }
                ]
            }
        ],
    )
    
    # Simply return the text content
    return message.content[0].text

# Example usage
pdf_path = "/Users/huyknguyen/Desktop/redhorse/code_projects/ai_apps/docs/redhorse_docs/2024:10:2025-Workweek-Calendar.pdf"
question = "Please summarize the key dates and information from this calendar."

try:
    response = analyze_pdf(pdf_path, question)
    print(response)
except Exception as e:
    print(f"Error: {e}")


This is a 2025 calendar for RedHorse company that shows:

Paydays: Marked with red circles, occurring approximately twice per month
- Timesheets are due on the last day of pay periods (1st-15th and 16th-last day of month)
- If the last day falls on a weekend or holiday, timesheets are due the Friday before

Company Paid Holidays: Marked in blue shading, including dates like:
- January 20
- February 17
- April 7
- May 26
- July 4
- September 1
- October 13
- November 27
- December 25

The calendar clearly marks both paydays and holidays throughout all 12 months of 2025, helping employees track important dates for timesheet submissions and paid time off.


# openai audio output

In [None]:
import base64
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o-audio-preview",
    modalities=["text", "audio"],
    audio={"voice": "alloy", "format": "wav"},
    messages=[
        {
            "role": "user",
            "content": "Is a golden retriever a good family dog?"
        }
    ]
)

print(completion.choices[0])

wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
with open("dog.wav", "wb") as f:
    f.write(wav_bytes)



In [None]:
import base64
import streamlit as st
from openai import OpenAI

# Initialize OpenAI client
client = OpenAI()

# Function to generate audio response
def generate_audio_response(messages):
    completion = client.chat.completions.create(
        model="gpt-4o-audio-preview",
        modalities=["text", "audio"],
        audio={"voice": "alloy", "format": "wav"},
        messages=messages
    )
    return completion.choices[0]

# Function to decode and save audio
def save_audio(audio_data, filename):
    wav_bytes = base64.b64decode(audio_data)
    with open(filename, "wb") as f:
        f.write(wav_bytes)

# Initialize conversation history
conversation_history = []

# Streamlit UI
st.title("Multi-turn Conversation Bot")

# User input
user_input = st.text_input("You: ", "")

if user_input:
    # Add user message to conversation history
    conversation_history.append({"role": "user", "content": user_input})

    # Generate response
    response = generate_audio_response(conversation_history)

    # Add assistant message to conversation history
    conversation_history.append(response.message)

    # Save and play audio
    audio_filename = f"{response.message.audio.id}.wav"
    save_audio(response.message.audio.data, audio_filename)
    st.audio(audio_filename, format="audio/wav", autoplay=True)

    # Display transcript
    st.write("Assistant: ", response.message.audio.transcript)

# Display conversation history
st.subheader("Conversation History")
for message in conversation_history:
    if isinstance(message, dict):  # User message
        role = "You"
        content = message["content"]
    else:  # Assistant message
        role = "Assistant"
        content = message.audio.transcript
    st.write(f"{role}: {content}")

# elevenlab


In [None]:
!python -m pip install elevenlabs

In [7]:

import os
import uuid
from elevenlabs import VoiceSettings
from elevenlabs.client import ElevenLabs

ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
client = ElevenLabs(
    api_key=ELEVENLABS_API_KEY,
)


def text_to_speech_file(text: str) -> str:
    # Calling the text_to_speech conversion API with detailed parameters
    response = client.text_to_speech.convert(
        voice_id="pNInz6obpgDQGcFmaJgB", # Adam pre-made voice
        output_format="mp3_22050_32",
        text=text,
        model_id="eleven_turbo_v2_5", # use the turbo model for low latency
        voice_settings=VoiceSettings(
            stability=0.0,
            similarity_boost=1.0,
            style=0.0,
            use_speaker_boost=True,
        ),
    )

    # uncomment the line below to play the audio back
    # play(response)

    # Generating a unique file name for the output MP3 file
    save_file_path = f"{uuid.uuid4()}.mp3"

    # Writing the audio to a file
    with open(save_file_path, "wb") as f:
        for chunk in response:
            if chunk:
                f.write(chunk)

    print(f"{save_file_path}: A new audio file was saved successfully!")

    # Return the path of the saved audio file
    return save_file_path

text_to_speech_file("Hello World")

ffe8e309-cb36-462a-a88d-fe7538a01d3a.mp3: A new audio file was saved successfully!


'ffe8e309-cb36-462a-a88d-fe7538a01d3a.mp3'