In [6]:
# imports

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI
import ollama
import anthropic
import gradio as gr

In [7]:
# constants

MODEL_GPT = 'gpt-4o-mini'
MODEL_LLAMA = 'llama3.2'
MODEL_ANTHROPIC = 'claude-3-7-sonnet-latest'

In [8]:
# set up environment

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')


openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')

if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
    print("OpenAI API Key not set")
    
if anthropic_api_key:
    print(f"Anthropic API Key exists and begins {anthropic_api_key[:7]}")
else:
    print("Anthropic API Key not set")

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")


# Connect to OpenAI, Anthropic and ollama

openai = OpenAI()

claude = anthropic.Anthropic()

ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

OpenAI API Key exists and begins sk-proj-
Anthropic API Key exists and begins sk-ant-
API key looks good so far


In [9]:
system_prompt = """
You are helping me in an important data science role where I have to build models based on meta-data without seeing the true data. To test that my models do the correct thing, I
need you to generate datasets  when given a description of what they look like. You will respond in either with either a JSON or a CSV depending on what is being asked. If 
it is unclear what should be the range or type of data in a given column, you should prompt the user to give more information. 
"""

In [10]:

def ask_for_dataset():
    return input("Do you need a dataset?: ")

In [18]:
def stream_claude(message, history):
    """
    Stream Claude responses for Gradio ChatInterface.
    
    Args:
        message (str): Current user message
        history (list): Chat history in Gradio format [(user_msg, assistant_msg), ...]
    
    Yields:
        str: Streaming response from Claude
    """
    # Convert Gradio history to Anthropic API format
    messages = []
    
    if history:
        for item in history:
            try:
                if isinstance(item, (list, tuple)) and len(item) >= 2:
                    user_msg, assistant_msg = item[0], item[1]
                    
                    # Add user message
                    if user_msg:
                        messages.append({"role": "user", "content": str(user_msg)})
                    
                    # Add assistant message if it exists
                    if assistant_msg:
                        messages.append({"role": "assistant", "content": str(assistant_msg)})
                        
            except (IndexError, TypeError) as e:
                print(f"Warning: Skipping malformed history item {item}: {e}")
                continue
    
    # Add current user message
    messages.append({"role": "user", "content": str(message)})
    
    # Stream response from Claude
    try:
        stream = claude.messages.stream(
            model="claude-3-haiku-20240307",
            max_tokens=1000,
            temperature=0.7,
            system=system_prompt,
            messages=messages
        )
        
        response = ""
        
        with stream as message_stream:
            for text in message_stream.text_stream:
                if text:  # Only process non-empty text chunks
                    response += text
                    yield response
        
        # Ensure we yielded something
        if not response:
            yield "I'm sorry, I couldn't generate a response."
            
    except Exception as e:
        error_message = f"Error communicating with Claude: {str(e)}"
        print(error_message)
        yield error_message

In [21]:

def stream_gpt(message, history):
    messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]

    print("History is:")
    print(history)
    print("And messages is:")
    print(messages)

    stream = openai.chat.completions.create(model=MODEL_GPT, messages=messages, stream=True)

    response = ""
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        yield response

In [19]:
gr.ChatInterface(
    fn=stream_claude,
    type="messages",  # 🚨 critical: this means `fn` receives a list of messages
    title="Chat with Claude",
    description="Talk to Claude 3 Haiku using the Anthropics API",
).launch()

* Running on local URL:  http://127.0.0.1:7891
* To create a public link, set `share=True` in `launch()`.




In [27]:
def stream_model(message, history, model):
    """
    Stream responses from selected model.
    
    Args:
        message (str): Current user message
        history (list): Chat history
        model (str): Selected model ("Claude" or "GPT")
    
    Yields:
        str: Streaming response from selected model
    """
    if model == "Claude":
        yield from stream_claude(message, history)
    elif model == "GPT":
        yield from stream_gpt(message, history)
    else:
        yield f"Error: Unknown model '{model}'. Please select Claude or GPT."


In [28]:
def create_chat_interface():
    with gr.Blocks(title="Multi-Model Chat") as demo:
        gr.Markdown("# Chat with Claude or GPT")
        
        with gr.Row():
            model_selector = gr.Dropdown(
                choices=["Claude", "GPT"],
                value="Claude",
                label="Select Model",
                interactive=True
            )
        
        chatbot = gr.Chatbot(
            height=500,
            label="Chat History"
        )
        
        with gr.Row():
            msg = gr.Textbox(
                placeholder="Type your message here...",
                label="Message",
                scale=4
            )
            send_btn = gr.Button("Send", scale=1)
        
        # Clear button
        clear_btn = gr.Button("Clear Chat")
        
        def respond(message, history, model):
            """Handle user input and stream response."""
            if not message.strip():
                return "", history
            
            # Add user message to history
            history.append([message, None])
            
            # Stream the response
            response = ""
            for partial_response in stream_model(message, history[:-1], model):
                response = partial_response
                # Update the last assistant message in history
                history[-1][1] = response
                yield "", history
        
        def clear_chat():
            return [], ""
        
        # Set up event handlers
        msg.submit(
            respond,
            inputs=[msg, chatbot, model_selector],
            outputs=[msg, chatbot],
            show_progress=False
        )
        
        send_btn.click(
            respond,
            inputs=[msg, chatbot, model_selector],
            outputs=[msg, chatbot],
            show_progress=False
        )
        
        clear_btn.click(
            clear_chat,
            outputs=[chatbot, msg]
        )
    
    return demo

# Launch the interface
if __name__ == "__main__":
    demo = create_chat_interface()
    demo.launch()




  chatbot = gr.Chatbot(


* Running on local URL:  http://127.0.0.1:7893
* To create a public link, set `share=True` in `launch()`.


History is:
[['hey', "Hello! I'm an AI assistant created by Anthropic to help with data science tasks. How can I assist you today?"], ['i need a dataset', "Okay, great! Could you please provide more details about the dataset you need? I'll need some information to generate the appropriate dataset for you. Some key details that would be helpful:\n\n- What is the purpose or use case for this dataset?\n- How many rows (observations) do you need in the dataset?\n- What kind of features or columns should the dataset have? \n- What data types should the features be (e.g. numeric, categorical, text, etc.)?\n- Are there any specific value ranges or distributions you need the data to have?\n- Should the dataset have any relationships or correlations between the features?\n- Would you like the dataset in a CSV or JSON format?\n\nThe more details you can provide, the better I can generate a dataset that meets your needs. Please let me know the relevant details and I'll be happy to create the data

Traceback (most recent call last):
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 2191, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 1714, in call_function
    prediction = await utils.async_iteration(iterator)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/py

History is:
[]
And messages is:
[{'role': 'system', 'content': '\nYou are helping me in an important data science role where I have to build models based on meta-data without seeing the true data. To test that my models do the correct thing, I\nneed you to generate datasets  when given a description of what they look like. You will respond in either with either a JSON or a CSV depending on what is being asked. If \nit is unclear what should be the range or type of data in a given column, you should prompt the user to give more information. \n'}, {'role': 'user', 'content': 'i want a json'}]
History is:
[['i want a json', 'Please provide a description of the dataset you would like to generate in JSON format, including the fields, types, and any specific constraints or ranges you want for the data.']]
And messages is:
[{'role': 'system', 'content': '\nYou are helping me in an important data science role where I have to build models based on meta-data without seeing the true data. To test

Traceback (most recent call last):
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 2191, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 1714, in call_function
    prediction = await utils.async_iteration(iterator)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/py

History is:
[['i want a json', 'Please provide a description of the dataset you would like to generate in JSON format, including the fields, types, and any specific constraints or ranges you want for the data.']]
And messages is:
[{'role': 'system', 'content': '\nYou are helping me in an important data science role where I have to build models based on meta-data without seeing the true data. To test that my models do the correct thing, I\nneed you to generate datasets  when given a description of what they look like. You will respond in either with either a JSON or a CSV depending on what is being asked. If \nit is unclear what should be the range or type of data in a given column, you should prompt the user to give more information. \n'}, ['i want a json', 'Please provide a description of the dataset you would like to generate in JSON format, including the fields, types, and any specific constraints or ranges you want for the data.'], {'role': 'user', 'content': 'two columns, ids and 

Traceback (most recent call last):
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 2191, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 1714, in call_function
    prediction = await utils.async_iteration(iterator)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/py

History is:
[['i want a json', 'Please provide a description of the dataset you would like to generate in JSON format, including the fields, types, and any specific constraints or ranges you want for the data.']]
And messages is:
[{'role': 'system', 'content': '\nYou are helping me in an important data science role where I have to build models based on meta-data without seeing the true data. To test that my models do the correct thing, I\nneed you to generate datasets  when given a description of what they look like. You will respond in either with either a JSON or a CSV depending on what is being asked. If \nit is unclear what should be the range or type of data in a given column, you should prompt the user to give more information. \n'}, ['i want a json', 'Please provide a description of the dataset you would like to generate in JSON format, including the fields, types, and any specific constraints or ranges you want for the data.'], {'role': 'user', 'content': 'two columns, ids and 

Traceback (most recent call last):
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 2191, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 1714, in call_function
    prediction = await utils.async_iteration(iterator)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/py

History is:
[['i want a json', 'Please provide a description of the dataset you would like to generate in JSON format, including the fields, types, and any specific constraints or ranges you want for the data.']]
And messages is:
[{'role': 'system', 'content': '\nYou are helping me in an important data science role where I have to build models based on meta-data without seeing the true data. To test that my models do the correct thing, I\nneed you to generate datasets  when given a description of what they look like. You will respond in either with either a JSON or a CSV depending on what is being asked. If \nit is unclear what should be the range or type of data in a given column, you should prompt the user to give more information. \n'}, ['i want a json', 'Please provide a description of the dataset you would like to generate in JSON format, including the fields, types, and any specific constraints or ranges you want for the data.'], {'role': 'user', 'content': 'two columns, ids and 

Traceback (most recent call last):
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 2191, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 1714, in call_function
    prediction = await utils.async_iteration(iterator)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/py

History is:
[['i want a json', 'Please provide a description of the dataset you would like to generate in JSON format, including the fields, types, and any specific constraints or ranges you want for the data.']]
And messages is:
[{'role': 'system', 'content': '\nYou are helping me in an important data science role where I have to build models based on meta-data without seeing the true data. To test that my models do the correct thing, I\nneed you to generate datasets  when given a description of what they look like. You will respond in either with either a JSON or a CSV depending on what is being asked. If \nit is unclear what should be the range or type of data in a given column, you should prompt the user to give more information. \n'}, ['i want a json', 'Please provide a description of the dataset you would like to generate in JSON format, including the fields, types, and any specific constraints or ranges you want for the data.'], {'role': 'user', 'content': 'two columns, ids and 

Traceback (most recent call last):
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 2191, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 1714, in call_function
    prediction = await utils.async_iteration(iterator)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gabrielbattcock/miniconda3/envs/llms/lib/py