In [1]:
# RUN THIS BEFORE ATTEMPTING, OTHERWISE MAY GET CLIENT ERRORS ON SOME AWS CALLS
import boto3

boto3.setup_default_session()

In [2]:
import os

from dotenv import load_dotenv

# Load .env file
load_dotenv()
aws_profile = os.getenv("AWS_PROFILE")
llm_region = os.getenv("LLM_REGION")
llm_model = os.getenv("LLM_MODEL")
print(f"Using AWS Region: {llm_region}")
print(f"Using LLM model: {llm_model}")

Using AWS Region: us-east-1
Using LLM model: CLAUDE_SONNET_3_5


In [3]:
# Setup notebook to show logger messages
import logging
import sys
from datetime import datetime

from IPython.display import HTML, display


class VSCodeNotebookHandler(logging.Handler):
    def __init__(self):
        super().__init__()
        self.log_buffer = []

    def emit(self, record):
        try:
            # Format the message
            msg = self.format(record)

            # Define colors for different log levels
            colors = {
                logging.DEBUG: "#6c757d",  # gray
                logging.INFO: "#0d6efd",  # blue
                logging.WARNING: "#ffc107",  # yellow
                logging.ERROR: "#dc3545",  # red
                logging.CRITICAL: "#721c24",  # dark red
            }

            # Get color for current log level
            color = colors.get(record.levelno, "#000000")

            # Create HTML formatted log entry
            log_entry = f"""
            <div style='
                padding: 2px 6px;
                margin: 2px 0;
                border-left: 3px solid {color};
                background-color: #f8f9fa;
                font-family: Monaco, monospace;
                font-sise: 12px;
            '>
                <span style='color: {color}; font-weight: bold;'>{record.levelname}</span>
                <span style='color: #666; margin-right: 8px;'>{datetime.now().strftime("%H:%M:%S")}</span>
                <span>{msg}</span>
            </div>
            """

            # Display the formatted log entry
            display(HTML(log_entry))

            # Also print to stderr for VSCode's native console
            print(msg, file=sys.stderr)

        except Exception as e:
            print(f"Error in log handler: {str(e)}", file=sys.stderr)


def setup_notebook_logging(level=logging.INFO):
    """
    Configure logging for VSCode Jupyter notebooks

    Args:
        level: The logging level to use (default: INFO)
    Returns:
        logger: Configured logger instance
    """
    # Create or get the root logger

    logger = logging.getLogger(__name__)

    # Remove any existing handlers
    for handler in logger.handlers[:]:
        logger.removeHandler(handler)

    # Set the logging level
    logger.setLevel(level)

    # Create and configure the VSCode notebook handler
    handler = VSCodeNotebookHandler()
    formatter = logging.Formatter("%(message)s")
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    return logger


# Usage example:
logger = setup_notebook_logging(logging.INFO)


# Test the logger
def test_logging():
    logger.debug("This is a debug message")
    logger.info("This is an info message")
    logger.warning("This is a warning message")
    logger.error("This is an error message")
    logger.critical("This is a critical message")

In [4]:
logger = setup_notebook_logging()

# Package Tests

In [5]:
import numpy as np
import pandas as pd


def create_sample_dataset():
    """Create a sample dataset for testing"""
    np.random.seed(42)
    dates = pd.date_range(start="2023-01-01", periods=100, freq="D")

    sample_df = pd.DataFrame(
        {
            "date": dates,
            "sales": np.random.normal(1000, 100, 100),
            "customers": np.random.randint(50, 200, 100),
            "satisfaction_score": np.random.uniform(3.5, 5.0, 100),
            "category": np.random.choice(["A", "B", "C"], 100),
        }
    )

    return sample_df

In [6]:
from chai import chAI

chai = chAI()



In [7]:
sample_df = create_sample_dataset()

In [None]:
response = chai.steep(
    data=sample_df,
    prompt="Analyse this dataset and suggest visualisations to explore trends and insights.",
)
print(response.teapot.suggestions)  # Prints the suggestions specifically

📊 VISUALISATION SUGGESTIONS:


📈 TIME SERIES ANALYSIS OF SALES AND CUSTOMERS
Purpose: To visualize the trend of sales and customer count over time
Chart Type: Line chart with dual y-axis
Variables: date (x-axis), sales (primary y-axis), customers (secondary y-axis)
Expected Insights: Identify patterns in sales and customer numbers, seasonal trends, and correlation between sales and customer count

--------------------------------------------------

📈 CATEGORY DISTRIBUTION
Purpose: To show the distribution of sales across different categories
Chart Type: Pie chart or Donut chart
Variables: category, sum of sales
Expected Insights: Understand which category contributes most to overall sales

--------------------------------------------------

📈 SATISFACTION SCORE VS SALES
Purpose: To explore the relationship between customer satisfaction and sales
Chart Type: Scatter plot
Variables: satisfaction_score (x-axis), sales (y-axis)
Expected Insights: Determine if there's a correlation between 

## Copycat functionality

Provide a chart image and ask LLM to provide the plotly code and output to an html file

In [12]:
prompt = """
    Please analyse this chart and provide a comprehensive review with the following structure:

    1. Key Findings:
    - What are the most significant patterns or insights?
    - What are the highest and lowest values?
    - Are there any notable disparities or trends?

    2. Comparative Analysis:
    - Compare the different outcome groups
    - Identify any significant gaps or differences
    - Highlight any unexpected patterns

    Please provide your analysis in clear, business-friendly language suitable for stakeholders.
    """

In [13]:
json_images = chai.steep(prompt=prompt, image_path="../tests/img/satisfaction.png")

Error in analyse_image: Cannot refresh credentials: MFA token required.
Error handling image request: Cannot refresh credentials: MFA token required.


RefreshWithMFAUnsupportedError: Cannot refresh credentials: MFA token required.

In [17]:
print(type(json_images))

NameError: name 'json_images' is not defined

In [None]:
print(json_images["analysis"])

## Insights
1. The distribution of satisfaction scores is right-skewed, with more scores concentrated towards the higher end.
2. The highest frequency occurs around the 4.2-4.4 range, with about 17-18 counts.
3. The lowest frequency is at the 3.6 score, with about 7 counts.
4. There's a notable increase in frequency for the highest score range (4.8-5.0).
5. The majority of satisfaction scores fall between 3.8 and 5.0.
6. The distribution shows some fluctuations, with local peaks at 3.8-4.0, 4.2-4.4, and 4.8-5.0.


In [None]:
print(json_images["code"])

import plotly.graph_objects as go

satisfaction_scores = [3.4, 3.6, 3.8, 4.0, 4.2, 4.4, 4.6, 4.8, 5.0]
counts = [7, 7, 12, 11, 12, 17, 13, 12, 16]

fig = go.Figure(data=[go.Bar(
    x=satisfaction_scores,
    y=counts,
    marker_color='orange'
)])

fig.update_layout(
    title='Satisfaction Score Distribution',
    xaxis_title='Satisfaction Score',
    yaxis_title='Count',
    xaxis=dict(tickmode='array', tickvals=satisfaction_scores),
    yaxis=dict(range=[0, 20]),
    plot_bgcolor='white',
    template='plotly_white'
)

fig.show()


In [None]:
print(json_images["path"])

/Users/jose.orjales/gds-ideas-chai/examples/plotly_visualisations/visualisation_1741878552.html


In [None]:
images = chai.steep(prompt=prompt, image_path="../tests/img/satisfaction.png")

In [12]:
images.teapot.analysis

AttributeError: 'dict' object has no attribute 'teapot'

## Asking for some template code

In [14]:
chart_prompt = "I want a red chart with bold axis titles and labels on the bars. There should also be a legend showing each distinct category"
chart_request = chai.steep(prompt=chart_prompt, chart_type="scatter")

In [15]:
print(chart_request["code"])

import plotly.graph_objects as go
import numpy as np

# Generate sample data
np.random.seed(42)
x = np.random.uniform(0, 10, 50)
y = 2 * x + np.random.normal(0, 2, 50)

# Create figure with multiple series
fig = go.Figure()

# Add multiple scatter series with different shades of red
series_data = [
    {'x': x, 'y': y, 'name': 'Series 1', 'color': '#ff0000'},
    {'x': x, 'y': y + 2, 'name': 'Series 2', 'color': '#ff3333'},
    {'x': x, 'y': y - 2, 'name': 'Series 3', 'color': '#ff6666'},
    {'x': x, 'y': y * 0.8, 'name': 'Series 4', 'color': '#ff9999'}
]

for series in series_data:
    fig.add_trace(
        go.Scatter(
            x=series['x'],
            y=series['y'],
            mode='markers',
            name=series['name'],
            marker=dict(
                size=10,
                color=series['color'],
                opacity=0.7,
                line=dict(width=1, color='#444444')
            ),
            hovertemplate='x: %{x:.2f}<br>y: %{y:.2f}<extra></extra>'
