In [9]:
# Run this cell first: it sets up boto3's default session up front. Skipping it
# may lead to client errors on some AWS calls later in the notebook.
# NOTE(review): this runs before load_dotenv() in the next cell, so an AWS_PROFILE
# that is defined only in .env may not be visible to this session — confirm.
import boto3
boto3.setup_default_session()

In [10]:
from dotenv import load_dotenv
import os

# Load the .env file, then read three settings from the environment.
# os.getenv returns None for any variable that is not set.
load_dotenv()
# NOTE(review): aws_profile is read but never printed or used in this cell, while the
# saved output still shows a "Using AWS Profile" line — re-run the cell to refresh it.
aws_profile = os.getenv('AWS_PROFILE')
llm_region = os.getenv('LLM_REGION')
llm_model = os.getenv('LLM_MODEL')
print(f"Using AWS Region: {llm_region}")
print(f"Using LLM model: {llm_model}")

Using AWS Profile: bedrock-access-jose
Using AWS Region: us-east-1
Using LLM model: anthropic.claude-3-5-sonnet-20240620-v1:0


In [11]:
# Setup notebook to show logger messages
import logging
import sys
from datetime import datetime
from IPython.display import display, HTML

class VSCodeNotebookHandler(logging.Handler):
    """Logging handler that renders each record as a colour-coded HTML line.

    Every record is shown in the notebook through ``display(HTML(...))`` and the
    plain formatted message is additionally written to ``sys.stderr``.
    """

    # Accent colour per standard log level; emit() falls back to black for
    # any level that is not listed here.
    _LEVEL_COLORS = {
        logging.DEBUG: '#6c757d',    # gray
        logging.INFO: '#0d6efd',     # blue
        logging.WARNING: '#ffc107',  # yellow
        logging.ERROR: '#dc3545',    # red
        logging.CRITICAL: '#721c24'  # dark red
    }

    def __init__(self):
        super().__init__()
        # Kept for backward compatibility; nothing in this class appends to it.
        self.log_buffer = []

    def emit(self, record):
        """Render ``record`` as an HTML block and echo the message to stderr.

        Args:
            record: The ``logging.LogRecord`` to display.

        Any exception raised while formatting or displaying is caught and
        reported on stderr, so a logging call never raises into user code.
        """
        import html  # local import so the block does not rely on a new top-level import

        try:
            # Format the message with the handler's formatter
            msg = self.format(record)

            # Get color for current log level
            color = self._LEVEL_COLORS.get(record.levelno, '#000000')

            # Escape the message so that '<', '>' and '&' are shown literally
            # instead of being interpreted as markup by the HTML renderer.
            safe_msg = html.escape(msg)

            # Use the time the record was created rather than the time it is rendered.
            timestamp = datetime.fromtimestamp(record.created).strftime('%H:%M:%S')

            # Create HTML formatted log entry
            log_entry = f"""
            <div style='
                padding: 2px 6px;
                margin: 2px 0;
                border-left: 3px solid {color};
                background-color: #f8f9fa;
                font-family: Monaco, monospace;
                font-size: 12px;
            '>
                <span style='color: {color}; font-weight: bold;'>{record.levelname}</span>
                <span style='color: #666; margin-right: 8px;'>{timestamp}</span>
                <span>{safe_msg}</span>
            </div>
            """

            # Display the formatted log entry
            display(HTML(log_entry))

            # Also print the unescaped message to stderr for VSCode's native console
            print(msg, file=sys.stderr)

        except Exception as e:
            print(f"Error in log handler: {str(e)}", file=sys.stderr)

def setup_notebook_logging(level=logging.INFO):
    """
    Route all root-logger output through a single VSCodeNotebookHandler.

    Handlers already attached to the root logger are detached first, so
    calling this repeatedly leaves exactly one handler installed.

    Args:
        level: Threshold applied to the root logger (default: INFO)
    Returns:
        logger: The root logger, carrying only the newly created handler
    """
    root = logging.getLogger()

    # Detach whatever was installed before; iterate over a snapshot because
    # removeHandler mutates root.handlers.
    for stale in tuple(root.handlers):
        root.removeHandler(stale)

    root.setLevel(level)

    # The handler gets a message-only format string.
    notebook_handler = VSCodeNotebookHandler()
    notebook_handler.setFormatter(logging.Formatter('%(message)s'))
    root.addHandler(notebook_handler)

    return root

# Usage example: configure the root logger at INFO level and keep a reference to it.
# `logger` is the module-level name that test_logging() below writes to.
logger = setup_notebook_logging(logging.INFO)

# Test the logger
def test_logging():
    logger.debug("This is a debug message")
    logger.info("This is an info message")
    logger.warning("This is a warning message")
    logger.error("This is an error message")
    logger.critical("This is a critical message")

In [12]:
# Re-run the logging setup with the default level (INFO). setup_notebook_logging removes
# existing root handlers before adding its own, so repeating it does not duplicate output.
logger = setup_notebook_logging()

# Package Tests

In [13]:
import numpy as np
import pandas as pd
def create_sample_dataset(n_rows=100, seed=42):
    """Create a synthetic daily dataset for testing.

    Args:
        n_rows: Number of rows to generate, one per consecutive day starting
            on 2023-01-01 (default: 100).
        seed: Seed for the random number generator (default: 42).

    Returns:
        pandas.DataFrame with the columns ``date``, ``sales``, ``customers``,
        ``satisfaction_score`` and ``category``.
    """
    # A private legacy RandomState produces the same draws as calling
    # np.random.seed(seed) followed by the module-level functions, but it does
    # not reseed NumPy's global generator as a side effect.
    rng = np.random.RandomState(seed)
    dates = pd.date_range(start="2023-01-01", periods=n_rows, freq="D")

    # Draw order (sales, customers, satisfaction_score, category) is kept so the
    # default call reproduces the values of the previous implementation.
    sample_df = pd.DataFrame(
        {
            "date": dates,
            "sales": rng.normal(1000, 100, n_rows),
            "customers": rng.randint(50, 200, n_rows),
            "satisfaction_score": rng.uniform(3.5, 5.0, n_rows),
            "category": rng.choice(["A", "B", "C"], n_rows),
        }
    )

    return sample_df

In [15]:
from chai import chAI
# Create the chAI instance used by every request below. Judging by the log output,
# construction loads the agent config and creates a ChatBedrock LLM instance.
chai = chAI()

chAI Start


Loaded config for agent successfully.


Loaded config for agent successfully.


Initializing BedrockHandler


Found credentials in shared credentials file: ~/.aws/credentials


Creating ChatBedrock LLM instance


Found credentials in shared credentials file: ~/.aws/credentials


Successfully created ChatBedrock LLM instance


Setting up chAI agent


chAI agent successfully set up


In [16]:
# Build the synthetic DataFrame that the analysis request below consumes.
sample_df = create_sample_dataset()

In [17]:
# DataFrame request: handle_request receives the frame via `data` and the
# instruction text via `prompt`.
prompt = "Analyse this dataset and suggest visualisations to explore trends and insights."
response = chai.handle_request(data=sample_df, prompt=prompt)

Detected DataFrame input. Preparing to analyse...


Parsing DataFrame into structured JSON dictionary


Sending prompt and data to agent executor...


In [18]:
# print() so that line breaks render; presumably `response` is a plain string — verify.
print(response)

📊 VISUALISATION SUGGESTIONS:


📈 TIME SERIES ANALYSIS OF SALES
Purpose: To visualize the trend of sales over time
Chart Type: Line chart
Variables: date (x-axis), sales (y-axis)
Expected Insights: Identify any seasonal patterns, overall trends, or unusual spikes/dips in sales

--------------------------------------------------

📈 CUSTOMER COUNT VS
Purpose: To examine the relationship between number of customers and sales
Chart Type: Scatter plot
Variables: customers (x-axis), sales (y-axis)
Expected Insights: Determine if there's a correlation between customer count and sales, identify any outliers

--------------------------------------------------

📈 SATISFACTION SCORE DISTRIBUTION
Purpose: To understand the distribution of customer satisfaction scores
Chart Type: Histogram or Box plot
Variables: satisfaction_score
Expected Insights: Analyze the range and frequency of satisfaction scores, identify any skewness or outliers

--------------------------------------------------

📈 SALES B

## Copycat functionality

Provide a chart image and ask the LLM to produce the Plotly code for it, with the output written to an HTML file.

In [19]:
# Prompt for the image-review request in the next cell.
# NOTE(review): this rebinds `prompt`, which the DataFrame request cell also assigns;
# a distinct name would avoid mix-ups when cells are re-run out of order.
prompt = """
    Please analyse this chart and provide a comprehensive review with the following structure:

    1. Key Findings:
    - What are the most significant patterns or insights?
    - What are the highest and lowest values?
    - Are there any notable disparities or trends?

    2. Comparative Analysis:
    - Compare the different outcome groups
    - Identify any significant gaps or differences
    - Highlight any unexpected patterns

    Please provide your analysis in clear, business-friendly language suitable for stakeholders.
    """

In [26]:
# Image request: send the review prompt together with a chart image path. The result is
# indexed with 'analysis' and 'code' in the cells below. The path is relative, so it
# depends on the notebook's working directory.
json_images = chai.handle_request(prompt = prompt, image_path='../tests/img/satisfaction.png')

Detected image location input. Preparing to review...


Encoding image to base64


Sending prompt and data to agent executor...


In [27]:
# Show the 'analysis' entry of the image-review result.
print(json_images['analysis'])

## Insights
1. The distribution of satisfaction scores is somewhat bimodal, with peaks around 4.2-4.4 and 4.8-5.0.
2. The highest count is in the 4.2-4.4 range, with approximately 18 occurrences.
3. The lowest count is in the 3.4-3.6 range, with about 7 occurrences.
4. There's a notable trend of increasing frequency as the satisfaction score increases, with a slight dip in the 4.6-4.8 range.
5. The majority of satisfaction scores fall between 4.0 and 5.0, indicating generally positive feedback.
6. There's a significant gap between the frequency of scores in the 3.4-3.6 range and the rest of the distribution.


In [28]:
# Show the 'code' entry of the image-review result (printed as text, not executed here).
print(json_images['code'])

import plotly.graph_objects as go
import numpy as np

# Create the data
satisfaction_scores = np.concatenate([
    np.random.uniform(3.4, 3.6, 7),
    np.random.uniform(3.6, 3.8, 12),
    np.random.uniform(3.8, 4.0, 12),
    np.random.uniform(4.0, 4.2, 12),
    np.random.uniform(4.2, 4.4, 18),
    np.random.uniform(4.4, 4.6, 13),
    np.random.uniform(4.6, 4.8, 12),
    np.random.uniform(4.8, 5.0, 16)
])

# Create the histogram
fig = go.Figure(data=[go.Histogram(
    x=satisfaction_scores,
    xbins=dict(start=3.4, end=5.0, size=0.2),
    marker_color='orange',
    opacity=0.75
)])

# Update the layout
fig.update_layout(
    title='Satisfaction Score Distribution',
    xaxis_title='Satisfaction Score',
    yaxis_title='Count',
    bargap=0.1,
    xaxis=dict(range=[3.4, 5.0]),
    yaxis=dict(range=[0, 20]),
    plot_bgcolor='rgba(240, 240, 250, 0.5)',
    template='plotly_white'
)

fig.show()


## Asking for some template code

In [23]:
# Template request: only a prompt and a chart type are passed, no data or image.
# NOTE(review): the prompt asks for "labels on the bars" while chart_type is 'scatter' —
# confirm which chart type is intended.
chart_prompt = "I want a red chart with bold axis titles and labels on the bars. There should also be a legend showing each distinct category"
chart_request = chai.handle_request(prompt = chart_prompt, chart_type='scatter')

Processing chart type request: scatter


Sending prompt and data to agent executor...


In [24]:
# Show the 'code' entry of the template request result (printed as text, not executed here).
print(chart_request['code'])

import plotly.graph_objects as go
import numpy as np

# Generate sample data
np.random.seed(42)
x = np.random.uniform(0, 10, 50)
y = 2 * x + np.random.normal(0, 2, 50)

# Create figure with multiple series
fig = go.Figure()

# Add multiple scatter series with different shades of red
series_data = [
    {'x': x, 'y': y, 'name': 'Series 1', 'color': '#ff0000'},
    {'x': x, 'y': y + 2, 'name': 'Series 2', 'color': '#ff3333'},
    {'x': x, 'y': y - 2, 'name': 'Series 3', 'color': '#ff6666'},
    {'x': x, 'y': y * 0.8, 'name': 'Series 4', 'color': '#ff9999'}
]

for series in series_data:
    fig.add_trace(
        go.Scatter(
            x=series['x'],
            y=series['y'],
            mode='markers',
            name=series['name'],
            marker=dict(
                size=10,
                color=series['color'],
                opacity=0.7,
                line=dict(width=1, color='#444444')
            ),
            hovertemplate='x: %{x:.2f}<br>y: %{y:.2f}<extra></extra>'
