In [1]:
# Run this cell first: it creates boto3's default session up front.
# Without it, some later AWS calls may fail with client errors.
import boto3
boto3.setup_default_session()

In [2]:
import sys
import os
from pathlib import Path

# Treat the parent of the current working directory as the project root
# (this assumes the notebook is run from a folder directly beneath it).
notebook_dir = Path.cwd()
project_root = notebook_dir.parent

# Make project packages importable. The membership check keeps the cell
# idempotent: re-running it must not stack duplicate entries on sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [3]:
from dotenv import load_dotenv
import os
from chAI.constants import LLMModel

# Read the local .env file, then pick up the three settings this notebook uses
load_dotenv()
aws_profile, llm_region, llm_model = (
    os.getenv(setting) for setting in ('AWS_PROFILE', 'LLM_REGION', 'LLM_MODEL')
)

# Echo the active configuration, one setting per line
print(
    f"Using AWS Profile: {aws_profile}",
    f"Using AWS Region: {llm_region}",
    f"Using LLM model: {llm_model}",
    sep="\n",
)

# List every member of the LLMModel enum as "- NAME: value"
print("\nAvailable models in LLMModel enum:")
for model in LLMModel:
    print(f"- {model.name}: {model.value}")

Using AWS Profile: aws-prototype
Using AWS Region: us-east-1
Using LLM model: anthropic.claude-3-5-sonnet-20240620-v1:0

Available models in LLMModel enum:
- CLAUDE_V2_1: anthropic.claude-v2:1
- CLAUDE_SONNET_3_5: anthropic.claude-3-5-sonnet-20240620-v1:0
- CLAUDE_HAIKU_3_5: anthropic.claude-3-5-haiku-20241022-v1:0


In [4]:
# Set up the notebook to display logger messages
import logging
import sys
from datetime import datetime
from IPython.display import display, HTML

class VSCodeNotebookHandler(logging.Handler):
    """Logging handler that renders each record as a coloured HTML line.

    Every record is shown in the notebook through ``display(HTML(...))`` and
    the plain formatted message is additionally written to ``sys.stderr``.
    """

    # Accent colour (left border and level label) for each standard level.
    LEVEL_COLORS = {
        logging.DEBUG: '#6c757d',    # gray
        logging.INFO: '#0d6efd',     # blue
        logging.WARNING: '#ffc107',   # yellow
        logging.ERROR: '#dc3545',     # red
        logging.CRITICAL: '#721c24'   # dark red
    }
    # Used for any level number that is not listed above.
    DEFAULT_COLOR = '#000000'

    def __init__(self):
        super().__init__()
        # Never written to by this class; kept so existing attribute access still works.
        self.log_buffer = []

    def emit(self, record):
        """Render ``record`` as HTML in the notebook and echo it to stderr.

        Args:
            record: The ``logging.LogRecord`` to output.

        Any exception raised while formatting or displaying is caught and
        reported on stderr, so a logging failure never interrupts the caller.
        """
        import html  # stdlib; imported locally so the block is self-contained

        try:
            # Format the message
            msg = self.format(record)

            # Get color for current log level
            color = self.LEVEL_COLORS.get(record.levelno, self.DEFAULT_COLOR)

            # Escape text before embedding it in markup: log messages routinely
            # contain '<', '>' or '&', which would otherwise corrupt the entry
            # or be interpreted as HTML.
            safe_msg = html.escape(msg)
            safe_level = html.escape(record.levelname)

            # Use the time the record was created rather than the time it
            # happens to be rendered.
            timestamp = datetime.fromtimestamp(record.created).strftime('%H:%M:%S')

            # Create HTML formatted log entry
            log_entry = f"""
            <div style='
                padding: 2px 6px;
                margin: 2px 0;
                border-left: 3px solid {color};
                background-color: #f8f9fa;
                font-family: Monaco, monospace;
                font-size: 12px;
            '>
                <span style='color: {color}; font-weight: bold;'>{safe_level}</span>
                <span style='color: #666; margin-right: 8px;'>{timestamp}</span>
                <span>{safe_msg}</span>
            </div>
            """

            # Display the formatted log entry
            display(HTML(log_entry))

            # Also print to stderr for VSCode's native console
            print(msg, file=sys.stderr)

        except Exception as e:
            print(f"Error in log handler: {str(e)}", file=sys.stderr)

def setup_notebook_logging(level=logging.INFO):
    """
    Route all root-logger output through a fresh VSCodeNotebookHandler.

    Any handlers already attached to the root logger are detached first, so
    calling this repeatedly leaves exactly one notebook handler installed.

    Args:
        level: Threshold applied to the root logger (default: INFO)
    Returns:
        The root logger, configured with the notebook handler
    """
    root = logging.getLogger()

    # Detach whatever was installed before, one handler at a time
    while root.handlers:
        root.removeHandler(root.handlers[0])

    root.setLevel(level)

    # Message-only format: the handler renders level and time itself
    notebook_handler = VSCodeNotebookHandler()
    notebook_handler.setFormatter(logging.Formatter('%(message)s'))
    root.addHandler(notebook_handler)

    return root

# Install the notebook handler on the root logger at INFO level
logger = setup_notebook_logging(logging.INFO)

# Manual smoke test: call test_logging() to see one entry per level
def test_logging():
    """Send one sample message through ``logger`` at each standard level."""
    samples = (
        (logger.debug, "This is a debug message"),
        (logger.info, "This is an info message"),
        (logger.warning, "This is a warning message"),
        (logger.error, "This is an error message"),
        (logger.critical, "This is a critical message"),
    )
    for log_call, text in samples:
        log_call(text)

In [5]:
# Re-install the notebook handler on the root logger (default level: INFO);
# setup_notebook_logging removes any handlers that were attached before.
logger = setup_notebook_logging()

# chAI testing and examples

In [6]:
import numpy as np
import pandas as pd
def create_sample_dataset(n_rows=100, seed=42):
    """Create a reproducible sample dataset for testing.

    Args:
        n_rows: Number of daily rows to generate, starting at 2023-01-01
            (default: 100, the size used throughout this notebook).
        seed: Value passed to ``np.random.seed`` before sampling (default: 42).
            Note that this reseeds NumPy's global random state as a side effect.

    Returns:
        pandas.DataFrame with columns ``date``, ``sales``, ``customers``,
        ``satisfaction_score`` and ``category``. With the default arguments
        the frame is identical on every call.
    """
    np.random.seed(seed)
    dates = pd.date_range(start="2023-01-01", periods=n_rows, freq="D")

    # The draws below run in a fixed order; changing that order would change
    # the generated values for a given seed.
    sample_df = pd.DataFrame(
        {
            "date": dates,
            "sales": np.random.normal(1000, 100, n_rows),
            "customers": np.random.randint(50, 200, n_rows),
            "satisfaction_score": np.random.uniform(3.5, 5.0, n_rows),
            "category": np.random.choice(["A", "B", "C"], n_rows),
        }
    )

    return sample_df

In [7]:
from chAI.chAI import chAI
# Instantiate chAI. Per the log output of this cell, construction loads the
# agent config, creates a ChatBedrock LLM instance and sets up the agent.
chai = chAI()

chAI Start


Loaded config for agent successfully.


Loaded config for agent successfully.


Initializing BedrockHandler


Found credentials in shared credentials file: ~/.aws/credentials


Creating ChatBedrock LLM instance


Found credentials in shared credentials file: ~/.aws/credentials


Successfully created ChatBedrock LLM instance


Setting up chAI agent


chAI agent successfully set up


## Visualisation Suggestions

In [8]:
# Build the seeded 100-row sample DataFrame used by the request below
sample_df = create_sample_dataset()

In [9]:
# Ask chAI to analyse the sample DataFrame and suggest visualisations.
# Note: `prompt` is rebound by the copycat section further down.
prompt = "Analyse this dataset and suggest visualisations to explore trends and insights."
response = chai.handle_request(data=sample_df, prompt=prompt)

Detected DataFrame input. Preparing to analyse...


Parsing DataFrame into structured JSON dictionary


Sending prompt and data to agent executor...


{'input': '\n            User Prompt:\n            Analyse this dataset and suggest visualisations to explore trends and insights.\n            \n\n\n            DataFrame Information:\n            Shape: 100 rows, 5 columns\n            \n            Columns:\n            date, sales, customers, satisfaction_score, category\n            \n            Sample Data:\n            [\n  {\n    "date": "2023-01-01T00:00:00",\n    "sales": 1049.6714153011233,\n    "customers": 186,\n    "satisfaction_score": 4.3524629050032075,\n    "category": "A"\n  },\n  {\n    "date": "2023-01-02T00:00:00",\n    "sales": 986.1735698828816,\n    "customers": 111,\n    "satisfaction_score": 3.6405121517421386,\n    "category": "A"\n  },\n  {\n    "date": "2023-01-03T00:00:00",\n    "sales": 1064.7688538100692,\n    "customers": 100,\n    "satisfaction_score": 4.05157370458915,\n    "category": "A"\n  },\n  {\n    "date": "2023-01-04T00:00:00",\n    "sales": 1152.3029856408025,\n    "customers": 108,\n    "s

## Copycat functionality

Provide a chart image and ask the LLM to generate matching Plotly code and write the resulting chart to an HTML file.

In [9]:
# Instructions sent alongside the chart image in the next cell.
# Note: this rebinds `prompt`, replacing the dataset-analysis prompt defined earlier.
prompt = """
    Please analyse this chart and provide a comprehensive review with the following structure:

    1. Key Findings:
    - What are the most significant patterns or insights?
    - What are the highest and lowest values?
    - Are there any notable disparities or trends?

    2. Comparative Analysis:
    - Compare the different outcome groups
    - Identify any significant gaps or differences
    - Highlight any unexpected patterns

    Please provide your analysis in clear, business-friendly language suitable for stakeholders.
    """

In [10]:
# Submit the chart image for review; the result is indexed below by the
# 'analysis', 'code' and 'path' keys.
# NOTE(review): image_path and output_path are absolute paths on one developer's
# machine, so this cell will not run elsewhere as written; consider building
# them from project_root.
# NOTE(review): the path printed further down is under ~/plotly_visualisations,
# not under output_path; confirm whether output_path is honoured.
json_images = chai.handle_request(prompt = prompt, image_path='/Users/jose.orjales/c-af-chartist/tests/img/satisfaction.png', output_path='/Users/jose.orjales/c-af-chartist/tests/img')

Detected image location input. Preparing to review...


Encoding image to base64



            Review the supplied image information and create an interactive visualisation that matches it as closely as possible.
            
            IMPORTANT: You MUST use the format_image_analysis_output tool first to structure the image analysis.
            
            1. Format the analysis:
            - Use the format_image_analysis_output tool with this exact input: # Description
This image shows a histogram or bar chart representing the distribution of satisfaction scores. The chart displays the count of occurrences for different satisfaction score ranges from 3.4 to 5.0.

# Chart Analysis
## Type
The visualization is a histogram or bar chart showing the frequency distribution of satisfaction scores.

## Axes
- X-axis: Satisfaction Score, ranging from 3.4 to 5.0 in increments of 0.2
- Y-axis: Count, representing the number of occurrences for each satisfaction score range

## Insights
- The distribution appears to be somewhat bimodal, with peaks around 4.2-4.4 and 4.8-5

Sending prompt and data to agent executor...


{'input': '\n            Review the supplied image information and create an interactive visualisation that matches it as closely as possible.\n            \n            IMPORTANT: You MUST use the format_image_analysis_output tool first to structure the image analysis.\n            \n            1. Format the analysis:\n            - Use the format_image_analysis_output tool with this exact input: # Description\nThis image shows a histogram or bar chart representing the distribution of satisfaction scores. The chart displays the count of occurrences for different satisfaction score ranges from 3.4 to 5.0.\n\n# Chart Analysis\n## Type\nThe visualization is a histogram or bar chart showing the frequency distribution of satisfaction scores.\n\n## Axes\n- X-axis: Satisfaction Score, ranging from 3.4 to 5.0 in increments of 0.2\n- Y-axis: Count, representing the number of occurrences for each satisfaction score range\n\n## Insights\n- The distribution appears to be somewhat bimodal, with p

In [11]:
# Show the written analysis returned for the image (print keeps its line breaks readable)
print(json_images['analysis'])

## Insights
1. The distribution appears to be somewhat bimodal, with peaks around 4.2-4.4 and 4.8-5.0.
2. The highest frequency occurs in the 4.2-4.4 range, with about 17-18 counts.
3. There's a second peak at the highest satisfaction score range (4.8-5.0), with about 15-16 counts.
4. The lowest frequencies are in the 3.4-3.6 range, suggesting fewer low satisfaction scores.
5. Overall, the satisfaction scores seem to be skewed towards the higher end, indicating generally positive feedback.
6. There's a noticeable dip in frequency for the 4.6-4.8 range, which could be interesting to investigate further.


In [12]:
# Show the Plotly code returned for the image
print(json_images['code'])

import plotly.graph_objects as go

satisfaction_scores = [3.4, 3.6, 3.8, 4.0, 4.2, 4.4, 4.6, 4.8, 5.0]
counts = [7, 7, 12, 12, 17, 13, 12, 16, 16]

fig = go.Figure(data=[go.Bar(
    x=satisfaction_scores,
    y=counts,
    marker_color='orange',
    width=0.2
)])

fig.update_layout(
    title='Satisfaction Score Distribution',
    xaxis_title='Satisfaction Score',
    yaxis_title='Count',
    xaxis=dict(tickmode='linear', dtick=0.2),
    yaxis=dict(range=[0, 20]),
    plot_bgcolor='rgba(240, 240, 250, 0.8)',
    template='plotly_white'
)

fig.show()


In [13]:
# Show where the generated HTML visualisation was saved
print(json_images['path'])

/Users/jose.orjales/plotly_visualisations/visualisation_1739456246.html


In [14]:
import plotly.graph_objects as go

# Hand-run bar chart of satisfaction-score counts.
# NOTE(review): these counts differ from the code printed two cells above
# (and no bar width is set here), so this looks like code from a different
# run of the request; confirm which version is intended.
satisfaction_scores = [3.4, 3.6, 3.8, 4.0, 4.2, 4.4, 4.6, 4.8, 5.0]
counts = [7, 7, 12, 12, 13, 18, 13, 13, 16]

# Single orange bar trace, one bar per score value
fig = go.Figure()
fig.add_trace(
    go.Bar(x=satisfaction_scores, y=counts, marker_color='orange')
)

# Titles, one labelled tick per score value, fixed y-range and figure size
fig.update_layout(
    title_text='Satisfaction Score Distribution',
    xaxis_title_text='Satisfaction Score',
    yaxis_title_text='Count',
    xaxis={
        'tickmode': 'array',
        'tickvals': satisfaction_scores,
        'ticktext': list(map(str, satisfaction_scores)),
    },
    yaxis={'range': [0, 20]},
    plot_bgcolor='rgba(230, 235, 245, 1)',
    width=800,
    height=500,
    template='plotly_white',
)

fig.show()

## Asking for some template code

In [14]:
# Styling requirements passed with the chart-type template request in the next cell
chart_prompt = "I want a red chart with bold axis titles and labels on the bars. There should also be a legend showing each distinct category"

In [15]:
# Request template code for a scatter chart, styled according to chart_prompt.
# Note: this rebinds `json_images`, replacing the image-review result from the copycat section.
# NOTE(review): output_path is an absolute path on one developer's machine.
json_images = chai.handle_request(prompt = chart_prompt, output_path='/Users/jose.orjales/c-af-chartist/tests/img', chart_type='scatter')

Processing chart type request: scatter


Sending prompt and data to agent executor...


{'input': '\n            Create a default scatter chart visualisation using this template as reference:\n\n            # Template Code:\n            \n            import plotly.graph_objects as go\n            import numpy as np\n\n            # Generate sample data\n            np.random.seed(42)\n            x = np.random.uniform(0, 10, 50)\n            y = 2 * x + np.random.normal(0, 2, 50)\n\n            # Create figure with multiple series\n            fig = go.Figure()\n\n            # Add multiple scatter series with different colors\n            series_data = [\n                {\'x\': x, \'y\': y, \'name\': \'Series 1\', \'color\': \'#1f77b4\'},\n                {\'x\': x, \'y\': y + 2, \'name\': \'Series 2\', \'color\': \'#2d91c2\'},\n                {\'x\': x, \'y\': y - 2, \'name\': \'Series 3\', \'color\': \'#7eb0d5\'},\n                {\'x\': x, \'y\': y * 0.8, \'name\': \'Series 4\', \'color\': \'#bddbf5\'}\n            ]\n\n            for series in series_data:\n     

In [16]:
# Show where the generated scatter-chart HTML file was saved
print(json_images['path'])

/Users/jose.orjales/plotly_visualisations/visualisation_1739456288.html


In [17]:
# Show the Plotly code returned for the scatter-chart request
print(json_images['code'])

import plotly.graph_objects as go
import numpy as np

# Generate sample data
np.random.seed(42)
x = np.random.uniform(0, 10, 50)
y = 2 * x + np.random.normal(0, 2, 50)

# Create figure with multiple series
fig = go.Figure()

# Add multiple scatter series with different shades of red
series_data = [
    {'x': x, 'y': y, 'name': 'Series 1', 'color': '#ff0000'},
    {'x': x, 'y': y + 2, 'name': 'Series 2', 'color': '#ff3333'},
    {'x': x, 'y': y - 2, 'name': 'Series 3', 'color': '#ff6666'},
    {'x': x, 'y': y * 0.8, 'name': 'Series 4', 'color': '#ff9999'}
]

for series in series_data:
    fig.add_trace(
        go.Scatter(
            x=series['x'],
            y=series['y'],
            mode='markers',
            name=series['name'],
            marker=dict(
                size=10,
                color=series['color'],
                opacity=0.7,
                line=dict(width=1, color='#444444')
            ),
            hovertemplate='x: %{x:.2f}<br>y: %{y:.2f}<extra></extra>'
