In [1]:
# Run this cell first: it sets up boto3's default session up front.
# Skipping it may lead to client errors on some AWS calls later in the notebook.
import boto3
boto3.setup_default_session()

In [2]:
import sys
import os
from pathlib import Path
# Make the parent of the working directory importable so that project packages
# resolve when the notebook is launched from its own sub-directory.
notebook_dir = Path.cwd()
project_root = notebook_dir.parent
# Guard the append so re-running this cell does not stack duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [3]:
from dotenv import load_dotenv
import os
from chAI.constants import LLMModel

# Load variables from the .env file, then read the three settings used below.
load_dotenv()
aws_profile, llm_region, llm_model = (
    os.getenv(variable) for variable in ('AWS_PROFILE', 'LLM_REGION', 'LLM_MODEL')
)

# Echo the active configuration, one setting per line.
print(
    f"Using AWS Profile: {aws_profile}",
    f"Using AWS Region: {llm_region}",
    f"Using LLM model: {llm_model}",
    sep="\n",
)

# List every member of the LLMModel enum as "- NAME: value".
print("\nAvailable models in LLMModel enum:")
for model in LLMModel:
    print(f"- {model.name}: {model.value}")

Using AWS Profile: aws-prototype
Using AWS Region: us-east-1
Using LLM model: anthropic.claude-3-5-sonnet-20240620-v1:0

Available models in LLMModel enum:
- CLAUDE_V2_1: anthropic.claude-v2:1
- CLAUDE_SONNET_3_5: anthropic.claude-3-5-sonnet-20240620-v1:0
- CLAUDE_HAIKU_3_5: anthropic.claude-3-5-haiku-20241022-v1:0


In [4]:
# Setup notebook to show logger messages
import logging
import sys
from datetime import datetime
from IPython.display import display, HTML

class VSCodeNotebookHandler(logging.Handler):
    """Logging handler that renders each record as a coloured HTML line.

    Every record is shown through ``display(HTML(...))`` and the plain
    formatted message is also echoed to ``sys.stderr``.
    """

    # Accent colour per log level; levels not listed fall back to DEFAULT_COLOR.
    LEVEL_COLORS = {
        logging.DEBUG: '#6c757d',     # gray
        logging.INFO: '#0d6efd',      # blue
        logging.WARNING: '#ffc107',   # yellow
        logging.ERROR: '#dc3545',     # red
        logging.CRITICAL: '#721c24',  # dark red
    }
    DEFAULT_COLOR = '#000000'

    def __init__(self):
        super().__init__()
        # Kept for backward compatibility; nothing in this class writes to it.
        self.log_buffer = []

    def _build_html(self, record, msg):
        """Return the HTML snippet used to display one log record.

        Args:
            record: The ``logging.LogRecord`` being emitted.
            msg: The already formatted message text for ``record``.
        Returns:
            str: A styled ``<div>`` holding the level name, time and message.
        """
        # Function-scope import so the handler needs no extra notebook-level import.
        from html import escape

        color = self.LEVEL_COLORS.get(record.levelno, self.DEFAULT_COLOR)
        # Use the record's own creation time rather than the render time.
        timestamp = datetime.fromtimestamp(record.created).strftime('%H:%M:%S')

        # Escape dynamic text so '<', '>' and '&' in a message cannot break
        # (or inject into) the rendered markup.
        return f"""
            <div style='
                padding: 2px 6px;
                margin: 2px 0;
                border-left: 3px solid {color};
                background-color: #f8f9fa;
                font-family: Monaco, monospace;
                font-size: 12px;
            '>
                <span style='color: {color}; font-weight: bold;'>{escape(record.levelname)}</span>
                <span style='color: #666; margin-right: 8px;'>{timestamp}</span>
                <span>{escape(msg)}</span>
            </div>
            """

    def emit(self, record):
        """Display ``record`` as HTML and echo its text to stderr.

        Any exception raised while rendering is reported on stderr instead of
        propagating, so a display problem never interrupts the code that logged.
        """
        try:
            # Format the message
            msg = self.format(record)

            # Display the formatted log entry
            display(HTML(self._build_html(record, msg)))

            # Also print to stderr for VSCode's native console
            print(msg, file=sys.stderr)

        except Exception as e:
            print(f"Error in log handler: {str(e)}", file=sys.stderr)

def setup_notebook_logging(level=logging.INFO):
    """
    Route all logging through a single VSCodeNotebookHandler on the root logger.

    Handlers already attached to the root logger are detached first, so calling
    this repeatedly leaves exactly one handler installed.

    Args:
        level: The logging level to use (default: INFO)
    Returns:
        logger: The configured root logger
    """
    root = logging.getLogger()

    # Walk a snapshot of the list because removeHandler mutates root.handlers.
    for stale in list(root.handlers):
        root.removeHandler(stale)

    root.setLevel(level)

    # Records are displayed as their bare message text.
    notebook_handler = VSCodeNotebookHandler()
    notebook_handler.setFormatter(logging.Formatter('%(message)s'))
    root.addHandler(notebook_handler)

    return root

# Usage example: install the notebook handler on the root logger at INFO level.
logger = setup_notebook_logging(logging.INFO)

# Test the logger
def test_logging():
    logger.debug("This is a debug message")
    logger.info("This is an info message")
    logger.warning("This is a warning message")
    logger.error("This is an error message")
    logger.critical("This is a critical message")

In [5]:
# Re-apply the notebook logging setup with its default level (INFO).
logger = setup_notebook_logging()

# chAI testing and examples

In [6]:
import numpy as np
import pandas as pd
def create_sample_dataset(n_rows=100, seed=42):
    """Create a reproducible sample dataset for testing.

    Args:
        n_rows: Number of daily rows to generate, starting at 2023-01-01
            (default: 100).
        seed: Seed for the random number generator (default: 42).
    Returns:
        pandas.DataFrame with the columns ``date``, ``sales``, ``customers``,
        ``satisfaction_score`` and ``category``.
    """
    # A private RandomState produces the same stream as np.random.seed(seed)
    # followed by the np.random.* calls, but leaves NumPy's global generator
    # untouched for the rest of the notebook.
    rng = np.random.RandomState(seed)
    dates = pd.date_range(start="2023-01-01", periods=n_rows, freq="D")

    # The draws happen in dict order; keep it to keep the generated values stable.
    sample_df = pd.DataFrame(
        {
            "date": dates,
            "sales": rng.normal(1000, 100, n_rows),
            "customers": rng.randint(50, 200, n_rows),
            "satisfaction_score": rng.uniform(3.5, 5.0, n_rows),
            "category": rng.choice(["A", "B", "C"], n_rows),
        }
    )

    return sample_df

In [7]:
from chAI.chAI import chAI
# Build the chAI instance that the handle_request calls below go through.
chai = chAI()

chAI Start


Loaded config for agent successfully.


Loaded config for agent successfully.


Initializing BedrockHandler


Found credentials in shared credentials file: ~/.aws/credentials


Creating ChatBedrock LLM instance


Found credentials in shared credentials file: ~/.aws/credentials


Successfully created ChatBedrock LLM instance


Setting up chAI agent


chAI agent successfully set up


## Visualisation Suggestions

In [8]:
# Build the synthetic DataFrame that is passed to chAI in the next cell.
sample_df = create_sample_dataset()

In [9]:
# Send the DataFrame together with a free-text prompt, then print the
# 'output' entry of the response.
prompt = "Analyse this dataset and suggest visualisations to explore trends and insights."
response = chai.handle_request(data=sample_df, prompt=prompt)
print(response['output'])

Detected DataFrame input. Preparing to analyse...


Sending prompt and data to agent executor...


{'input': '\n            User Prompt:\n            Analyse this dataset and suggest visualisations to explore trends and insights.\n            \n\n\n            DataFrame Information (Top 100 Rows):\n            The following is a JSON representation of the DataFrame. Use this to suggest suitable visualisations:\n            {"columns":["date","sales","customers","satisfaction_score","category"],"index":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99],"data":[[1672531200000,1049.6714153011,186,4.352462905,"A"],[1672617600000,986.1735698829,111,3.6405121517,"A"],[1672704000000,1064.7688538101,100,4.0515737046,"A"],[1672790400000,1152.3029856408,108,3.8978035515,"A"],[1672876800000,976.5846625277,167,3.8659844651,"A"],[1672963200000,976.5863043051,

## Copycat functionality

Provide a chart image and ask the LLM to generate the equivalent Plotly code and write it to an HTML file.

In [10]:
# Review instructions sent along with the chart image in the next cell.
# Note: this rebinds `prompt`, replacing the dataset prompt defined earlier.
prompt = """
    Please analyse this chart and provide a comprehensive review with the following structure:

    1. Key Findings:
    - What are the most significant patterns or insights?
    - What are the highest and lowest values?
    - Are there any notable disparities or trends?

    2. Comparative Analysis:
    - Compare the different outcome groups
    - Identify any significant gaps or differences
    - Highlight any unexpected patterns

    Please provide your analysis in clear, business-friendly language suitable for stakeholders.
    """

In [11]:
# Derive the image folder from project_root (set in the path-setup cell) instead
# of a machine-specific absolute path, so the cell runs on other checkouts.
# NOTE(review): assumes tests/img sits directly under project_root — confirm.
img_dir = project_root / 'tests' / 'img'
json_images = chai.handle_request(
    prompt=prompt,
    image_path=str(img_dir / 'satisfaction.png'),
    output_path=str(img_dir),
)

Detected image location input. Preparing to review...


Encoding image to base64


Claude markdown response: # Description
The image shows a histogram representing the distribution of satisfaction scores. The chart displays the frequency or count of scores across different ranges from 3.4 to 5.0.

# Chart Analysis
## Type
This is a histogram chart, which is used to display the distribution of a continuous variable by dividing it into discrete bins and showing the frequency of data points falling into each bin.

## Axes
- X-axis: Satisfaction Score, ranging from 3.4 to 5.0 in increments of 0.2
- Y-axis: Count, representing the frequency of scores in each bin, ranging from 0 to approximately 18

## Insights
- The distribution appears to be somewhat bimodal, with peaks around 4.2-4.4 and 4.8-5.0.
- The highest frequency is observed in the 4.2-4.4 range, with a count of about 18.
- There's another notable peak in the highest range of 4.8-5.0, suggesting a significant number of very satisfied respondents.
- The lowest frequencies are observed in the 3.4-3.6 range, indicat

Sending prompt and data to agent executor...


{'input': "\n                    Use the image_analysis_formatter tool to standardise the output and create an interactive visualisation using appropriate default chart templates as reference.\n                    \n                    1. First, format the analysis using image_analysis_formatter with these parameters:\n                    - image_information: # Description\nThe image shows a histogram representing the distribution of satisfaction scores. The chart displays the frequency or count of scores across different ranges from 3.4 to 5.0.\n\n# Chart Analysis\n## Type\nThis is a histogram chart, which is used to display the distribution of a continuous variable by dividing it into discrete bins and showing the frequency of data points falling into each bin.\n\n## Axes\n- X-axis: Satisfaction Score, ranging from 3.4 to 5.0 in increments of 0.2\n- Y-axis: Count, representing the frequency of scores in each bin, ranging from 0 to approximately 18\n\n## Insights\n- The distribution a

In [15]:
import plotly.graph_objects as go

# Bar positions on the x-axis and the matching bar heights.
satisfaction_scores = [3.4, 3.6, 3.8, 4.0, 4.2, 4.4, 4.6, 4.8, 5.0]
counts = [7, 7, 12, 12, 18, 13, 12, 16, 16]

# Start from an empty figure and attach the single bar trace.
fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=satisfaction_scores,
        y=counts,
        marker=dict(color='orange'),
        width=0.2,  # same as the spacing between x values, so bars touch
    )
)

# Axis captions first, then the figure-level styling.
fig.update_xaxes(title_text='Satisfaction Score')
fig.update_yaxes(title_text='Count')
fig.update_layout(
    title='Satisfaction Score Distribution',
    plot_bgcolor='aliceblue',
    bargap=0,
)

fig.show()

## Asking for some template code

In [13]:
# Free-text styling request passed to chai.handle_request in the next cell.
chart_prompt = "I want a red chart with bold axis titles and labels on the bars. There should also be a legend showing each distinct category"

In [14]:
# Derive the output folder from project_root (set in the path-setup cell) instead
# of a machine-specific absolute path, so the cell runs on other checkouts.
# NOTE(review): assumes tests/img sits directly under project_root — confirm.
template_output_dir = project_root / 'tests' / 'img'
json_images = chai.handle_request(
    prompt=chart_prompt,
    output_path=str(template_output_dir),
    chart_type='scatter',
)

Sending prompt and data to agent executor...


Plot saved to: /Users/jose.orjales/c-af-chartist/tests/img/red_scatter_plot.html
{'input': "\n            Create a default scatter chart visualisation using this template as reference:\n\n            # Template Code:\n            \n            import plotly.graph_objects as go\n            import numpy as np\n\n            # Generate sample data\n            np.random.seed(42)\n            x = np.random.uniform(0, 10, 50)\n            y = 2 * x + np.random.normal(0, 2, 50)\n\n            # Create figure with multiple series\n            fig = go.Figure()\n\n            # Add multiple scatter series with different colors\n            series_data = [\n                {'x': x, 'y': y, 'name': 'Series 1', 'color': '#1f77b4'},\n                {'x': x, 'y': y + 2, 'name': 'Series 2', 'color': '#2d91c2'},\n                {'x': x, 'y': y - 2, 'name': 'Series 3', 'color': '#7eb0d5'},\n                {'x': x, 'y': y * 0.8, 'name': 'Series 4', 'color': '#bddbf5'}\n            ]\n\n            