In [1]:
# Run this cell first; otherwise some AWS calls may fail with client errors.
import boto3
boto3.setup_default_session()

In [2]:
import sys
import os
from pathlib import Path
# Resolve the kernel's working directory and put its parent on sys.path
# (presumably so the project-local `chAI` package imported further down resolves).
notebook_dir = Path.cwd()
project_root = notebook_dir.parent
# Guard the append so that re-running this cell does not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [3]:
from dotenv import load_dotenv
import os
from chAI.constants import LLMModel

# Load the .env file, then pick up the three settings this notebook reports.
load_dotenv()
aws_profile = os.environ.get('AWS_PROFILE')
llm_region = os.environ.get('LLM_REGION')
llm_model = os.environ.get('LLM_MODEL')

# Echo the active configuration, one setting per line.
print(
    f"Using AWS Profile: {aws_profile}",
    f"Using AWS Region: {llm_region}",
    f"Using LLM model: {llm_model}",
    sep="\n",
)

# List every member declared on the LLMModel enum with its value.
print("\nAvailable models in LLMModel enum:")
for model in LLMModel:
    print(f"- {model.name}: {model.value}")

Using AWS Profile: aws-prototype
Using AWS Region: us-east-1
Using LLM model: anthropic.claude-3-5-sonnet-20240620-v1:0

Available models in LLMModel enum:
- CLAUDE_V2_1: anthropic.claude-v2:1
- CLAUDE_SONNET_3_5: anthropic.claude-3-5-sonnet-20240620-v1:0
- CLAUDE_HAIKU_3_5: anthropic.claude-3-5-haiku-20241022-v1:0


In [4]:
# Setup notebook to show logger messages
import logging
import sys
from datetime import datetime
from IPython.display import display, HTML

class VSCodeNotebookHandler(logging.Handler):
    """Logging handler that renders each record as a styled HTML entry in the notebook.

    Every record is shown through ``display(HTML(...))`` with an accent colour
    chosen from the record's level, and the plain formatted message is also
    written to stderr.
    """

    # Accent colour per log level.
    LEVEL_COLORS = {
        logging.DEBUG: '#6c757d',     # gray
        logging.INFO: '#0d6efd',      # blue
        logging.WARNING: '#ffc107',   # yellow
        logging.ERROR: '#dc3545',     # red
        logging.CRITICAL: '#721c24',  # dark red
    }
    # Used for any level that is not listed above.
    DEFAULT_COLOR = '#000000'

    def __init__(self):
        super().__init__()
        # Retained for backward compatibility; nothing in this class writes to it.
        self.log_buffer = []

    def emit(self, record):
        """Render ``record`` as an HTML block and mirror the message to stderr.

        Args:
            record: The ``logging.LogRecord`` to display.

        Any exception raised while formatting or displaying is caught and
        reported on stderr so that a rendering problem never propagates into
        the code that issued the log call.
        """
        # Function-scope import keeps this class self-contained.
        from html import escape

        try:
            # Format the message with whatever formatter is attached
            msg = self.format(record)

            # Get color for current log level
            color = self.LEVEL_COLORS.get(record.levelno, self.DEFAULT_COLOR)

            # Escape anything interpolated into markup: a message containing
            # '<', '>' or '&' would otherwise be parsed as HTML and vanish.
            safe_msg = escape(msg)
            level_name = escape(record.levelname)

            # Use the record's own creation time rather than the render time.
            timestamp = datetime.fromtimestamp(record.created).strftime('%H:%M:%S')

            # Create HTML formatted log entry
            log_entry = f"""
            <div style='
                padding: 2px 6px;
                margin: 2px 0;
                border-left: 3px solid {color};
                background-color: #f8f9fa;
                font-family: Monaco, monospace;
                font-size: 12px;
            '>
                <span style='color: {color}; font-weight: bold;'>{level_name}</span>
                <span style='color: #666; margin-right: 8px;'>{timestamp}</span>
                <span>{safe_msg}</span>
            </div>
            """

            # Display the formatted log entry
            display(HTML(log_entry))

            # Also print the unescaped text to stderr for VSCode's native console
            print(msg, file=sys.stderr)

        except Exception as e:
            # Deliberate best-effort: report the failure instead of raising.
            print(f"Error in log handler: {str(e)}", file=sys.stderr)

def setup_notebook_logging(level=logging.INFO):
    """
    Point the root logger at a single VSCodeNotebookHandler.

    Handlers already attached to the root logger are detached first, so
    calling this more than once does not stack handlers.

    Args:
        level: Threshold applied to the root logger (default: INFO)
    Returns:
        logger: The reconfigured root logger
    """
    root = logging.getLogger()

    # Iterate over a copy: removeHandler mutates the underlying list.
    for existing in list(root.handlers):
        root.removeHandler(existing)

    root.setLevel(level)

    # The formatter emits the bare message only.
    notebook_handler = VSCodeNotebookHandler()
    notebook_handler.setFormatter(logging.Formatter('%(message)s'))
    root.addHandler(notebook_handler)

    return root

# Usage example:
logger = setup_notebook_logging(logging.INFO)

# Test the logger
def test_logging():
    logger.debug("This is a debug message")
    logger.info("This is an info message")
    logger.warning("This is a warning message")
    logger.error("This is an error message")
    logger.critical("This is a critical message")

In [5]:
logger = setup_notebook_logging()

# chAI testing and examples

In [6]:
import numpy as np
import pandas as pd
def create_sample_dataset(n_rows=100, seed=42):
    """Create a sample dataset for testing.

    Args:
        n_rows: Number of daily rows to generate (default: 100).
        seed: Seed for the private random generator (default: 42).

    Returns:
        pandas.DataFrame with one row per day starting 2023-01-01 and the
        columns ``date``, ``sales``, ``customers``, ``satisfaction_score``
        and ``category``.

    With the default arguments the values match what the legacy
    ``np.random.seed(42)`` followed by global ``np.random.*`` calls produce.
    The difference is that the generator is local, so calling this function
    no longer resets NumPy's global random state.
    """
    # A private legacy generator yields the same stream as the seeded global one.
    rng = np.random.RandomState(seed)
    dates = pd.date_range(start="2023-01-01", periods=n_rows, freq="D")

    # Draw order matters: each column consumes the stream in turn.
    sample_df = pd.DataFrame(
        {
            "date": dates,
            "sales": rng.normal(1000, 100, n_rows),
            "customers": rng.randint(50, 200, n_rows),
            "satisfaction_score": rng.uniform(3.5, 5.0, n_rows),
            "category": rng.choice(["A", "B", "C"], n_rows),
        }
    )

    return sample_df

In [7]:
from chAI.chAI import chAI
chai = chAI()

chAI Start


Loaded config for agent successfully.


Loaded config for agent successfully.


Initializing BedrockHandler


Found credentials in shared credentials file: ~/.aws/credentials


Creating ChatBedrock LLM instance


Found credentials in shared credentials file: ~/.aws/credentials


Successfully created ChatBedrock LLM instance


Setting up chAI agent


chAI agent successfully set up


## Visualisation Suggestions

In [8]:
sample_df = create_sample_dataset()

In [9]:
prompt = "Analyse this dataset and suggest visualisations to explore trends and insights."
response = chai.handle_request(data=sample_df, prompt=prompt)

Detected DataFrame input. Preparing to analyse...


Parsing DataFrame into structured JSON dictionary


Sending prompt and data to agent executor...


## Copycat functionality

Provide a chart image and ask the LLM to generate the equivalent Plotly code and save the result to an HTML file.

In [9]:
prompt = """
    Please analyse this chart and provide a comprehensive review with the following structure:

    1. Key Findings:
    - What are the most significant patterns or insights?
    - What are the highest and lowest values?
    - Are there any notable disparities or trends?

    2. Comparative Analysis:
    - Compare the different outcome groups
    - Identify any significant gaps or differences
    - Highlight any unexpected patterns

    Please provide your analysis in clear, business-friendly language suitable for stakeholders.
    """

In [18]:
# Derive the image location from project_root (defined in the sys.path setup cell)
# instead of a user-specific absolute path, so the notebook works on any checkout.
# NOTE(review): assumes the kernel's working directory is the Notebooks folder,
# making project_root the repository root - confirm.
image_path = project_root / "tests" / "img" / "satisfaction.png"
json_images = chai.handle_request(prompt=prompt, image_path=str(image_path))

Detected image location input. Preparing to review...


Encoding image to base64


ValueError: Invalid format specifier ' "## Insights\n1. <insight1>\n2. <insight2>\n...",
                "path": "<path returned by save_plotly_visualisation>",
                "code": "<complete plotly code used>"
            ' for object of type 'str'

In [12]:
print(json_images['analysis'])

## Insights
1. The distribution appears to be somewhat bimodal, with peaks around 4.2-4.4 and 4.8-5.0.
2. There's a notable increase in frequency for the highest satisfaction scores (4.8-5.0), suggesting many respondents gave very high ratings.
3. The lowest satisfaction scores (3.4-3.6) have the smallest counts, indicating fewer people gave low ratings.
4. There's a dip in the middle range (around 4.6), which is interesting as it separates the two peaks.
5. The overall distribution is skewed towards higher satisfaction scores, with most counts falling above 4.0.


In [13]:
print(json_images['code'])

import plotly.graph_objects as go

satisfaction_scores = [3.4, 3.6, 3.8, 4.0, 4.2, 4.4, 4.6, 4.8, 5.0]
counts = [7, 7, 12, 12, 13, 18, 13, 13, 16]

fig = go.Figure(data=[go.Bar(
    x=satisfaction_scores,
    y=counts,
    marker_color='orange',
    width=0.2
)])

fig.update_layout(
    title_text='Satisfaction Score Distribution',
    xaxis_title='Satisfaction Score',
    yaxis_title='Count',
    xaxis=dict(tickmode='linear', dtick=0.2),
    yaxis=dict(range=[0, 20]),
    plot_bgcolor='rgba(240, 245, 255, 1)',
    template='plotly_white'
)

fig.show()


In [14]:
print(json_images['path'])

/Users/jose.orjales/c-af-chartist/Notebooks/plotly_visualisations/visualisation_1739464967.html


In [None]:
import plotly.graph_objects as go

# Bin positions (x) and the number of responses in each bin (y).
satisfaction_scores = [3.4, 3.6, 3.8, 4.0, 4.2, 4.4, 4.6, 4.8, 5.0]
counts = [7, 7, 12, 12, 13, 18, 13, 12, 16]

# Single orange bar trace; the bar width equals the spacing between scores.
bars = go.Bar(
    x=satisfaction_scores,
    y=counts,
    marker_color='orange',
    width=0.2,
)
fig = go.Figure(data=[bars])

# Layout settings, applied in one call in the order listed here.
layout_options = dict(
    title_text='Satisfaction Score Distribution',
    xaxis_title='Satisfaction Score',
    yaxis_title='Count',
    xaxis=dict(tickmode='linear', dtick=0.2),
    yaxis=dict(range=[0, 20]),
    plot_bgcolor='rgba(240, 240, 250, 0.5)',
    template='plotly_white',
)
fig.update_layout(**layout_options)

fig.show()

## Asking for some template code

In [11]:
chart_prompt = "I want a red chart with bold axis titles and labels on the bars. There should also be a legend showing each distinct category"

In [15]:
chart_request = chai.handle_request(prompt = chart_prompt, chart_type='scatter')


            import plotly.graph_objects as go
            import numpy as np

            # Generate sample data
            np.random.seed(42)
            x = np.random.uniform(0, 10, 50)
            y = 2 * x + np.random.normal(0, 2, 50)

            # Create figure with multiple series
            fig = go.Figure()

            # Add multiple scatter series with different colors
            series_data = [
                {'x': x, 'y': y, 'name': 'Series 1', 'color': '#1f77b4'},
                {'x': x, 'y': y + 2, 'name': 'Series 2', 'color': '#2d91c2'},
                {'x': x, 'y': y - 2, 'name': 'Series 3', 'color': '#7eb0d5'},
                {'x': x, 'y': y * 0.8, 'name': 'Series 4', 'color': '#bddbf5'}
            ]

            for series in series_data:
                fig.add_trace(
                    go.Scatter(
                        x=series['x'],
                        y=series['y'],
                        mode='markers',
                        name=series['name'

Processing chart type request: scatter


Sending prompt and data to agent executor...


In [16]:
print(chart_request)

{'path': '/Users/jose.orjales/c-af-chartist/Notebooks/plotly_visualisations/visualisation_1739465694.html', 'code': "import plotly.graph_objects as go\nimport numpy as np\n\n# Generate sample data\nnp.random.seed(42)\nx = np.random.uniform(0, 10, 50)\ny = 2 * x + np.random.normal(0, 2, 50)\n\n# Create figure with multiple series\nfig = go.Figure()\n\n# Add multiple scatter series with different shades of red\nseries_data = [\n    {'x': x, 'y': y, 'name': 'Series 1', 'color': '#FF0000'},\n    {'x': x, 'y': y + 2, 'name': 'Series 2', 'color': '#FF3333'},\n    {'x': x, 'y': y - 2, 'name': 'Series 3', 'color': '#FF6666'},\n    {'x': x, 'y': y * 0.8, 'name': 'Series 4', 'color': '#FF9999'}\n]\n\nfor series in series_data:\n    fig.add_trace(\n        go.Scatter(\n            x=series['x'],\n            y=series['y'],\n            mode='markers',\n            name=series['name'],\n            marker=dict(\n                size=10,\n                color=series['color'],\n                opac

In [17]:
print(chart_request['code'])

import plotly.graph_objects as go
import numpy as np

# Generate sample data
np.random.seed(42)
x = np.random.uniform(0, 10, 50)
y = 2 * x + np.random.normal(0, 2, 50)

# Create figure with multiple series
fig = go.Figure()

# Add multiple scatter series with different shades of red
series_data = [
    {'x': x, 'y': y, 'name': 'Series 1', 'color': '#FF0000'},
    {'x': x, 'y': y + 2, 'name': 'Series 2', 'color': '#FF3333'},
    {'x': x, 'y': y - 2, 'name': 'Series 3', 'color': '#FF6666'},
    {'x': x, 'y': y * 0.8, 'name': 'Series 4', 'color': '#FF9999'}
]

for series in series_data:
    fig.add_trace(
        go.Scatter(
            x=series['x'],
            y=series['y'],
            mode='markers',
            name=series['name'],
            marker=dict(
                size=10,
                color=series['color'],
                opacity=0.7,
                line=dict(width=1, color='#444444')
            ),
            hovertemplate='x: %{x:.2f}<br>y: %{y:.2f}<extra></extra>'


In [13]:
print(chart_request['path'])

/Users/jose.orjales/c-af-chartist/Notebooks/plotly_visualisations/visualisation_1739465354.html


In [None]:
import plotly.graph_objects as go
import numpy as np

# Seeded sample data: y is 2 * x plus normally distributed noise.
np.random.seed(42)
x = np.random.uniform(0, 10, 50)
y = 2 * x + np.random.normal(0, 2, 50)

# Four series sharing the same x values, each paired with a lighter shade of red.
y_variants = (y, y + 2, y - 2, y * 0.8)
series_names = ('Series 1', 'Series 2', 'Series 3', 'Series 4')
red_shades = ('#FF0000', '#FF3333', '#FF6666', '#FF9999')
series_data = [
    {'x': x, 'y': y_values, 'name': label, 'color': shade}
    for y_values, label, shade in zip(y_variants, series_names, red_shades)
]

# One marker-only trace per series.
fig = go.Figure()
for series in series_data:
    marker_style = dict(
        size=10,
        color=series['color'],
        opacity=0.7,
        line=dict(width=1, color='#444444'),
    )
    trace = go.Scatter(
        x=series['x'],
        y=series['y'],
        mode='markers',
        name=series['name'],
        marker=marker_style,
        hovertemplate='x: %{x:.2f}<br>y: %{y:.2f}<extra></extra>',
    )
    fig.add_trace(trace)


def _bold_axis_title(text):
    """Return an axis-title spec with the shared bold, dark-grey font."""
    return {
        'text': text,
        'font': {'size': 16, 'color': '#444444', 'weight': 'bold'},
    }


# Centred red title, bold axis titles, and a horizontal legend below the plot area.
fig.update_layout(
    title=dict(
        text='Default Scatter Plot',
        font=dict(size=24, color='#FF0000'),
        x=0.5,
        xanchor='center',
    ),
    xaxis_title=_bold_axis_title('X Values'),
    yaxis_title=_bold_axis_title('Y Values'),
    template='plotly_white',
    showlegend=True,
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=-0.3,
        xanchor='center',
        x=0.5,
    ),
    margin={'t': 100, 'l': 100, 'r': 50, 'b': 100},
)

# Make the tick labels on both axes bold.
fig.update_xaxes(tickfont={'weight': 'bold'})
fig.update_yaxes(tickfont={'weight': 'bold'})

fig.show()