In [1]:
import json
import logging
import os
import time
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Literal, Optional
from uuid import uuid4
import boto3
from dotenv import load_dotenv



logger = logging.getLogger()

# Set up notebook so it can show logging messages and avoid print statements in cell blocks for debugging.

In [2]:
import logging
import sys
from datetime import datetime
from IPython.display import display, HTML

class VSCodeNotebookHandler(logging.Handler):
    def __init__(self):
        super().__init__()
        self.log_buffer = []

    def emit(self, record):
        try:
            # Format the message
            msg = self.format(record)
            
            # Define colors for different log levels
            colors = {
                logging.DEBUG: '#6c757d',    # gray
                logging.INFO: '#0d6efd',     # blue
                logging.WARNING: '#ffc107',   # yellow
                logging.ERROR: '#dc3545',     # red
                logging.CRITICAL: '#721c24'   # dark red
            }
            
            # Get color for current log level
            color = colors.get(record.levelno, '#000000')
            
            # Create HTML formatted log entry
            log_entry = f"""
            <div style='
                padding: 2px 6px;
                margin: 2px 0;
                border-left: 3px solid {color};
                background-color: #f8f9fa;
                font-family: Monaco, monospace;
                font-size: 12px;
            '>
                <span style='color: {color}; font-weight: bold;'>{record.levelname}</span>
                <span style='color: #666; margin-right: 8px;'>{datetime.now().strftime('%H:%M:%S')}</span>
                <span>{msg}</span>
            </div>
            """
            
            # Display the formatted log entry
            display(HTML(log_entry))
            
            # Also print to stderr for VSCode's native console
            print(msg, file=sys.stderr)
            
        except Exception as e:
            print(f"Error in log handler: {str(e)}", file=sys.stderr)

def setup_notebook_logging(level=logging.INFO):
    """
    Configure logging for VSCode Jupyter notebooks
    
    Args:
        level: The logging level to use (default: INFO)
    Returns:
        logger: Configured logger instance
    """
    # Create or get the root logger
    logger = logging.getLogger()
    
    # Remove any existing handlers
    for handler in logger.handlers[:]:
        logger.removeHandler(handler)
    
    # Set the logging level
    logger.setLevel(level)
    
    # Create and configure the VSCode notebook handler
    handler = VSCodeNotebookHandler()
    formatter = logging.Formatter('%(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    
    return logger

# Usage example:
logger = setup_notebook_logging(logging.INFO)

# Test the logger
def test_logging():
    logger.debug("This is a debug message")
    logger.info("This is an info message")
    logger.warning("This is a warning message")
    logger.error("This is an error message")
    logger.critical("This is a critical message")

In [3]:
logger = setup_notebook_logging()

# Chartist

In [4]:
# Load .env file
load_dotenv()
aws_profile = os.getenv('AWS_PROFILE')
llm_region = os.getenv('LLM_REGION')
llm_model = os.getenv('LLM_MODEL')
print(f"Using AWS Profile: {aws_profile}")
print(f"Using AWS Region: {llm_region}")
print(f"Using LLM model: {llm_model}")

Using AWS Profile: aws-prototype
Using AWS Region: us-east-1
Using LLM model: anthropic.claude-3-5-sonnet-20240620-v1:0


In [5]:
from langchain import hub
from langchain.agents import (
    AgentExecutor,
    tool,
    create_tool_calling_agent,
    create_json_chat_agent,
)
from langchain_core.tools import StructuredTool, Tool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_aws import ChatBedrock
from langchain_experimental.utilities import PythonREPL
from langchain_core.messages import HumanMessage

In [6]:
from config import Config
from aws.bedrock import BedrockHandler
from tools.visualisation_formatter import create_formatting_tool
from tools.image_analysis_formatter import create_analysis_formatter_tool
from tools.save_plotly import create_save_plotly_tool

In [7]:
import pandas as pd
import base64

class chartist:
    def __init__(self, region_name: str = "us-east-1"):
        """
        Initialize chartist with required configuration

        Parameters:
        region_name (str): AWS region name
        """
        logger.info("Chartist Start")
        self.config = Config()
        self.bedrock = BedrockHandler(self.config)
        self.bedrock_runtime = self.bedrock.set_runtime()
        self.llm = self.bedrock.get_llm()
        self.prompt = hub.pull("hwchase17/react-chat-json")
    

        self.tools = [
            create_formatting_tool(),
            create_analysis_formatter_tool(),
            create_save_plotly_tool()            
        ]
        self.agent_executor = self.set_agent_executor()

        # Set up holder for visualisations
        self.visualizations = None

    def set_agent_executor(self, verbose=False, handle_parse=True):
        logger.info("Setting up chartist executor")
        try:
            agent = create_json_chat_agent(self.llm, self.tools, self.prompt)
            logger.debug("Structured chat agent created")

            executor = AgentExecutor(
                agent=agent,
                tools=self.tools,
                verbose=verbose,
                handle_parsing_errors=handle_parse,
            )
            logger.info("Agent executor successfully set up")
            return executor
        except Exception as e:
            logger.error(f"Error setting up agent executor: {str(e)}")
            raise

    # Specific use cases - Dataframe ingestion and visualisation suggestion
    def parse_dataframe(self, df: pd.DataFrame) -> dict:
        """
        Extract useful information from a DataFrame and return it as a JSON-like dictionary.

        Parameters:
        df: DataFrame to analyze.

        Returns:
        dict: A structured dictionary containing DataFrame details.
        """
        logger.info("Parsing DataFrame into structured JSON dictionary")
        try:
            try:
                summary = df.describe(include="all").to_dict()
            except ValueError as e:
                logger.warning(f"Could not generate full summary statistics: {e}")
                summary = {}
            
            data_info = {
                "columns": [{"name": col, "dtype": str(dtype)} for col, dtype in df.dtypes.items()],
                "shape": {"rows": df.shape[0], "columns": df.shape[1]},
                "summary": summary,
                "sample_data": df.head(10).to_dict(orient="records"),
            }
            logger.debug(f"DataFrame parsed: {data_info}")
            return data_info
        except Exception as e:
            logger.error(f"Error parsing DataFrame: {str(e)}")
            raise
        
    def encode_image(self, image_path) -> str:
        """
        Encode one or more images into Base64 ready for Claude
        
        Parameters:
            image_paths str: Path to the chart image
        
        Returns:
            image_contents str: base64 encoded image string data
        
        Raises:
            Exception: If there's an error processing the request or calling Bedrock
        """
        logger.info("Encoding image to base64")
        try:
            # Create image content string
            with open(image_path, "rb") as image_file:
                encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

            return encoded_image
            
        except Exception as e:
            logger.error(f"Error encoding image: {str(e)}")
            raise
        
    def analyse_image(self, base64_data):
        """
        Necessary because trying to pass it into an agent tool fails as agent will truncate the base64 image
        data.
        
        """
        try:
            if not base64_data:
                return "Error: No image data provided"
        
            analysis_prompt = """Analyse this image and provide a detailed analysis using the following structure:

                # Description
                [Provide a detailed description of what the image shows]

                # Chart Analysis
                ## Type
                [Specify the type of visualization (e.g., bar chart, line plot, scatter plot)]

                ## Axes
                [List all axes and what they represent]

                ## Insights
                [List key patterns, trends, or insights visible in the chart]

                # Plotly Recreation
                ## Code
                ```python
                [Provide a complete Plotly code snippet that could recreate this visualization]
                ```

                ## Data Structure
                [Describe the data structure needed for the Plotly code]"""
            
            body = {
                    "anthropic_version": "bedrock-2023-05-31",
                    "max_tokens": 2000,
                    "messages": [
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "image",
                                    "source": {
                                        "type": "base64",
                                        "media_type": "image/png",
                                        "data": base64_data,
                                    },
                                },
                                {"type": "text", "text": analysis_prompt},
                            ],
                        }
                    ],
                }
            
            body_bytes = json.dumps(body).encode("utf-8")

            logger.debug(f"Request body: {json.dumps(body, indent=2)}")

            # Call Bedrock
            response = self.bedrock_runtime.invoke_model(
                modelId=self.config.LLM_MODEL, body=body_bytes
            )

            response_body = json.loads(response.get("body").read())
            return response_body.get("content", [])[0].get("text", "")

        except Exception as e:
            logger.error(f"Full error in analyse_image: {str(e)}")
            return f"Error analysing image: {str(e)}"

    def handle_request(self, data=None, prompt=None, image_path=None, **kwargs):
        """
        Handle user input by deciding how to process it, based on the type of data provided.

        Parameters:
        data: Input data for the agent. Could be a DataFrame or other types.
        prompt: String prompt to guide the agent.
        image_paths: List of image paths to convert and pass for analysis

        Returns:
        Response generated by the agent executor.
        """
        
        base_prompt = f"""
            User Prompt:
            {prompt}
            """
        
        if isinstance(data, pd.DataFrame):
            logger.info("Detected DataFrame input. Preparing to analyze...")

            # Limit the DataFrame to the top 100 rows if necessary
            max_rows = 100
            if len(data) > max_rows:
                logger.info(f"DataFrame has more than {max_rows} rows. Trimming for processing.")
                data = data.head(max_rows)

            # Convert DataFrame to JSON string
            dataframe_json = data.to_json(orient="split")

            # Construct the combined prompt - TODO: maybe instead of creating a separate prompt, we add the bits
            # to the prompt depending on what is being analysed as we do with citations.

            dataframe_prompt = f"""
                        DataFrame Information (Top {len(data)} Rows):
                        The following is a JSON representation of the DataFrame. Use this to suggest suitable visualizations:
                        {dataframe_json}

                        Instructions:
                        1. Analyze the DataFrame to understand its structure and content
                        2. Suggest meaningful visualizations based on the data and user's instructions
                        3. For each visualization, include:
                        - Clear purpose
                        - Chart type
                        - Variables used
                        - Expected insights
                        4. Use the format_visualization_output tool to structure your response
                        5. Make sure to provide concrete, specific suggestions based on the actual data

                        Remember to use the formatting tool for your final output.
                        """

            final_prompt = f"""
                {base_prompt}

                {dataframe_prompt}
                """
        
        if isinstance(image_path, str):
            logger.info("Detected image location input. Preparing to review...")
            
            # Get encoded image data
            image_base64 = self.encode_image(image_path)
            image_response = self.analyse_image(image_base64)
            
            logger.info(f"Claude markdown response: {image_response}")
                
            final_prompt = f"""
                Use the image_analysis_formatter tool to standardize the output and the save_plotly_visualization tool to create an interactive visualization.
                
                1. First, format the analysis using image_analysis_formatter with these parameters:
                - image_information: {image_response}
                
                2. Then, extract the Plotly code from the formatted response and use save_plotly_visualization to create an HTML file.
                
                3. Return both the formatted analysis and the path to the saved visualization.
            """
        
        # Send to the agent executor
        try:
            logger.info("Sending prompt and data to agent executor...")
            response = self.agent_executor.invoke({"input": final_prompt})
            print(response)
            return response
        except Exception as e:
            logger.error(f"Error in agent executor: {str(e)}")
            return f"Error processing visualization suggestions: {str(e)}"
            

        #elif prompt:
        #    logger.info("No DataFrame detected. Using agent executor for the provided prompt...")
        #    try:
        #        response = self.agent_executor.invoke({"input": prompt})
        #        return response
        #    except Exception as e:
        #        logger.error(f"Error in agent executor: {str(e)}")
        #        return f"Error processing prompt: {str(e)}"

        #else:
        #    logger.warning("No input provided to handle_request method.")
        #    return "Please provide a DataFrame or a prompt for the agent to process."
        

In [8]:
import numpy as np
def create_sample_dataset():
    """Create a sample dataset for testing"""
    np.random.seed(42)
    dates = pd.date_range(start="2023-01-01", periods=100, freq="D")

    sample_df = pd.DataFrame(
        {
            "date": dates,
            "sales": np.random.normal(1000, 100, 100),
            "customers": np.random.randint(50, 200, 100),
            "satisfaction_score": np.random.uniform(3.5, 5.0, 100),
            "category": np.random.choice(["A", "B", "C"], 100),
        }
    )

    return sample_df

In [9]:
chartist = chartist()

Chartist Start


Loaded config for agent successfully.


Initializing BedrockHandler


Found credentials in shared credentials file: ~/.aws/credentials


Creating ChatBedrock LLM instance


Found credentials in shared credentials file: ~/.aws/credentials


Successfully created ChatBedrock LLM instance


Setting up chartist executor


Agent executor successfully set up


## Test Visualisation from a dataset

In [10]:
sample_df = create_sample_dataset()

In [11]:
prompt = "Analyze this dataset and suggest visualizations to explore trends and insights."
response = chartist.handle_request(data=sample_df, prompt=prompt)

Detected DataFrame input. Preparing to analyze...


Sending prompt and DataFrame to agent executor...


In [12]:
print(response['output'])

📊 VISUALIZATION SUGGESTIONS:


📈 TIME SERIES ANALYSIS OF SALES
Purpose: To visualize the trend of sales over time
Chart Type: Line chart
Variables: date (x-axis), sales (y-axis)
Expected Insights: Identify any seasonal patterns, overall trends, or unusual spikes/dips in sales

--------------------------------------------------

📈 CUSTOMER COUNT VS
Purpose: To examine the relationship between the number of customers and sales
Chart Type: Scatter plot
Variables: customers (x-axis), sales (y-axis)
Expected Insights: Determine if there's a correlation between customer count and sales volume

--------------------------------------------------

📈 SATISFACTION SCORE DISTRIBUTION
Purpose: To analyze the distribution of customer satisfaction scores
Chart Type: Histogram or Box plot
Variables: satisfaction_score
Expected Insights: Understand the overall customer satisfaction levels and identify any outliers

--------------------------------------------------

📈 SALES BY CATEGORY COMPARISON
Purpo

## Test copycat functionality

This is where we pass an image to Claude and ask it to recreate the image using plotly graph objects if possible, saved as an HTML file.

In [13]:
bedrock = boto3.client(
    service_name='bedrock-runtime',
    region_name='us-east-1'
)

base64_data = chartist.encode_image(image_path='/Users/jose.orjales/c-af-chartist/confusion_matrix.png')

analysis_prompt = """Analyse this image and provide a detailed analysis using the following structure:

# Description
[Provide a detailed description of what the image shows]

# Chart Analysis
## Type
[Specify the type of visualization (e.g., bar chart, line plot, scatter plot)]

## Axes
[List all axes and what they represent]

## Insights
[List key patterns, trends, or insights visible in the chart]

# Plotly Recreation
## Code
```python
[Provide a complete Plotly code snippet that could recreate this visualization]
```

## Data Structure
[Describe the data structure needed for the Plotly code]"""

body = {
                "anthropic_version": "bedrock-2023-05-31",
                "max_tokens": 2000,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": "image/png",
                                    "data": base64_data,
                                },
                            },
                            {"type": "text", "text": analysis_prompt},
                        ],
                    }
                ],
            }

body_bytes = json.dumps(body).encode('utf-8')

Encoding image to base64


In [14]:
response = bedrock.invoke_model(
    modelId='anthropic.claude-3-sonnet-20240229-v1:0', body=body_bytes
)

# Parse and return the response
response_body = json.loads(response['body'].read())
print(response_body['content'][0]['text'])

# Description
The image shows a correlation matrix, which displays the correlation coefficients between three variables: satisfaction_score, customers, and sales. The correlation coefficients are represented by colored cells, ranging from dark purple (strong negative correlation) to bright yellow (strong positive correlation).

# Chart Analysis
## Type
The visualization is a heatmap or correlation matrix, which is a graphical representation of a correlation table.

## Axes
The x-axis and y-axis represent the variables being analyzed: satisfaction_score, customers, and sales.

## Insights
- There is a strong positive correlation between satisfaction_score and customers, as indicated by the bright yellow cell.
- There is a moderate positive correlation between satisfaction_score and sales, as indicated by the light green cell.
- There is a weak negative correlation between customers and sales, as indicated by the light purple cell.

# Plotly Recreation
## Code
```python
import plotly.exp

In [11]:
json_images = chartist.handle_request(image_path='/Users/jose.orjales/c-af-chartist/confusion_matrix.png')

Detected image location input. Preparing to review...


Encoding image to base64


Claude markdown response: # Description
This image shows a correlation matrix heatmap. The matrix visualizes the relationships between three variables: satisfaction_score, customers, and sales. The correlation coefficients are represented by different colors, with yellow indicating strong positive correlation, dark purple indicating weak or no correlation, and lighter purple suggesting moderate correlation.

# Chart Analysis
## Type
The visualization is a heatmap, specifically a correlation matrix heatmap.

## Axes
- X-axis: Variables (sales, customers, satisfaction_score)
- Y-axis: Variables (satisfaction_score, customers, sales)

## Insights
1. There appears to be a strong positive correlation between sales and customers, indicated by the bright yellow color.
2. The satisfaction_score seems to have a weak or no correlation with both sales and customers, shown by the dark purple color.
3. The diagonal of the matrix is yellow, which is expected as it represents the correlation of each 

Sending prompt and data to agent executor...


{'input': '\n                Use the image_analysis_formatter tool to standardize the output and the save_plotly_visualization tool to create an interactive visualization.\n                \n                1. First, format the analysis using image_analysis_formatter with these parameters:\n                - image_information: # Description\nThis image shows a correlation matrix heatmap. The matrix visualizes the relationships between three variables: satisfaction_score, customers, and sales. The correlation coefficients are represented by different colors, with yellow indicating strong positive correlation, dark purple indicating weak or no correlation, and lighter purple suggesting moderate correlation.\n\n# Chart Analysis\n## Type\nThe visualization is a heatmap, specifically a correlation matrix heatmap.\n\n## Axes\n- X-axis: Variables (sales, customers, satisfaction_score)\n- Y-axis: Variables (satisfaction_score, customers, sales)\n\n## Insights\n1. There appears to be a strong p

In [13]:
print(json.dumps(json_images, indent=4))

{
    "input": "\n                Use the image_analysis_formatter tool to standardize the output and the save_plotly_visualization tool to create an interactive visualization.\n                \n                1. First, format the analysis using image_analysis_formatter with these parameters:\n                - image_information: # Description\nThis image shows a correlation matrix heatmap. The matrix visualizes the relationships between three variables: satisfaction_score, customers, and sales. The correlation coefficients are represented by different colors, with yellow indicating strong positive correlation, dark purple indicating weak or no correlation, and lighter purple suggesting moderate correlation.\n\n# Chart Analysis\n## Type\nThe visualization is a heatmap, specifically a correlation matrix heatmap.\n\n## Axes\n- X-axis: Variables (sales, customers, satisfaction_score)\n- Y-axis: Variables (satisfaction_score, customers, sales)\n\n## Insights\n1. There appears to be a str