## Lab: Function Calling, para lanzar consultas SQL a nuestra DB

In [None]:
import boto3
import awswrangler as wr
from pprint import pprint
from utils.bedrock_utils import build_llm_query, chat_with_claude_nl_to_sql, format_pretty_output

### Creamos rápidamente Tabla en Athena

> Dataset source: https://www.kaggle.com/datasets/ihelon/coffee-sales

In [None]:
# Names shared by the S3 upload and the Athena table definition.
table_name = 'coffee_shop_sales'
s3_bucket = 'genai-carlos-contreras-bucket-data-quarks-labs-oregon-01'

# S3 prefix under which the table's data lives, and the key of the CSV
# file stored inside that prefix.
s3_key_path = f'datasets/{table_name}'
s3_coffee_sales_file = f'{s3_key_path}/{table_name}.csv'

#### Creamos Base de Datos

In [None]:
db_name = "db_coffee_shop_sales"

# Create the database only if it is missing. exist_ok=True makes the call
# idempotent (no error when the database already exists), so there is no
# need to list the catalog first, and the name is defined in a single place.
wr.catalog.create_database(db_name, exist_ok=True)

#### Subimos datos a S3

In [None]:
# Should the data be uploaded, or has that already been done?
# Set to True to run the S3 upload guarded by this flag.
upload_data = False

In [None]:
if upload_data:
    # SDK client used for the upload.
    s3_client = boto3.client('s3')

    # Upload the local CSV to s3://<s3_bucket>/<s3_coffee_sales_file>.
    # upload_file() returns None, so there is no response worth keeping.
    s3_client.upload_file(
        Filename=f'data/sales_coffee/{table_name}.csv',
        Bucket=s3_bucket,
        Key=s3_coffee_sales_file,
    )


#### Creamos tabla
> Source: [AWS SDK for Pandas](https://aws-sdk-pandas.readthedocs.io/en/stable/stubs/awswrangler.catalog.create_csv_table.html#awswrangler.catalog.create_csv_table)

In [None]:
# Should the table be created, or has that already been done?
# Set to True to run the table creation guarded by this flag.
create_table = False

In [None]:
if create_table:
    # Register the CSV data under s3://<s3_bucket>/<s3_key_path>/ as a
    # table in the data catalog, declaring each column's type explicitly.
    wr.catalog.create_csv_table(
        table=table_name,
        database=db_name,
        description='Coffee Shop Sales table',
        path=f's3://{s3_bucket}/{s3_key_path}/',
        # Ignore the first line of each file (presumably the CSV header row).
        skip_header_line_count=1,
        columns_types={
            'date': 'date',
            'datetime': 'timestamp',
            'cash_type': 'string',
            'card': 'string',
            'money': 'double',
            'coffee_name': 'string',
        },
    )

### Consultamos la tabla

In [None]:
%%time
# Enable to run query!
wr.athena.read_sql_query(f"SELECT * FROM {table_name} LIMIT 10", database=db_name, ctas_approach=False)

> Nota de [la documentación](https://aws-sdk-pandas.readthedocs.io/en/stable/stubs/awswrangler.athena.read_sql_query.html):

Create the default Athena bucket if it doesn’t exist and s3_output is None.

(e.g. s3://aws-athena-query-results-ACCOUNT-REGION/)

In [None]:
%%time
# Aggregation example: average amount spent and number of rows per cash_type.
# Uncomment the line below to run the query.
# wr.athena.read_sql_query(f"SELECT cash_type, round(avg(money),2) avg_spent,count() as num_events FROM {table_name} group by cash_type", database=db_name, ctas_approach=False)

## Function Calling

### Definimos cómo llamar a la función

In [None]:
##############################################################################
# Tool specification for the Function Calling feature
##############################################################################

# The tool is named "run_query" and takes one required string argument,
# `sql_query`, holding the SQL statement to run against the sales table.
get_tool_spec_sql = {
    "name": "run_query",
    "description": "Run SQL to get details about our Coffee Shop sales.",
    "inputSchema": {
        "json": {
            "type": "object",
            "properties": {
                "sql_query": {
                    "type": "string",
                    "description": f"Ansi SQL statement to query table {table_name}.",
                },
            },
            "required": ["sql_query"],
        },
    },
}


# Tool configuration handed to the model: a list holding our single SQL tool.
toolConfig = {'tools': [{'toolSpec': get_tool_spec_sql}]}

### Construimos pregunta con Tool

In [None]:
def ask_llm(question, toolConfig, model_id="anthropic.claude-3-5-haiku-20241022-v1:0"):
    """Ask the LLM a natural-language question with the given tools available.

    Args:
        question: The question to ask; forwarded to ``build_llm_query`` as
            ``questions``.
        toolConfig: Tool configuration, passed to ``build_llm_query`` as
            ``tools_instructions`` and to ``chat_with_claude_nl_to_sql`` as
            ``toolConfig``.
        model_id: Model identifier forwarded to
            ``chat_with_claude_nl_to_sql``. The default is the Claude 3.5
            Haiku id, so calls that omit it keep using that model.

    Returns:
        The value returned by ``chat_with_claude_nl_to_sql``, unchanged.
    """
    # Build the LLM query from the question and the tool instructions.
    messages = build_llm_query(questions=question, tools_instructions=toolConfig)

    # Ask the LLM to translate natural language to SQL and to run it.
    return chat_with_claude_nl_to_sql(
        messages=messages,
        toolConfig=toolConfig,
        model_id=model_id,
    )

### Ask Claude

In [None]:
# Think of other questions you could ask the LLM, e.g. "Show sales by day",
# "Show sales by type of coffee", "Show sales by cash or card"...
# e.g. "How much did we sell in March 2024, by type of coffee? Provide a short business narrative of results"
response = ask_llm(
    question="How much did we sell in March 2024?",
    toolConfig=toolConfig,
)

In [None]:
# Print the 'response' entry of the result, formatted by
# format_pretty_output with width=120.
print(format_pretty_output(text=response['response'], width=120))