In [0]:
# Connection settings for the source MySQL database.
db_host = "www.bigdataybi.com"
db_port = 3306
db_name = "farmafake"
db_user = "curso"
# The password comes from a Databricks secret scope instead of being written in the notebook.
db_password = dbutils.secrets.get(scope="mysql-secrets", key="db_password")

# Source tables that are exposed to Spark SQL.
tables = ["customers", "products", "stores", "invoice_header", "invoice_details"]

# The JDBC URL is the same for every table, so it is built once outside the loop.
jdbc_url = f"jdbc:mysql://{db_host}:{db_port}/{db_name}"

for table_name in tables:
    df = (
        spark.read.format("jdbc")
        .option("url", jdbc_url)
        .option("dbtable", f"{db_name}.{table_name}")
        .option("user", db_user)
        .option("password", db_password)
        .load()
    )
    # Register the table as a temporary view that carries the source table's name.
    df.createOrReplaceTempView(table_name)

In [0]:
%sql
-- (Re)create main.default.sales from invoice_header joined to invoice_details,
-- customers and products. All joins are inner joins.
CREATE OR REPLACE TABLE main.default.sales AS
SELECT
    -- columns taken from invoice_header
    ih.doc_id,
    ih.doc_code,
    ih.doc_type,
    ih.store_id,
    ih.customer_id,
    ih.doc_date,
    -- columns taken from invoice_details
    di.detail_id,
    di.product_id,
    di.quantity,
    di.unit_price,
    di.discount_percent,
    -- line amount (quantity * unit_price) minus the percentage discount
    (di.quantity * di.unit_price)
        - ((di.quantity * di.unit_price) * di.discount_percent / 100) AS total_mount,
    -- columns taken from customers
    c.customer_name,
    c.date_birthday,
    c.email,
    c.genero,
    c.telephone,
    -- columns taken from products
    p.product_name,
    p.category,
    -- calendar parts of the document date
    year(ih.doc_date)  AS anio,
    month(ih.doc_date) AS mes,
    day(ih.doc_date)   AS dia
FROM invoice_header ih
    INNER JOIN invoice_details di ON di.doc_id = ih.doc_id
    INNER JOIN customers c ON c.customer_id = ih.customer_id
    INNER JOIN products p ON p.product_id = di.product_id

In [0]:
# Fully qualified name of the table whose columns receive a comment.
table_name = "main.default.sales"

# Maps each column name to the description that is stored as its comment.
column_descriptions = {
    "doc_id": "Identificador único de la factura (UUID).",
    "doc_code": "Código interno del documento con formato tipo 001-010-XXXXXXXXX.",
    "doc_type": "Tipo de documento: FA (Factura) o NC (Nota de Crédito).",
    "store_id": "Identificador de la tienda donde se realizó la venta.",
    "customer_id": "Identificador del cliente asociado a la factura.",
    "doc_date": "Fecha y hora de emisión de la factura.",
    "detail_id": "Identificador único del detalle del producto dentro de la factura.",
    "product_id": "Identificador del producto facturado.",
    "quantity": "Cantidad del producto vendida en el detalle.",
    "unit_price": "Precio unitario del producto en el momento de la venta.",
    "discount_percent": "Porcentaje de descuento aplicado al producto.",
    "customer_name": "Nombre completo del cliente.",
    "date_birthday": "Fecha de nacimiento del cliente.",
    "email": "Correo electrónico del cliente.",
    "genero": "Género del cliente (masculino/femenino).",
    "telephone": "Número de teléfono del cliente.",
    "product_name": "Nombre comercial del producto.",
    "category": "Categoría o tipo del producto (por ejemplo, Antibióticos, Vitaminas, etc.).",
    "total_mount": "Valor total de la venta por producto",
    "anio": "Año de la venta",
    "mes": "Mes de la venta",
    "dia": "Dia de la venta",
}

# Run one COMMENT ON COLUMN statement per dictionary entry, in dictionary order,
# and log the column name after its statement has been executed.
for col in column_descriptions:
    comment = column_descriptions[col]
    spark.sql(f"COMMENT ON COLUMN {table_name}.{col} IS '{comment}'")
    print(f"[INFO] Comentando la columna {col}")
print("✅ Todas las descripciones de columnas se han agregado correctamente.")

# Pregunta a la IA

¿Cuánto se ha vendido por Año?

In [0]:
%sql
-- Sum of total_mount per year (anio), sorted by year in ascending order.
SELECT
    anio,
    SUM(total_mount) AS total_mount
FROM main.default.sales
GROUP BY anio
ORDER BY anio ASC

# Pregunta a la IA por el API

Para esto necesitarás:
- space_id: Es el código que se genera al abrir una conversación con Genie. Genie > New > Add your table > Settings > Space Id (Copy)
- access_token: Genéralo desde tu espacio personal. User > Settings > Developer > Access tokens > Manage > Generate new token

In [0]:
# Create one empty text widget per input that the API cells read later.
for widget_name in ("space_id", "access_token"):
    dbutils.widgets.text(widget_name, "")

In [0]:
import requests

# 1. getspace: fetch the Genie space whose id was entered in the `space_id` widget
# The workspace host name is read from the notebook context.
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
workspace_url = "https://" + notebook_context.browserHostName().get()

# Both values come from the notebook widgets.
space_id = dbutils.widgets.get("space_id")
access_token = dbutils.widgets.get("access_token")

get_spaces_url = f"{workspace_url}/api/2.0/genie/spaces/{space_id}"
response1 = requests.get(
    get_spaces_url,
    headers={"Authorization": f"Bearer {access_token}"},
)
print(response1.text)

In [0]:
# 2. startconversation: open a new conversation in the selected space
# NOTE(review): in the Genie API `content` appears to be the text of the opening
# message rather than a title for the thread — confirm against the API reference.
start_conv_url = f"{workspace_url}/api/2.0/genie/spaces/{space_id}/start-conversation"
payload_start = dict(content="Analisis de Ventas")
response2 = requests.post(
    start_conv_url,
    json=payload_start,
    headers={"Authorization": f"Bearer {access_token}"},
)
print(response2.text)

In [0]:
# 3. createmessage: post a question to the conversation opened in the previous step
conversation_payload = response2.json()
conversation_id = conversation_payload['conversation_id']
create_msg_url = f"{workspace_url}/api/2.0/genie/spaces/{space_id}/conversations/{conversation_id}/messages"
payload_message = dict(content="Dame el valor total de venta por año")
response3 = requests.post(
    create_msg_url,
    json=payload_message,
    headers={"Authorization": f"Bearer {access_token}"},
)
print(response3.text)

In [0]:
# 4. getmessage: Retrieve the created message from the conversation
import time

message_id = response3.json()['message_id']
get_msg_url = f"{workspace_url}/api/2.0/genie/spaces/{space_id}/conversations/{conversation_id}/messages/{message_id}"

# Genie answers asynchronously: a message fetched right after it was created is
# usually still being processed and has no attachments yet. Poll until the message
# reports a terminal status so the following cells also work under "Run All".
# NOTE(review): status names follow the Genie API message status enum — confirm.
TERMINAL_STATUSES = {"COMPLETED", "FAILED", "CANCELLED", "QUERY_RESULT_EXPIRED"}
POLL_INTERVAL_SECONDS = 2
MAX_WAIT_SECONDS = 600

deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    response4 = requests.get(get_msg_url, headers={"Authorization": f"Bearer {access_token}"})
    if not response4.ok:
        # Keep the error response so it is printed below, as the single request did.
        break
    status = response4.json().get("status")
    # A missing status is treated as final so the loop cannot spin on an unexpected payload.
    if status is None or status in TERMINAL_STATUSES:
        break
    if time.monotonic() >= deadline:
        break
    time.sleep(POLL_INTERVAL_SECONDS)
print(response4.text)

In [0]:
# 5. getmessageattachmentqueryresult: Get the SQL query attachment result (if any) for the message
message_payload = response4.json()

# A message can carry several attachments and the first one is not necessarily the
# SQL query, so select the first attachment that actually contains a `query`.
query_attachments = [
    attachment
    for attachment in (message_payload.get('attachments') or [])
    if attachment.get('query')
]
if not query_attachments:
    # Fail with an explicit message instead of an opaque KeyError/IndexError.
    raise ValueError(
        "El mensaje no tiene ningún adjunto de tipo consulta; "
        f"estado actual del mensaje: {message_payload.get('status')}"
    )

attachment_id = query_attachments[0]['attachment_id']
get_attachment_result_url = f"{workspace_url}/api/2.0/genie/spaces/{space_id}/conversations/{conversation_id}/messages/{message_id}/attachments/{attachment_id}/query-result"
response5 = requests.get(get_attachment_result_url, headers={"Authorization": f"Bearer {access_token}"})
print(response5.text)

In [0]:
# 6. The same result can also be read through the SQL Statement Execution API
message_payload = response4.json()

# Read the statement id from the message-level `query_result` first, as before.
# NOTE(review): the Genie API reference marks `query_result` as deprecated in favour
# of the query attachment, so fall back to the attachment's `statement_id` — confirm.
statement_id = (message_payload.get('query_result') or {}).get('statement_id')
if not statement_id:
    for attachment in (message_payload.get('attachments') or []):
        statement_id = (attachment.get('query') or {}).get('statement_id')
        if statement_id:
            break
if not statement_id:
    # Fail with an explicit message instead of an opaque KeyError/TypeError.
    raise ValueError(
        "No se encontró statement_id en el mensaje; "
        f"estado actual del mensaje: {message_payload.get('status')}"
    )

get_attachment_result_url = f"{workspace_url}/api/2.0/sql/statements/{statement_id}"
response6 = requests.get(get_attachment_result_url, headers={"Authorization": f"Bearer {access_token}"})
print(response6.text)