In [None]:
import json
from dotenv import load_dotenv
import os
from pydantic import BaseModel, ValidationError
from openai import OpenAI
from datetime import datetime
from typing import List
from fastapi.encoders import jsonable_encoder
import pandas as pd



# Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?
# 11 25

# client = OpenAI()
# client.api_key = os.getenv('OPENAI_API_KEY')

# One purchased line item on a receipt.
# None of the fields carry validators: pydantic only checks/coerces the declared types.
class ReceiptDetail(BaseModel):
    quantity: float  # float, not int: the sample data has fractional amounts (1.30 kg, 25 litres)
    description: str  # item name, e.g. "Jeans"
    category: str  # free-form label chosen by the model, e.g. "Clothing", "Bakery"
    unit_price: float  # price of a single unit
    subtotal: float  # presumably quantity * unit_price; not enforced here -- TODO confirm

# One payment applied to a receipt; a receipt may list several of these.
class PaymentMethod(BaseModel):
    payment_method_type: str  # kind of payment, e.g. "Credit Card"
    payment_method_name: str  # presumably the specific card/issuer name -- TODO confirm
    paid_amount: float  # amount covered by this payment method

# A full purchase: where and when it happened, the line items and how it was paid.
class Transaction(BaseModel):
    transaction_date: str  # kept as a plain string (sample value "2026-02-05"); not parsed as a date
    commerce: str  # name of the store, e.g. "Costco"
    receipt_detail: List[ReceiptDetail]  # purchased line items
    receipt_payment_method: List[PaymentMethod]  # payments applied to this receipt
    receipt_total: float  # presumably the sum of the line subtotals; not validated -- TODO confirm
    total_payment: float  # presumably the sum of the paid amounts; not validated -- TODO confirm

# Original note: "required to enable recursive types".
# NOTE(review): no model in this cell references itself or uses a forward reference,
# so this rebuild looks unnecessary -- confirm before removing.
Transaction.model_rebuild()

# Top-level envelope expected from the model: {"transaction": {...}}.
# Its JSON schema is interpolated into the system prompt further down this cell.
# Documented with `#` comments on purpose: pydantic copies a class docstring into the
# generated JSON schema as "description", which would change the prompt text.
class Response(BaseModel):
    transaction: Transaction

# Language the free-text input is written in; interpolated into the system prompt.
user_input_lang = 'spanish'

# TODO: replace this hard-coded sample with the corresponding file from the landing folder.
user_input_text = """
acabamos de comprar en costco un par de jeans por 500 unos cuernitos por 99 dos pastelitos por 150 cada uno 25 litros de gasolina por 23.01 cada uno 1.30 kilos de tomate en 29.90 cada uno pagado con tarjeta de crédito costco citibanamex un total de 1500.12 pesos y con tarjeta NU un total de 2000.01 pesos
"""

# Read the clock once so the file stamp and the date given to the model can never
# straddle midnight and disagree with each other.
_run_started_at = datetime.now()

# Shared across the whole execution so every layer references the same file.
EXECUTION_DATETIME = _run_started_at.strftime('%Y%m%d_%H%M%S')

# Date the model is told to treat as "today".
today_date = _run_started_at.strftime('%Y-%m-%d')

load_dotenv(dotenv_path='./.env')

# Fail fast when the key is missing. Passing api_key=None makes the OpenAI client fall
# back to the OPENAI_API_KEY environment variable, which would send an unrelated
# credential to the Moonshot endpoint (or fail later with a less specific error).
moonshot_api_key = os.getenv('MOONSHOT_API_KEY')
if not moonshot_api_key:
    raise RuntimeError(
        "MOONSHOT_API_KEY is not set; define it in ./.env or in the environment."
    )

# OpenAI-compatible client pointed at the Moonshot API.
client = OpenAI(
    base_url="https://api.moonshot.ai/v1",
    api_key=moonshot_api_key,
)

# The prompt asks for "valid JSON matching this schema", so the schema itself must be
# JSON. Interpolating the dict directly would embed its Python repr (single quotes,
# True/None), which is not JSON; serialise it explicitly instead.
response_schema_json = json.dumps(Response.model_json_schema())

# System prompt: extraction instructions, the reference date, the input language and
# the target schema. Leading/trailing whitespace is kept exactly as before.
system_prompt = f"""
You are an expert at structured data extraction. You will be given unstructured text with detailed receipts from purchases in commerces and detailed payment methods of that specific receipt and should convert it into the given structure. Consider todays date as {today_date} and consider you will receive the data in {user_input_lang} but the structure is in english. Return your analysis in valid JSON matching this schema: {response_schema_json}
        """

# Ask the model for a JSON object; the reply text is validated against Response in a
# later cell.
chat_completion = client.chat.completions.create(
    model="kimi-k2.5",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input_text},
    ],
    response_format={"type": "json_object"},
)


# Transform response in py dict
# response_json = jsonable_encoder(response)['transaction']
# print(response_json)

In [17]:
# Debug view: print the whole ChatCompletion object (id, choices, finish_reason, message, ...).
print(chat_completion)

ChatCompletion(id='chatcmpl-69843a74576c2e25a8cf8d1f', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "transaction": {\n    "transaction_date": "2026-02-05",\n    "commerce": "Costco",\n    "receipt_detail": [\n      {\n        "quantity": 1,\n        "description": "Jeans",\n        "category": "Clothing",\n        "unit_price": 500,\n        "subtotal": 500\n      },\n      {\n        "quantity": 1,\n        "description": "Cuernitos",\n        "category": "Bakery",\n        "unit_price": 99,\n        "subtotal": 99\n      },\n      {\n        "quantity": 2,\n        "description": "Pastelitos",\n        "category": "Bakery",\n        "unit_price": 150,\n        "subtotal": 300\n      },\n      {\n        "quantity": 25,\n        "description": "Gasolina",\n        "category": "Fuel",\n        "unit_price": 23.01,\n        "subtotal": 575.25\n      },\n      {\n        "quantity": 1.30,\n        "description": "Tomate",\n    

In [None]:
# Validate the model's reply by hand: take the text of the first choice and let
# pydantic parse it against the Response schema.
raw_content = chat_completion.choices[0].message.content

try:
    transaction = Response.model_validate_json(raw_content)
except ValidationError as validation_error:
    # Report the mismatch and carry on; `transaction` is not assigned in this case.
    print(f"LLM output failed validation: {validation_error}")

In [25]:
# Pull the reply text out of the first (and only requested) choice and show it.
first_choice = chat_completion.choices[0]
raw_content = first_choice.message.content
print(raw_content)

{
  "transaction": {
    "transaction_date": "2026-02-05",
    "commerce": "Costco",
    "receipt_detail": [
      {
        "quantity": 1,
        "description": "Jeans",
        "category": "Clothing",
        "unit_price": 500,
        "subtotal": 500
      },
      {
        "quantity": 1,
        "description": "Cuernitos",
        "category": "Bakery",
        "unit_price": 99,
        "subtotal": 99
      },
      {
        "quantity": 2,
        "description": "Pastelitos",
        "category": "Bakery",
        "unit_price": 150,
        "subtotal": 300
      },
      {
        "quantity": 25,
        "description": "Gasolina",
        "category": "Fuel",
        "unit_price": 23.01,
        "subtotal": 575.25
      },
      {
        "quantity": 1.30,
        "description": "Tomate",
        "category": "Groceries",
        "unit_price": 29.90,
        "subtotal": 38.87
      }
    ],
    "receipt_payment_method": [
      {
        "payment_method_type": "Credit Card",
       

In [27]:
# Make sure today's output folder exists in the text_to_json layer.
today_date_folder = f'{datetime.today():%Y%m%d}'
os.makedirs(
    f'./spend/text_to_json/{today_date_folder}',
    exist_ok=True,
)

In [28]:
# Persist the model's reply in the text_to_json layer, named with the shared run stamp.
filename = f'receipt_payments_{EXECUTION_DATETIME}.json'
filepath_json = f'./spend/text_to_json/{today_date_folder}/{filename}'

# raw_content is already JSON *text*. Handing that str to json.dump encodes it a second
# time, so the file would contain one big quoted string instead of an object. Parse it
# first; this also refuses to write a reply that is not valid JSON.
parsed_content = json.loads(raw_content)

# UTF-8 with ensure_ascii=False keeps accented characters readable in the file.
with open(filepath_json, 'w', encoding='utf-8') as f:
    json.dump(parsed_content, f, ensure_ascii=False, indent=2)

In [None]:
# Persist the file name, file path, date folder and run stamp with IPython's %store
# magic (presumably so a later notebook can restore them with `%store -r` -- TODO confirm).
# NOTE(review): %store needs each name to exist in the kernel; the recorded
# "UsageError: Unknown variable 'filename'" means this cell was run before the cells
# that define these variables.
%store filename
%store filepath_json
%store today_date_folder
%store EXECUTION_DATETIME

UsageError: Unknown variable 'filename'
