In [10]:
import json
from dotenv import load_dotenv
import os
from pydantic import ValidationError
from openai import OpenAI
from datetime import datetime
from fastapi.encoders import jsonable_encoder
from TransactionResponse import Response

# Load credentials from the local .env file into the process environment.
load_dotenv(dotenv_path='./.env')

api_key = os.getenv('MOONSHOT_API_KEY')
if not api_key:
    # Fail fast with a clear message instead of handing a missing key to the client.
    raise RuntimeError("MOONSHOT_API_KEY is not set; add it to ./.env or the environment")
base_url = "https://api.moonshot.ai/v1"
model = "kimi-k2.5"

# Read the clock once so the run identifier and the prompt date cannot disagree
# (e.g. when the cell happens to execute right at midnight).
_run_started_at = datetime.now()

# Shared across the whole run so every layer references the same file.
EXECUTION_DATETIME = _run_started_at.strftime('%Y%m%d_%H%M%S')

today_date = _run_started_at.strftime('%Y-%m-%d')

user_input_lang = 'spanish'

# The schema is serialized with json.dumps so the prompt carries real JSON
# (double quotes, true/false/null) rather than the repr of a Python dict.
system_content = f"""
You are an expert at structured data extraction. You will be given unstructured text with detailed receipts from purchases in commerces and detailed payment methods of that specific receipt and should convert it into the given structure. Consider todays date as {today_date} and consider you will receive the data in {user_input_lang} but the structure is in english. Return your analysis in valid JSON matching this schema: {json.dumps(Response.model_json_schema())}
        """

# TODO: replace this sample with the corresponding file from the landing folder.
user_input_text = """
acabamos de comprar en costco un par de jeans por 500 unos cuernitos por 99 dos pastelitos por 150 cada uno 25 litros de gasolina por 23.01 cada uno 1.30 kilos de tomate en 29.90 cada uno pagado con tarjeta de crédito costco citibanamex un total de 1500.12 pesos y con tarjeta NU un total de 2000.01 pesos
"""

# Conversation sent to the model: extraction instructions first, then the receipt text.
messages = [
    {"role": "system", "content": system_content},
    {"role": "user", "content": user_input_text},
]

In [11]:
# Build the API client from the endpoint and key configured above.
client = OpenAI(api_key=api_key, base_url=base_url)

In [12]:
# Request a JSON-object reply for the prepared system + user messages.
completion_request = {
    "model": model,
    "messages": messages,
    "response_format": {"type": "json_object"},
}
chat_completion = client.chat.completions.create(**completion_request)

In [13]:
# Print the whole response object (its repr) for a quick look at the raw reply.
print(chat_completion)

ChatCompletion(id='chatcmpl-698967345d4d52cb69210899', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "transaction": {\n    "transaction_date": "2026-02-08",\n    "commerce": "Costco",\n    "receipt_detail": [\n      {\n        "quantity": 1,\n        "description": "par de jeans",\n        "category": "Clothing",\n        "unit_price": 500,\n        "subtotal": 500\n      },\n      {\n        "quantity": 1,\n        "description": "cuernitos",\n        "category": "Bakery",\n        "unit_price": 99,\n        "subtotal": 99\n      },\n      {\n        "quantity": 2,\n        "description": "pastelitos",\n        "category": "Bakery",\n        "unit_price": 150,\n        "subtotal": 300\n      },\n      {\n        "quantity": 25,\n        "description": "gasolina",\n        "category": "Fuel",\n        "unit_price": 23.01,\n        "subtotal": 575.25\n      },\n      {\n        "quantity": 1.30,\n        "description": "tomate"

In [15]:
# Validate the model's JSON reply against the Pydantic `Response` model.
raw_content = chat_completion.choices[0].message.content
try:
    transaction = Response.model_validate_json(raw_content)
except ValidationError as e:
    print(f"LLM output failed validation: {e}")
    # Re-raise so the run stops here: continuing would leave `transaction`
    # undefined and let later cells write an unvalidated reply to disk.
    raise

In [16]:
# Pull the text of the first choice out of the response and show it as-is.
first_choice = chat_completion.choices[0]
raw_content = first_choice.message.content
print(raw_content)

{
  "transaction": {
    "transaction_date": "2026-02-08",
    "commerce": "Costco",
    "receipt_detail": [
      {
        "quantity": 1,
        "description": "par de jeans",
        "category": "Clothing",
        "unit_price": 500,
        "subtotal": 500
      },
      {
        "quantity": 1,
        "description": "cuernitos",
        "category": "Bakery",
        "unit_price": 99,
        "subtotal": 99
      },
      {
        "quantity": 2,
        "description": "pastelitos",
        "category": "Bakery",
        "unit_price": 150,
        "subtotal": 300
      },
      {
        "quantity": 25,
        "description": "gasolina",
        "category": "Fuel",
        "unit_price": 23.01,
        "subtotal": 575.25
      },
      {
        "quantity": 1.30,
        "description": "tomate",
        "category": "Groceries",
        "unit_price": 29.90,
        "subtotal": 38.87
      }
    ],
    "receipt_payment_method": [
      {
        "payment_method_type": "Credit Card",


In [17]:
# Create the dated output folder of the text_to_json layer.
# The folder name is taken from the date part of EXECUTION_DATETIME (format
# '%Y%m%d_%H%M%S') rather than a fresh clock read, so the folder and the
# timestamped filename always refer to the same run, even when this cell is
# executed after midnight or re-run later on a live kernel.
today_date_folder = EXECUTION_DATETIME.split('_')[0]
os.makedirs(f'../../../datastore/spend/text_to_json/{today_date_folder}', exist_ok=True)

In [26]:
# Persist the model's reply as a JSON file inside the dated text_to_json folder.
filename = f'receipt_payments_{EXECUTION_DATETIME}.json'
filepath_json = f'../../../datastore/spend/text_to_json/{today_date_folder}/{filename}'

# Parse before opening the file: opening in 'w' mode truncates immediately, so a
# malformed reply would otherwise leave an empty file behind.
receipt_payload = json.loads(raw_content)

# Make sure the target folder exists even when this cell is run on its own.
os.makedirs(os.path.dirname(filepath_json), exist_ok=True)

with open(filepath_json, 'w', encoding='utf-8') as f:
    json.dump(receipt_payload, f)

In [19]:
# Persist these names with IPython's %store magic so another kernel/notebook
# can restore them with `%store -r`.
%store filename
%store filepath_json
%store today_date_folder
%store EXECUTION_DATETIME

Stored 'filename' (str)
Stored 'filepath_json' (str)
Stored 'today_date_folder' (str)
Stored 'EXECUTION_DATETIME' (str)
