In [3]:
import json
from dotenv import load_dotenv
import os
from pydantic import BaseModel
from openai import OpenAI
from datetime import datetime
from typing import List
from fastapi.encoders import jsonable_encoder
import pandas as pd


# Timestamp captured once per run; it is shared by the whole execution so that
# every layer references the same output file name.
EXECUTION_DATETIME = datetime.now().strftime('%Y%m%d_%H%M%S')

# Load environment variables (notably OPENAI_API_KEY) from the local .env file.
load_dotenv(dotenv_path='./.env')

# Hand the key to the constructor instead of patching `client.api_key` afterwards:
# OpenAI() already resolves and validates the key while it is being constructed,
# so a later assignment is redundant and can never rescue a missing key.
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

# One purchased line item of a receipt; used as the element type of
# Transaction.receipt_detail. Values are produced by the model from free text.
# NOTE: documented with `#` comments on purpose — a class docstring on a pydantic
# model is emitted as the JSON-schema "description" and would alter the schema.
class ReceiptDetail(BaseModel):
    quantity: float      # amount purchased; may be fractional (1.3 in the recorded sample run)
    description: str     # item text as extracted (e.g. 'par de jeans' in the sample run)
    category: str        # free-form label (sample run: 'Clothing', 'Bakery', 'Fuel', 'Produce'); no fixed vocabulary is enforced here
    unit_price: float    # price per unit of `quantity`
    subtotal: float      # line total; presumably quantity * unit_price (holds in the sample run) but not validated here

# One payment applied to a receipt; used as the element type of
# Transaction.receipt_payment_method. A receipt may carry several of these.
# NOTE: documented with `#` comments on purpose — a class docstring on a pydantic
# model is emitted as the JSON-schema "description" and would alter the schema.
class PaymentMethod(BaseModel):
    payment_method_type: str   # kind of instrument as free text ('Credit Card' in the recorded sample run)
    payment_method_name: str   # specific instrument name (sample run: 'Costco Citibanamex', 'NU')
    paid_amount: float         # amount charged to this payment method

# A full purchase: where and when it happened, its line items and its payments.
# NOTE: documented with `#` comments on purpose — a class docstring on a pydantic
# model is emitted as the JSON-schema "description" and would alter the schema.
class Transaction(BaseModel):
    transaction_date: str                        # plain string, so no date validation happens here ('2024-11-13' in the recorded sample run)
    commerce: str                                # merchant name ('Costco' in the sample run)
    receipt_detail: List[ReceiptDetail]          # purchased line items
    receipt_payment_method: List[PaymentMethod]  # payments applied to the receipt
    receipt_total: float                         # receipt total; equals the sum of subtotals in the sample run, but nothing enforces it
    total_payment: float                         # total paid; equals the sum of paid_amount in the sample run, but nothing enforces it

# Original intent: rebuild the model so recursive/forward-referenced types resolve.
# NOTE(review): Transaction has no self-referencing or forward-referenced fields
# visible in this cell, so this call looks unnecessary (though harmless) — confirm.
Transaction.model_rebuild() # This is required to enable recursive types

# Top-level wrapper handed to the API as `response_format`; the extracted
# purchase lives under the single `transaction` key, which is the key read back
# after encoding the parsed response.
# NOTE: documented with `#` comments on purpose — a class docstring on a pydantic
# model is emitted as the JSON-schema "description" and would alter the schema.
class Response(BaseModel):
    transaction: Transaction  # the one extracted purchase

# Calendar date (YYYY-MM-DD) that the system prompt presents to the model as "today".
today_date = datetime.today().date().isoformat()

# Language the free-text input is written in; interpolated into the system prompt.
user_input_lang = 'spanish'

# The corresponding file from the landing folder has to be loaded here;
# for now a hard-coded sample receipt description is used.
user_input_text = """
acabamos de comprar en costco un par de jeans por 500 unos cuernitos por 99 dos pastelitos por 150 cada uno 25 litros de gasolina por 23.01 cada uno 1.30 kilos de tomate en 29.90 cada uno pagado con tarjeta de crédito costco citibanamex un total de 1500.12 pesos y con tarjeta NU un total de 2000.01 pesos
"""

# System instructions: tell the model to map the free-text receipt onto the
# response schema, what date to treat as today, and the language of the input.
system_prompt = f"""
You are an expert at structured data extraction. You will be given unstructured text with detailed receipts from purchases in commerces and detailed payment methods of that specific receipt and should convert it into the given structure. Consider todays date as {today_date} and consider you will receive the data in {user_input_lang} but the sctructure is in english.
        """

# Conversation sent to the model: the instructions followed by the raw user text.
chat_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_input_text},
]

# Structured-output request; `Response` is supplied as the response format.
completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=chat_messages,
    response_format=Response,
)

# Landing layer: take the structured object parsed from the first choice.
# `parsed` is None when the model refuses or returns nothing parseable; fail here
# with an explicit message instead of a TypeError on `None['transaction']` below.
parsed_message = completion.choices[0].message
if parsed_message.parsed is None:
    raise ValueError(
        f'Model returned no structured output (refusal: {parsed_message.refusal!r})'
    )
response = parsed_message.parsed

# Convert the parsed response into plain Python data and keep only the transaction.
response_json = jsonable_encoder(response)['transaction']

# Make sure today's folder of the text_to_json layer exists before anything is written.
today_date_folder = datetime.today().strftime('%Y%m%d')
os.makedirs(f'./spend/text_to_json/{today_date_folder}', exist_ok=True)

transaction=Transaction(transaction_date='2024-11-13', commerce='Costco', receipt_detail=[ReceiptDetail(quantity=1.0, description='par de jeans', category='Clothing', unit_price=500.0, subtotal=500.0), ReceiptDetail(quantity=1.0, description='cuernitos', category='Bakery', unit_price=99.0, subtotal=99.0), ReceiptDetail(quantity=2.0, description='pastelitos', category='Bakery', unit_price=150.0, subtotal=300.0), ReceiptDetail(quantity=25.0, description='litros de gasolina', category='Fuel', unit_price=23.01, subtotal=575.25), ReceiptDetail(quantity=1.3, description='kilos de tomate', category='Produce', unit_price=29.9, subtotal=38.87)], receipt_payment_method=[PaymentMethod(payment_method_type='Credit Card', payment_method_name='Costco Citibanamex', paid_amount=1500.12), PaymentMethod(payment_method_type='Credit Card', payment_method_name='NU', paid_amount=2000.01)], receipt_total=1513.12, total_payment=3500.13)


In [2]:
# Output file for this run. Relies on EXECUTION_DATETIME, today_date_folder and
# response_json already being defined by the earlier cell in the same kernel.
filename = f'receipt_payments_{EXECUTION_DATETIME}.json'
filepath_json = f'./spend/text_to_json/{today_date_folder}/{filename}'

# Serialize first, then write the resulting JSON text in one go.
serialized_transaction = json.dumps(response_json)
with open(filepath_json, 'w') as json_file:
    json_file.write(serialized_transaction)

NameError: name 'EXECUTION_DATETIME' is not defined

In [None]:
# Persist the output file name and path with IPython's %store magic so another
# notebook/kernel can restore them via `%store -r`. Both names must already exist
# in this kernel (i.e. the cell that defines them has to run first).
%store filename
%store filepath_json

UsageError: Unknown variable 'filename'
