In [4]:
import json
from dotenv import load_dotenv
import os
from pydantic import BaseModel
from openai import OpenAI
from datetime import datetime
from typing import List
from fastapi.encoders import jsonable_encoder
import pandas as pd
import base64



# Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?
# 11 25

# client = OpenAI()
# client.api_key = os.getenv('OPENAI_API_KEY')

# One line item of a receipt; instances appear in `Transaction.receipt_detail`.
# Documented with comments rather than a docstring on purpose: Pydantic copies a
# model's docstring into the `description` of its JSON schema, and that schema is
# embedded in the prompt sent to the model.
class ReceiptDetail(BaseModel):
    quantity: float  # e.g. 1.0 in the sample output
    description: str  # item text as read from the receipt, e.g. "TEFLON 1/2 COFLEX"
    category: str  # e.g. "Herrajes"; presumably chosen by the model — TODO confirm
    unit_price: float  # e.g. 229.00 in the sample output
    subtotal: float  # e.g. 229.00; presumably quantity * unit_price — not enforced here

# One payment applied to a receipt; instances appear in
# `Transaction.receipt_payment_method`.
class PaymentMethod(BaseModel):
    payment_method_type: str  # e.g. "Efectivo" in the sample output
    payment_method_name: str  # e.g. "Efectivo" in the sample output
    paid_amount: float  # e.g. 300.00 in the sample output

# A whole receipt: header data, its line items and the payments made against it.
class Transaction(BaseModel):
    transaction_date: str  # kept as a plain string; the sample output uses "2022-09-16"
    commerce: str  # merchant name, e.g. "FERRETERIA COYOACAN"
    receipt_detail: List[ReceiptDetail]  # line items
    receipt_payment_method: List[PaymentMethod]  # payments
    receipt_total: float  # e.g. 238.00 in the sample output
    total_payment: float  # e.g. 300.00 in the sample output (can exceed receipt_total)

Transaction.model_rebuild() # NOTE(review): originally noted as required for recursive types, but no field above refers back to Transaction, so this call looks redundant — confirm before removing

# Top-level envelope the model is asked to return; its JSON schema is what gets
# interpolated into the system prompt via `Response.model_json_schema()`.
# Keep this as a comment: a class docstring would be added to that schema as
# `description` and therefore change the prompt.
class Response(BaseModel):
    transaction: Transaction  # the single receipt extracted from the image

# Language the receipt content should be expressed in (used in the system prompt).
user_input_lang = 'spanish'

# Path to the receipt image to process.
# NOTE(review): this is an absolute, machine-specific path; consider moving it to
# a configurable location so the notebook runs on other machines.
image_path = "C:\\Users\\David\\Downloads\\Phone Link\\IMG-20220916-WA0000.jpg"

with open(image_path, "rb") as f:
    image_data = f.read()

# Build a base64 data URL for the image.
# os.path.splitext() returns the extension WITH its leading dot (".jpg"), which
# previously produced the malformed media type "image/.jpg". Strip the dot and
# map file extensions that differ from their registered image subtype.
image_ext = os.path.splitext(image_path)[1].lstrip('.').lower()
image_subtype = {'jpg': 'jpeg', 'jpe': 'jpeg', 'tif': 'tiff'}.get(image_ext, image_ext)
image_url = f"data:image/{image_subtype};base64,{base64.b64encode(image_data).decode('utf-8')}"


# Shared across the whole run so every layer references the same output file.
EXECUTION_DATETIME = datetime.now().strftime('%Y%m%d_%H%M%S')

# Load MOONSHOT_API_KEY (and any other settings) from the local .env file
# before the client is created.
load_dotenv(dotenv_path='./.env')

# OpenAI-compatible client pointed at the Moonshot endpoint.
client = OpenAI(
    base_url="https://api.moonshot.ai/v1",
    api_key=os.getenv('MOONSHOT_API_KEY'),
)

# Today's date in ISO format; given to the model as date context in the prompt.
today_date = datetime.today().strftime('%Y-%m-%d')

In [None]:
# Serialize the response schema as real JSON. Interpolating the dict returned by
# model_json_schema() directly would embed its Python repr (single-quoted keys),
# which is not valid JSON even though the prompt asks for JSON matching it.
response_schema_json = json.dumps(Response.model_json_schema())

# Send the receipt image plus a short instruction and ask for a JSON object back.
chat_completion = client.chat.completions.create(
        model="kimi-k2.5",
        messages=[
        {"role": "system", "content": f"""
You are an assistant expecting an image of a receipt. You will use your OCR capabilities to fit the attributes and values according to the json structure I have provided. Consider todays date as {today_date}. Almost all receipts will be in {user_input_lang}, although the json structure is in english. However, if you identify either from user prompt or by yourself that the receipt is in another language, translate it to {user_input_lang} and then fit it into the structure. Return your analysis in valid JSON matching this schema: {response_schema_json}. Any other attributes that don't fit in the json schema such as address of the commerce, tax rates, etc, and their values put them under additional key "__ignored_attributes".
        """ },
        {
            "role": "user",
            # Note: content is a list of content parts rather than a plain string.
            # The image (image_url) is one part and the text instruction is another.
            "content": [
                {
                    "type": "image_url",  # image part; the URL is a base64 data URL
                    "image_url": {
                        "url": image_url,
                    },
                },
                {
                    "type": "text",  # text part carrying the instruction
                    "text": "Process this receipt.",
                },
            ],
        },
    ],
        # Ask the API for a JSON object so the reply can be parsed with json.loads.
        response_format={"type": "json_object"}
)

# print(chat_completion.choices[0].message.content)
# print(chat_completion.usage.prompt_tokens, chat_completion.usage.completion_tokens)



# Landing layer: keep the raw text of the first completion choice and echo it.
first_choice = chat_completion.choices[0]
response = first_choice.message.content
print(response)
# Transform response in py dict
# response_json = jsonable_encoder(response)['transaction']
# print(response_json)

{
  "transaction": {
    "transaction_date": "2022-09-16",
    "commerce": "FERRETERIA COYOACAN",
    "receipt_detail": [
      {
        "quantity": 1.0,
        "description": "HERRAJE ROSMASTER 2107 FAMA",
        "category": "Herrajes",
        "unit_price": 229.00,
        "subtotal": 229.00
      },
      {
        "quantity": 1.0,
        "description": "TEFLON 1/2 COFLEX",
        "category": "Fontanería",
        "unit_price": 9.00,
        "subtotal": 9.00
      }
    ],
    "receipt_payment_method": [
      {
        "payment_method_type": "Efectivo",
        "payment_method_name": "Efectivo",
        "paid_amount": 300.00
      }
    ],
    "receipt_total": 238.00,
    "total_payment": 300.00,
    "_ignored_attributes": {
      "propietario": "BERNARDO DIAZ DE LEON GARZA",
      "direccion": "Av. Ruiz Cortines No. 2929 Ote., Fracc. Coyoacan C.P. 64510, Monterrey, Nuevo Leon, Mexico",
      "telefono": "(81) 8355 2250",
      "rfc": "DIQB-640617-BWA",
      "nota_numero": "5

In [6]:
# Show the full ChatCompletion object (id, choices, finish_reason, ...) for inspection.
chat_completion

ChatCompletion(id='chatcmpl-69843cef6c98d241600033d3', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "transaction": {\n    "transaction_date": "2022-09-16",\n    "commerce": "FERRETERIA COYOACAN",\n    "receipt_detail": [\n      {\n        "quantity": 1.0,\n        "description": "HERRAJE ROSMASTER 2107 FAMA",\n        "category": "Herrajes",\n        "unit_price": 229.00,\n        "subtotal": 229.00\n      },\n      {\n        "quantity": 1.0,\n        "description": "TEFLON 1/2 COFLEX",\n        "category": "Fontanería",\n        "unit_price": 9.00,\n        "subtotal": 9.00\n      }\n    ],\n    "receipt_payment_method": [\n      {\n        "payment_method_type": "Efectivo",\n        "payment_method_name": "Efectivo",\n        "paid_amount": 300.00\n      }\n    ],\n    "receipt_total": 238.00,\n    "total_payment": 300.00,\n    "_ignored_attributes": {\n      "propietario": "BERNARDO DIAZ DE LEON GARZA",\n      "direccion"

In [9]:
# Pull the JSON text out of the first choice's message; it is parsed later,
# when the result is written to disk.
first_message = chat_completion.choices[0].message
raw_content = first_message.content
print(raw_content)

{
  "transaction": {
    "transaction_date": "2022-09-16",
    "commerce": "FERRETERIA COYOACAN",
    "receipt_detail": [
      {
        "quantity": 1.0,
        "description": "HERRAJE ROSMASTER 2107 FAMA",
        "category": "Herrajes",
        "unit_price": 229.00,
        "subtotal": 229.00
      },
      {
        "quantity": 1.0,
        "description": "TEFLON 1/2 COFLEX",
        "category": "Fontanería",
        "unit_price": 9.00,
        "subtotal": 9.00
      }
    ],
    "receipt_payment_method": [
      {
        "payment_method_type": "Efectivo",
        "payment_method_name": "Efectivo",
        "paid_amount": 300.00
      }
    ],
    "receipt_total": 238.00,
    "total_payment": 300.00,
    "_ignored_attributes": {
      "propietario": "BERNARDO DIAZ DE LEON GARZA",
      "direccion": "Av. Ruiz Cortines No. 2929 Ote., Fracc. Coyoacan C.P. 64510, Monterrey, Nuevo Leon, Mexico",
      "telefono": "(81) 8355 2250",
      "rfc": "DIQB-640617-BWA",
      "nota_numero": "5

In [None]:
# Text-to-JSON layer: make sure today's date-stamped output folder exists.
today_date_folder = f'{datetime.today():%Y%m%d}'
text_to_json_dir = f'../../../datastore/spend/text_to_json/{today_date_folder}'
os.makedirs(text_to_json_dir, exist_ok=True)

NameError: name 'datetime' is not defined

In [None]:
# Name the output after the shared execution timestamp and place it in the
# date-stamped folder for this run.
filename = f'receipt_payments_{EXECUTION_DATETIME}.json'
filepath_json = f'../../../datastore/spend/text_to_json/{today_date_folder}/{filename}'

# Parse the model's JSON text and write it back out as a JSON document.
with open(filepath_json, 'w') as out_file:
    parsed_receipt = json.loads(raw_content)
    json.dump(parsed_receipt, out_file)