## OCR and Prompting Example

ตัวอย่างการเขียน OCR ด้วย Google Vision API ร่วมกับ Prompt

In [None]:
from google.oauth2 import service_account
from google.cloud import aiplatform
from vertexai.generative_models import GenerativeModel, Part, Image

# Vertex AI setup: project name and service-account credentials are passed to
# aiplatform.init so the model calls below run against this project.
project_name = "cpf-generative-ai" # Put your project name here
credentials = service_account.Credentials.from_service_account_file("") # Put the path to the JSON key file here
aiplatform.init(project=project_name, credentials=credentials)

def get_response_with_image(prompt, image_path, model_name="gemini-pro-vision"):
    """Ask a generative model a question about an image and return its reply.

    Args:
        prompt (str): Question or instruction text sent alongside the image.
        image_path (str): Path of the image file to load.
        model_name (str): Name of the model to use
            (defaults to "gemini-pro-vision").

    Returns:
        str: The ``text`` of the model's response.
    """
    gemini = GenerativeModel(model_name)

    # The image is placed before the prompt text in the request contents.
    contents = [Image.load_from_file(image_path), prompt]

    return gemini.generate_content(contents).text

# Prompt for the image-based flow: asks the model to read a receipt image and
# return the customer name, the product rows (name, amount, unit) and the total
# amount as a JSON object. The text is sent to the model verbatim.
ocr_prompt = """
Please analyze the given receipt image and extract the following information:

Customer name (if available)
Product details including:

Product name/code
Amount (quantity)
Unit (if applicable)
Total amount

For each product, please provide:

The product name or code as written
The quantity or amount
The unit (if given next to the quantity)

Important: If you see a "-" or a blank entry in any field, infer or use the value from the row immediately above it.
After extraction, format the output as a JSON object with the following structure:
{
"customer_name": "",
  "products": [
{
  "name": "",
  "amount": "",
  "unit (amount)": ""
}
],
"total_amount": ""
}
If any information is not available or unclear, leave the field empty or mark it as "N/A".
"""

In [None]:
import os
# Sets the credentials-file environment variable for this process; the value is
# a placeholder and must be replaced with the real JSON key file path.
# NOTE(review): presumably read by vision.ImageAnnotatorClient() in detect_text — confirm.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "../<provide_api_here>.json"

def detect_text(path):
    """Run Cloud Vision text detection on a local image file.

    Args:
        path (str): Path of the image file; it is read in binary mode and its
            bytes are sent to the API.

    Returns:
        The ``text_annotations`` sequence of the Vision response. Each entry
        exposes a ``description`` attribute with the detected text.

    Raises:
        Exception: If the response carries a non-empty error message.
    """
    # Kept as a function-scope import, as in the original cell.
    from google.cloud import vision

    client = vision.ImageAnnotatorClient()

    with open(path, "rb") as image_file:
        content = image_file.read()

    response = client.text_detection(image=vision.Image(content=content))

    # Surface an API-level error before handing back any annotations.
    if response.error.message:
        raise Exception(
            "{}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors".format(response.error.message)
        )
    return response.text_annotations

def get_response(prompt, model_name="gemini-1.5-flash"):
    """Send a text-only prompt to a generative model and return its reply.

    Args:
        prompt (str): Question or instruction text for the model.
        model_name (str): Name of the model to use
            (defaults to "gemini-1.5-flash").

    Returns:
        str: The ``text`` of the model's response.
    """
    reply = GenerativeModel(model_name).generate_content(prompt)
    return reply.text

In [None]:
# NOTE(review): `path` is not defined in the visible cells — it must be set to
# the receipt image path before this cell runs.
annotations = detect_text(path)

# The first annotation's description already holds the entire detected text;
# the remaining entries repeat it word by word, so joining all of them would
# feed the receipt content to the model twice. Fall back to an empty string
# when nothing was detected.
ocr_text = annotations[0].description if annotations else ""

# Prompt for the OCR-text flow: the instructions below are followed by the raw
# OCR text (appended at the end), and the model is asked to return the customer
# name, product rows and total amount as a JSON object.
prompt = """
Given the OCR text extracted from a receipt image, please analyze and extract the following information:

Customer name (if available)
Product details including:

Product name/code
Amount (quantity)
Unit (if applicable)


Total amount

For each product, provide:

The product name or code as written
The quantity or amount
The unit (if given next to the quantity)

Important instructions:

If you see a "-" or a blank entry in any field, infer or use the value from the row immediately above it.
Be aware that OCR might introduce errors or inconsistencies in formatting.
Look for patterns in the text that indicate product listings, such as consistent formatting or numbering.
The customer name may be near the top of the receipt, often preceded by a label like "ชื่อ" or "Name".
The total amount is typically found near the bottom of the receipt, often labeled as "รวม", "Total", or similar.

After extraction, format the output as a JSON object with the following structure:
{
"customer_name": "",
"products": [
{
"name": "",
"amount": "",
"unit (amount)": ""
}
],
"total_amount": ""
}
If any information is not available or unclear, leave the field empty or mark it as "N/A".
Please process the following OCR text and provide the structured JSON output:

""" + ocr_text

# Send the combined instructions + OCR text to the text-only model
# (get_response uses its default model name here).
output = get_response(prompt)

## Prompting from Image Directly

เราสามารถ Prompt Gemini จากภาพได้โดยตรงดังนี้


In [None]:
from google.oauth2 import service_account
from google.cloud import aiplatform
from vertexai.generative_models import GenerativeModel, Part, Image

# Vertex AI setup: project name and service-account credentials are passed to
# aiplatform.init so the model calls below run against this project.
project_name = "cpf-generative-ai" # Put your project name here
credentials = service_account.Credentials.from_service_account_file("") # Put the path to the JSON key file here
aiplatform.init(project=project_name, credentials=credentials)

def get_response_with_image(prompt, image_path, model_name="gemini-pro-vision"):
    """Ask a generative model a question about an image and return its reply.

    Args:
        prompt (str): Question or instruction text sent alongside the image.
        image_path (str): Path of the image file to load.
        model_name (str): Name of the model to use
            (defaults to "gemini-pro-vision").

    Returns:
        str: The ``text`` of the model's response.
    """
    gemini = GenerativeModel(model_name)

    # The image is placed before the prompt text in the request contents.
    contents = [Image.load_from_file(image_path), prompt]

    return gemini.generate_content(contents).text

# Prompt for the image-based flow: asks the model to read a receipt image and
# return the customer name, the product rows (name, amount, unit) and the total
# amount as a JSON object. The text is sent to the model verbatim.
ocr_prompt = """
Please analyze the given receipt image and extract the following information:

Customer name (if available)
Product details including:

Product name/code
Amount (quantity)
Unit (if applicable)
Total amount

For each product, please provide:

The product name or code as written
The quantity or amount
The unit (if given next to the quantity)

Important: If you see a "-" or a blank entry in any field, infer or use the value from the row immediately above it.
After extraction, format the output as a JSON object with the following structure:
{
"customer_name": "",
  "products": [
{
  "name": "",
  "amount": "",
  "unit (amount)": ""
}
],
"total_amount": ""
}
If any information is not available or unclear, leave the field empty or mark it as "N/A".
"""

In [None]:
# Send the receipt image together with ocr_prompt to the vision-capable model.
# NOTE(review): `path` is not defined in the visible cells — it must hold the
# receipt image path before this cell runs.
output = get_response_with_image(ocr_prompt, path)