<a href="https://colab.research.google.com/github/YunzhenYang-collection/Expansion-Exercises-Image-Recognition/blob/main/versions/im_re_v2_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 本次實現:
1. 自動辨識category
2. ~~確認vital_signs 的資料結構~~
3. 影像辨識增加 error 的型別

In [None]:
!pip install -q -U google-generativeai google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client

In [None]:
!pip install python-dotenv

In [None]:
import google.generativeai as genai
from IPython.display import Markdown
import httpx
import base64
import imghdr
import re
import json
from dotenv import load_dotenv
import os
import cv2
from PIL import Image, ImageFilter

In [None]:
import os

from google.colab import drive, files

# Mount Google Drive at /content/drive for the cells below.
drive.mount('/content/drive')

In [None]:
from google.colab import userdata

# Fetch the key through google.colab.userdata under the name 'GOOGLE_API_KEY2'.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY2')

# Configure the Gemini SDK only when a key came back; otherwise just report it.
if not GOOGLE_API_KEY:
    print("Error: API key not found.")
else:
    print("API key retrieved successfully.")
    genai.configure(api_key=GOOGLE_API_KEY)

## 辨識函式 輸入類別

In [None]:
def encode_image(image_content):
    """Return the bytes in *image_content* as a base64 text string."""
    b64_bytes = base64.b64encode(image_content)
    return b64_bytes.decode("utf-8")

def is_valid_for_category(image_type, category):
    """Tell whether an image of *image_type* is accepted for *category*.

    Each known category accepts a fixed set of image types; any other
    category is rejected.
    """
    accepted_types = (
        ("vital_signs", ('jpeg', 'jpg', 'png')),
        ("todo", ('png',)),
        ("medication", ('jpeg',)),
    )
    for known_category, allowed in accepted_types:
        if category == known_category:
            return image_type in allowed
    return False

def _first_group(pattern, text, default="未找到"):
    """Return capture group 1 of the first match of *pattern* in *text*, else *default*."""
    found = re.search(pattern, text)
    return found.group(1) if found else default


def _parse_record(text, field_patterns):
    """Extract every field of *field_patterns* (key -> regex) from *text* into a dict."""
    return {key: _first_group(pattern, text) for key, pattern in field_patterns.items()}


def recognize_image(image_path, category):
    """Send an image to the Gemini model and parse the reply for *category*.

    Args:
        image_path: Path of the image file to read.
        category: One of "vital_signs", "todo" or "medication".

    Returns:
        On success ``{"category": category, "data": ...}``. For
        "vital_signs" and "todo", ``data`` is a dict of extracted fields;
        for "medication" it is a list with one such dict per
        blank-line-separated chunk of the reply. A field whose label is not
        found in the reply is set to "未找到".
        On failure ``{"error": message}``. Errors derived from ``Exception``
        are reported this way instead of being raised.
    """
    # Checked first so that an unknown category gets its own message rather
    # than the image/category mismatch error that is_valid_for_category()
    # produces for any category it does not know.
    if category not in ("vital_signs", "todo", "medication"):
        return {"error": "無效的資料類型。"}

    try:
        # Read the raw image bytes.
        with open(image_path, "rb") as image_file:
            image_content = image_file.read()

        image_type = imghdr.what(None, image_content)

        # Reject a mismatching image before encoding it or building a model.
        if not is_valid_for_category(image_type, category):
            return {"error": "選擇的資料類型與圖片不符，請重新選擇正確的資料類型。"}

        # NOTE(review): imghdr appears to report JPEG files as 'jpeg' and may
        # not know HEIC at all, so the 'jpg'/'heic' entries may never be hit
        # -- confirm before relying on them.
        mime_type = {
            'jpeg': 'image/jpeg',
            'jpg': 'image/jpeg',
            'png': 'image/png',
            'heic': 'image/heic',
        }.get(image_type, 'application/octet-stream')

        # Pick the prompt and the label -> regex table for the category.
        if category == "vital_signs":
            prompt = """
            請辨識圖片中的血壓計讀數，並以繁體中文輸出以下資訊：
            高壓: [高壓值]
            低壓: [低壓值]
            心律: [心律值]// 請注意是否有❤的符號，若有就注意符號旁的數字並進行處理
            脈搏: [脈搏值]
            """
            field_patterns = {
                "high_pressure": r"高壓:\s*(\d+)",
                "low_pressure": r"低壓:\s*(\d+)",
                "heart_rate": r"心律:\s*(\d+)",
                "pulse": r"脈搏:\s*(\d+)",
            }
        elif category == "todo":
            prompt = """
            請辨識圖片中的代辦事項資訊，並以繁體中文輸出以下資訊：
            標題: [代辦事項標題]
            日期: [日期 (YYYY-MM-DD)]
            時間: [時間]
            描述: [事項描述]
            """
            field_patterns = {
                "title": r"標題:\s*(.+)",
                "date": r"日期:\s*(.+)",
                "time": r"時間:\s*(.+)",
                "description": r"描述:\s*(.+)",
            }
        else:  # "medication"
            prompt = """
            請辨識圖片中的所有藥物資訊，並以繁體中文輸出以下資訊：
            藥物名稱: [藥物名稱]
            用藥時間: [用藥時間 (HH:MM)]
            適應症: [適應症]
            副作用: [辨識或是列出可能的副作用副作用]
            藥物顏色: [藥物顏色]
            藥物外觀: [藥物外觀]
            服藥警語: [服藥警語]
            若有多個藥物，請分別輸出每個藥物的資訊。
            """
            field_patterns = {
                "name": r"藥物名稱:\s*(.+)",
                "time": r"用藥時間:\s*(.+)",
                "indication": r"適應症:\s*(.+)",
                "side_effects": r"副作用:\s*(.+)",
                "color": r"藥物顏色:\s*(.+)",
                "appearance": r"藥物外觀:\s*(.+)",
                # Taken from the warning label itself, not the appearance.
                "warning": r"服藥警語:\s*(.+)",
            }

        model = genai.GenerativeModel(model_name="gemini-1.5-pro")
        response = model.generate_content(
            [
                {"mime_type": mime_type, "data": encode_image(image_content)},
                prompt,
            ]
        )

        response_text = response.text
        if not response_text:
            return {"error": "No response text."}

        if category == "medication":
            # Assumes medications are separated by a blank line in the reply.
            data = [
                _parse_record(chunk, field_patterns)
                for chunk in response_text.split("\n\n")
            ]
        else:
            data = _parse_record(response_text, field_patterns)

        return {"category": category, "data": data}

    except FileNotFoundError:
        return {"error": f"Error: File not found at {image_path}"}
    except Exception as e:
        return {"error": f"An error occurred: {e}"}

def main():
    """Run the interactive recognition loop.

    Asks for a category and an image path, prints the result of
    recognize_image() as indented JSON, then asks the user to confirm it.
    Answering "重新辨識" starts over; any other answer ends the loop.
    """
    while True:
        category = input("請選擇資料類型 (vital_signs/todo/medication): ")
        image_path = input("請輸入圖片路徑: ")

        result = recognize_image(image_path, category)
        print(json.dumps(result, ensure_ascii=False, indent=4))

        user_response = input("\n請確認資料是否正確? (確認/重新辨識): ")
        if user_response == "重新辨識":
            print("重新辨識中...")
            continue  # loop instead of recursing, so retries do not stack up

        if user_response == "確認":
            print("資料已確認。")
        else:
            print("無效的選項。請輸入 '確認' 或 '重新辨識'。")
        return


if __name__ == "__main__":
    main()


### 有將藥品加上count:


In [None]:
def encode_image(image_content):
    """Base64-encode raw image bytes and hand them back as text."""
    return str(base64.b64encode(image_content), 'utf-8')

def is_valid_for_category(image_type, category):
    """Report whether *image_type* may be used with *category*.

    No restriction is applied: every combination is accepted. Both
    arguments are ignored.
    """
    # Placeholder for per-category rules; currently everything passes.
    return True

def _first_group(pattern, text, default="未找到"):
    """Return capture group 1 of the first match of *pattern* in *text*, else *default*."""
    found = re.search(pattern, text)
    return found.group(1) if found else default


def _parse_record(text, field_patterns):
    """Extract every field of *field_patterns* (key -> regex) from *text* into a dict."""
    return {key: _first_group(pattern, text) for key, pattern in field_patterns.items()}


def _parse_records(text, field_patterns):
    """Parse each blank-line-separated chunk of *text*, dropping chunks where no field matched."""
    parsed = (_parse_record(chunk, field_patterns) for chunk in text.split("\n\n"))
    return [
        record for record in parsed
        if any(value != "未找到" for value in record.values())
    ]


def recognize_image(image_path, category):
    """Send an image to the Gemini model and parse the reply for *category*.

    Args:
        image_path: Path of the image file to read.
        category: One of "vital_signs", "medication" or "todo".

    Returns:
        On success a dict with "category" and "data":

        * "vital_signs": ``data`` is a dict of the four readings.
        * "medication": ``data`` is a list of medication dicts and
          "valid_count" is the length of that list.
        * "todo": ``data`` is a list of todo dicts and "valid_count" is the
          number of them in which every field was found.

        For the list categories, chunks of the reply in which no field at
        all was found are left out. A field whose label is not found is
        "未找到".
        On failure ``{"error": message}``. Errors derived from ``Exception``
        are reported this way instead of being raised.
    """
    try:
        # Read the raw image bytes.
        with open(image_path, "rb") as image_file:
            image_content = image_file.read()

        image_type = imghdr.what(None, image_content)

        # NOTE(review): imghdr appears to report JPEG files as 'jpeg' and may
        # not know HEIC at all, so the 'jpg'/'heic' entries may never be hit
        # -- confirm before relying on them.
        mime_type = {
            'jpeg': 'image/jpeg',
            'jpg': 'image/jpeg',
            'png': 'image/png',
            'heic': 'image/heic',
        }.get(image_type, 'application/octet-stream')

        # Make sure the image type is acceptable for the chosen category.
        if not is_valid_for_category(image_type, category):
            return {"error": "選擇的資料類型與圖片不符，請重新選擇正確的資料類型。"}

        # Pick the prompt and the label -> regex table for the category.
        # Prompt text, including its leading whitespace, is kept verbatim.
        if category == "vital_signs":
            prompt = """
            請辨識圖片中的血壓計讀數，並以繁體中文輸出以下資訊：
            高壓: [高壓值]
            低壓: [低壓值]
            心律: [心律值] # 請注意是否有❤的符號，若有就注意符號旁的數字並進行處理
            脈搏: [脈搏值]
            """
            field_patterns = {
                "high_pressure": r"高壓:\s*(\d+)",
                "low_pressure": r"低壓:\s*(\d+)",
                "heart_rate": r"心律:\s*(\d+)",
                "pulse": r"脈搏:\s*(\d+)",
            }
        elif category == "medication":
            prompt = """
          請辨識圖片中的所有藥物資訊，並以繁體中文輸出以下資訊：
          藥物名稱: [藥物名稱]
          用藥時間: [用藥時間 (HH:MM)]
          適應症: [適應症]
          副作用: [副作用]
          藥物顏色: [藥物顏色]
          藥物外觀: [藥物外觀]
          服藥警語: [服藥警語] # 服藥的注意事項(如果有)
          若有多個藥物，請分別輸出每個藥物的資訊。
          """
            field_patterns = {
                "name": r"藥物名稱:\s*(.+)",
                "time": r"用藥時間:\s*(.+)",
                "indication": r"適應症:\s*(.+)",
                "side_effects": r"副作用:\s*(.+)",
                "color": r"藥物顏色:\s*(.+)",
                "appearance": r"藥物外觀:\s*(.+)",
                "warning": r"服藥警語:\s*(.+)",
            }
        elif category == "todo":
            prompt = """
            請辨識圖片中的待辦事項，並以繁體中文輸出以下資訊：
            事項: [事項內容]
            期限: [完成期限]
            優先級: [優先級]
            若有多個待辦事項，請分別輸出每個事項的資訊。
            """
            field_patterns = {
                "item": r"事項:\s*(.+)",
                "deadline": r"期限:\s*(.+)",
                "priority": r"優先級:\s*(.+)",
            }
        else:
            return {"error": "無效的資料類型。"}

        model = genai.GenerativeModel(model_name="gemini-1.5-pro")
        response = model.generate_content(
            [
                {"mime_type": mime_type, "data": encode_image(image_content)},
                prompt,
            ]
        )

        response_text = response.text
        if not response_text:
            return {"error": "No response text."}

        if category == "vital_signs":
            return {
                "category": category,
                "data": _parse_record(response_text, field_patterns),
            }

        # Assumes items are separated by a blank line in the reply. Chunks
        # with no recognised field are not real records and are skipped for
        # both list categories.
        records = _parse_records(response_text, field_patterns)
        if category == "medication":
            valid_count = len(records)
        else:
            # For todos only fully populated entries count as valid.
            valid_count = sum(
                1 for record in records if "未找到" not in record.values()
            )

        return {"category": category, "data": records, "valid_count": valid_count}

    except FileNotFoundError:
        return {"error": f"Error: File not found at {image_path}"}
    except Exception as e:
        return {"error": f"An error occurred: {e}"}

def main():
    """Run the interactive recognition loop.

    Asks for a category and an image path, prints the result of
    recognize_image() as indented JSON, then asks the user to confirm it.
    Answering "重新辨識" starts over; any other answer ends the loop.
    """
    while True:
        category = input("請選擇資料類型 (vital_signs/todo/medication): ")
        # Ask for the path: an empty hard-coded path can never be opened.
        image_path = input("請輸入圖片路徑: ")

        result = recognize_image(image_path, category)
        print(json.dumps(result, ensure_ascii=False, indent=4))

        user_response = input("\n請確認資料是否正確? (確認/重新辨識): ")
        if user_response == "重新辨識":
            print("重新辨識中...")
            continue  # loop instead of recursing, so retries do not stack up

        if user_response == "確認":
            print("資料已確認。")
        else:
            print("無效的選項。請輸入 '確認' 或 '重新辨識'。")
        return


if __name__ == "__main__":
    main()

.env test

In [None]:
# Switch the working directory to the picture folder on the mounted drive
# and echo it back as a check.
absolute_path = "/content/drive/My Drive/Colab Notebooks/CP_im_re/PictureSet"
os.chdir(absolute_path)
print(os.getcwd())

# Load variables from a .env file (presumably the one in this folder --
# verify) and show IMAGE_PATH to confirm it was picked up.
load_dotenv()
print(os.getenv("IMAGE_PATH"))

## 由llm辨識圖片類別

In [None]:
def encode_image(image_content):
    """Convert image bytes into their base64 representation as a str."""
    encoded = base64.b64encode(image_content)
    return encoded.decode(encoding='utf-8')

def is_valid_for_category(image_type, category):
    """Return True for every image type / category pair.

    Neither argument is inspected; this is a hook where per-category
    validation could be added later.
    """
    return True

def _first_group(pattern, text, default="未找到"):
    """Return capture group 1 of the first match of *pattern* in *text*, else *default*."""
    found = re.search(pattern, text)
    return found.group(1) if found else default


def _parse_record(text, field_patterns):
    """Extract every field of *field_patterns* (key -> regex) from *text* into a dict."""
    return {key: _first_group(pattern, text) for key, pattern in field_patterns.items()}


def _parse_records(text, field_patterns):
    """Parse each blank-line-separated chunk of *text*, dropping chunks where no field matched."""
    parsed = (_parse_record(chunk, field_patterns) for chunk in text.split("\n\n"))
    return [
        record for record in parsed
        if any(value != "未找到" for value in record.values())
    ]


def recognize_image(image_path):
    """Let the Gemini model classify an image, then extract category-specific data.

    The model is asked twice: first for the category ("vital_signs",
    "medication" or "todo"), then with the prompt for that category.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A dict with "category" and "data":

        * "vital_signs": ``data`` is a dict of the four readings.
        * "medication": ``data`` is a list of medication dicts and
          "valid_count" is the length of that list.
        * "todo": ``data`` is a list of todo dicts.

        For the list categories, chunks of the reply in which no field at
        all was found are left out. A field whose label is not found is
        "未找到". An "error" key is added when the category is not
        recognised or the second reply is empty. When the file cannot be
        read, the first reply is empty, or another ``Exception`` occurs,
        only ``{"error": message}`` is returned.
    """
    try:
        # Read the raw image bytes.
        with open(image_path, "rb") as image_file:
            image_content = image_file.read()

        image_type = imghdr.what(None, image_content)

        # NOTE(review): imghdr appears to report JPEG files as 'jpeg' and may
        # not know HEIC at all, so the 'jpg'/'heic' entries may never be hit
        # -- confirm before relying on them.
        mime_type = {
            'jpeg': 'image/jpeg',
            'jpg': 'image/jpeg',
            'png': 'image/png',
            'heic': 'image/heic',
        }.get(image_type, 'application/octet-stream')

        encoded_image = encode_image(image_content)
        model = genai.GenerativeModel(model_name="gemini-1.5-pro")

        def ask(prompt_text):
            """Send the image together with *prompt_text* and return the reply text."""
            reply = model.generate_content(
                [
                    {"mime_type": mime_type, "data": encoded_image},
                    prompt_text,
                ]
            )
            return reply.text

        # Step 1: let the model decide which category the image belongs to.
        prompt = """
        請根據圖片自動識別其內容，並以繁體中文輸出以下資訊：
        類別: [圖片類別]  # 可能的類別有: vital_signs(內容有可能是血壓計、健康檢查單、或是圖片等形式), medication(藥物), todo(代辦事項)

        """
        classification_text = ask(prompt)
        if not classification_text:
            return {"error": "No response text from model."}

        category = _first_group(
            r"類別:\s*(vital_signs|medication|todo)", classification_text, "未識別"
        )
        result = {
            "category": category,
            # The prompt above does not ask for this label, so the
            # placeholder is what normally ends up here until step 2
            # replaces it.
            "data": _first_group(
                r"相關資訊:\s*(.+)", classification_text, "未找到相關資訊"
            ),
        }

        # Step 2: category-specific extraction. Prompt text, including its
        # leading whitespace, is kept verbatim.
        if category == "vital_signs":
            vital_prompt = """
                請辨識圖片中的生命徵數讀數，並以繁體中文輸出以下資訊：
                高壓: [高壓值]
                低壓: [低壓值]
                心律: [心律值] # 請注意是否有❤的符號，若有就注意符號旁的數字並進行處理
                脈搏: [脈搏值]
                """
            detail_text = ask(vital_prompt)
            if detail_text:
                result["data"] = _parse_record(
                    detail_text,
                    {
                        "high_pressure": r"高壓:\s*(\d+)",
                        "low_pressure": r"低壓:\s*(\d+)",
                        "heart_rate": r"心律:\s*(\d+)",
                        "pulse": r"脈搏:\s*(\d+)",
                    },
                )
            else:
                result["error"] = "No response text for vital signs."

        elif category == "medication":
            medication_prompt = """
              請辨識圖片中的所有藥物資訊，並以繁體中文輸出以下資訊：
              藥物名稱: [藥物名稱]
              用藥時間: [用藥時間 (HH:MM)]
              適應症: [適應症]
              副作用: [副作用]
              藥物顏色: [藥物顏色]
              藥物外觀: [藥物外觀]
              服藥警語: [服藥警語] # 服藥的注意事項(如果有)
              若有多個藥物，請分別輸出每個藥物的資訊。
              """
            detail_text = ask(medication_prompt)
            if detail_text:
                # Assumes medications are separated by a blank line.
                medications = _parse_records(
                    detail_text,
                    {
                        "name": r"藥物名稱:\s*(.+)",
                        "time": r"用藥時間:\s*(.+)",
                        "indication": r"適應症:\s*(.+)",
                        "side_effects": r"副作用:\s*(.+)",
                        "color": r"藥物顏色:\s*(.+)",
                        "appearance": r"藥物外觀:\s*(.+)",
                        "warning": r"服藥警語:\s*(.+)",
                    },
                )
                result["data"] = medications
                result["valid_count"] = len(medications)
            else:
                result["error"] = "No response text."

        elif category == "todo":
            todo_prompt = """
                請辨識圖片中的待辦事項，並以繁體中文輸出以下資訊：
                事項: [事項內容]
                期限: [完成期限]
                優先級: [優先級]
                """
            detail_text = ask(todo_prompt)
            if detail_text:
                # Chunks with no recognised field are skipped, matching the
                # medication branch.
                result["data"] = _parse_records(
                    detail_text,
                    {
                        "item": r"事項:\s*(.+)",
                        "deadline": r"期限:\s*(.+)",
                        "priority": r"優先級:\s*(.+)",
                    },
                )
            else:
                result["error"] = "No response text for todo."

        else:
            result["error"] = "無法識別的資料類型。"

        return result

    except FileNotFoundError:
        return {"error": f"Error: File not found at {image_path}"}
    except Exception as e:
        return {"error": f"An error occurred: {e}"}


def main():
    """Recognise the image named by the IMAGE_PATH environment variable.

    Loads variables via load_dotenv(), reads IMAGE_PATH, prints the result
    of recognize_image() as indented JSON and asks the user to confirm it.
    Answering "重新辨識" runs the recognition again; any other answer ends
    the loop. If IMAGE_PATH is unset or empty, a message is printed and
    nothing else is done.
    """
    load_dotenv()
    image_path = os.getenv("IMAGE_PATH")
    if not image_path:
        # Passing None on to open() would only surface as an opaque error.
        print("Error: IMAGE_PATH is not set.")
        return

    while True:
        result = recognize_image(image_path)
        print(json.dumps(result, ensure_ascii=False, indent=4))

        user_response = input("\n請確認資料是否正確? (確認/重新辨識): ")
        if user_response == "重新辨識":
            print("重新辨識中...")
            continue  # loop instead of recursing, so retries do not stack up

        if user_response == "確認":
            print("資料已確認。")
        else:
            print("無效的選項。請輸入 '確認' 或 '重新辨識'。")
        return


if __name__ == "__main__":
    main()