In [None]:
import albumentations as A
import matplotlib.pyplot as plt
import random
from PIL import Image
import cv2
import numpy as np
from pathlib import Path
import os 

def visualize(image):
    """Draw ``image`` on a fresh 10x10-inch matplotlib figure with the axes hidden."""
    figure_size = (10, 10)
    plt.figure(figsize=figure_size)
    # Axes are switched off before drawing so only the picture is shown.
    plt.axis('off')
    plt.imshow(image)
    
def generate_random_odd_number(min_value, max_value):
    """Return a random odd integer inside ``[min_value, max_value]``.

    One integer is drawn with ``random.randint``; an even draw is moved to the
    neighbouring odd value (up by one, or down by one when the draw sits on the
    upper bound). Because of that adjustment the result is not uniformly
    distributed over the odd values of the range.

    Args:
        min_value: inclusive lower bound.
        max_value: inclusive upper bound.

    Returns:
        An odd ``int`` ``n`` with ``min_value <= n <= max_value``.

    Raises:
        ValueError: if the range is empty (raised by ``random.randint``) or
            contains no odd integer (e.g. ``min_value == max_value == 4``).
    """
    # Generate a random integer in the specified range
    number = random.randint(min_value, max_value)

    if number % 2 == 0:
        if number < max_value:
            number += 1
        elif number > min_value:
            number -= 1
        else:
            # min_value == max_value and that single value is even: stepping in
            # either direction would leave the requested range.
            raise ValueError(
                f"no odd integer in range [{min_value}, {max_value}]"
            )
    return number

def get_transformed_image(image_path, seed):
    """Load an image and return one randomly augmented version of it.

    Both ``random`` and ``numpy.random`` are re-seeded with ``seed`` before the
    augmentation pipeline is built and applied.

    Args:
        image_path: path of the image to read (``str`` or path-like).
        seed: integer seed for ``random`` and ``numpy.random``.

    Returns:
        The augmented image as returned by the albumentations pipeline
        (the ``"image"`` entry of the transform result).

    Raises:
        ValueError: if OpenCV cannot read ``image_path``.
    """
    random.seed(seed)
    np.random.seed(seed)

    image = cv2.imread(str(image_path))
    if image is None:
        # cv2.imread reports a missing/undecodable file by returning None
        # instead of raising, which would otherwise surface as an obscure
        # AttributeError on `.shape` below.
        raise ValueError(f"Could not read image: {image_path}")

    # OpenCV arrays are indexed (rows, cols): shape[0] is the height.
    height, width = image.shape[0], image.shape[1]
    # Square crop whose side is the shorter image edge divided by 1.3.
    patch_size = int(min(width, height) / 1.3)

    transform = A.Compose([

        A.OneOf(
            [
                # A.RandomCrop(height = patch_size, width = patch_size),
                A.CenterCrop(height= patch_size, width= patch_size),
            ],
            p = 0.4
        ),
        A.CLAHE(clip_limit=2, p = 0.5),
        # The blur limit itself is randomised (odd value between 9 and 27).
        A.MotionBlur(blur_limit=generate_random_odd_number(9, 27), p=0.8),
        A.RandomBrightnessContrast(brightness_limit=0.7, contrast_limit=0.7, p=1),
        A.ShiftScaleRotate(shift_limit=0, scale_limit=0.2, rotate_limit=180, p=0.8),
        A.HueSaturationValue(p = 0.6)

    ])

    augmented_image = transform(image = image)["image"]
    return augmented_image


# NOTE(review): absolute, machine-specific path; consider making it configurable.
root_dir = Path("/Users/lihongxuan/Desktop/AIPI/Courses/AIPI540/AIPI-540")

source_dataset_path = root_dir / "JPEG_Dataset"
augmented_dataset_path = root_dir / "Synthetic_Dataset"
# The output directory only needs to be created once, not once per seed.
augmented_dataset_path.mkdir(parents=True, exist_ok=True)

# Write five augmented variants (one per seed) of every source picture.
for seed in range(5):
    for ori_pic in os.listdir(source_dataset_path):
        if ori_pic == '.DS_Store':
            continue
        augmented_image = get_transformed_image(str(source_dataset_path / ori_pic), seed)
        # Path.stem drops the extension whatever its length, so "9.jpg" and
        # "9.jpeg" both become "9_<seed>.jpeg" (slicing off 5 characters
        # truncated the stem of ".jpg" files).
        output_name = Path(ori_pic).stem + "_" + str(seed) + ".jpeg"
        cv2.imwrite(str(augmented_dataset_path / output_name), augmented_image)
    


# #Image.open("/Users/lihongxuan/Desktop/AIPI/Courses/AIPI540/AIPI-540/JPEG_Dataset/1.jpg")

# visualize(augmented_image)
# 

In [None]:
def plot_gray(image):
    """Show ``image`` with the ``'Greys_r'`` colormap on a new 16x10-inch figure.

    Returns whatever ``plt.imshow`` returns for the drawn image.
    """
    plt.figure(figsize=(16, 10))
    drawn = plt.imshow(image, cmap='Greys_r')
    return drawn


from skimage.filters import threshold_local
def bw_scanner(image):
    """Binarize a BGR image with a local (adaptive) Gaussian threshold.

    The image is converted to grayscale, a per-pixel threshold is computed with
    ``threshold_local`` (block size 21, offset 5), and every pixel brighter than
    its threshold becomes 255 while the rest become 0.

    Returns:
        A ``uint8`` array of 0/255 values with the grayscale image's shape.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    local_threshold = threshold_local(grayscale, 21, offset=5, method="gaussian")
    above_threshold = grayscale > local_threshold
    return above_threshold.astype("uint8") * 255
# Binarize one synthetic sample image with bw_scanner.
# NOTE(review): cv2.imread returns None for an unreadable path, which would make
# bw_scanner fail inside cv2.cvtColor — confirm the file exists before running.
img = bw_scanner(cv2.imread("/Users/lihongxuan/Desktop/AIPI/Courses/AIPI540/AIPI-540/Synthetic_Dataset/47_2.jpeg"))


# Preview the black-and-white result.
plot_gray(img)

# Save the binarized array as a PNG (read back by the EasyOCR cell below).
output = Image.fromarray(img)
output.save('/Users/lihongxuan/Desktop/AIPI/Courses/AIPI540/AIPI-540/result.png')

In [None]:
import easyocr
import cv2
def EasyOCR(image) -> str:
    """Run English EasyOCR on ``image`` and return the recognized text.

    Args:
        image: anything ``easyocr.Reader.readtext`` accepts (file path, bytes,
            numpy array) or a PIL image. PIL images are converted to a BGR
            numpy array first, because EasyOCR's documented inputs do not
            include arbitrary PIL image objects (e.g. one opened from a PNG).

    Returns:
        The text of every detection, one detection per line.
    """
    # `Image` (PIL) and `np` are imported in the notebook's first cell.
    if isinstance(image, Image.Image):
        rgb_pixels = np.array(image.convert("RGB"))
        # Reverse the channel axis: PIL yields RGB, EasyOCR/OpenCV expect BGR.
        image = np.ascontiguousarray(rgb_pixels[:, :, ::-1])

    reader = easyocr.Reader(['en']) # initialize OCR
    result = reader.readtext(image) # input image
    # Each detection is indexed as (box, text, confidence); keep the text.
    return "\n".join([res[1] for res in result])
# OCR the binarized scan stored at `image_path`.
# NOTE(review): the image is handed to EasyOCR as a PIL object rather than a
# path or numpy array — confirm EasyOCR accepts that input type for PNG files.
image_path = '/Users/lihongxuan/Desktop/AIPI/Courses/AIPI540/AIPI-540/result.png'
img =Image.open(image_path)#cv2.imread(image_path)#


# `text` holds the recognized lines joined with newlines.
text = EasyOCR(img)

In [None]:
# Show the OCR output with its newlines rendered (one detection per line).
print(text)

In [None]:
import pytesseract
from PIL import Image
import time

# Start the timer.
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() follows the wall clock, which can be adjusted
# while the measurement is running.
start_time = time.perf_counter()

# Load the image from file
image_path = '/Users/lihongxuan/Desktop/AIPI/Courses/AIPI540/AIPI-540/Synthetic_Dataset/1_2.jpeg'#'/Users/lihongxuan/Desktop/AIPI/Courses/AIPI540/AIPI-540/result.png'
img = Image.open(image_path)

# Use tesseract to do OCR on the image
text = pytesseract.image_to_string(img)

# Stop the timer
end_time = time.perf_counter()

# Seconds spent loading the image and extracting its text
processing_time = end_time - start_time

# Last expression of the cell: displayed as the cell output.
text, processing_time


In [None]:
# Show the Tesseract output with its newlines rendered.
print(text)

In [2]:
# SECURITY: never hard-code an API key in a notebook — both the cell source and
# its echoed output end up in the saved file. The key that used to be written
# here must be treated as compromised and revoked.
# The OpenAI client reads OPENAI_API_KEY from the environment; if it is not
# already set, ask for it interactively without echoing it.
import os
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")


In [8]:
from openai import OpenAI
import base64
import json
import os
import pandas as pd
from PIL import Image
import wandb 
from tqdm import tqdm
from skimage.filters import threshold_local
import torch
import easyocr
import cv2
import pytesseract
from PIL import Image

# Use the first CUDA GPU when PyTorch detects one, otherwise the CPU.
# NOTE(review): `device` is not referenced by any function visible in this cell.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def encode_image(image_path):
    """Return the raw bytes of the file at ``image_path`` as a base64 text string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
    
def img_preprocess(image_path):
    """Read an image and binarize it with a local Gaussian threshold.

    Args:
        image_path: path of the image file to read with OpenCV.

    Returns:
        A ``uint8`` array of 0/255 values: 255 where the grayscale pixel is
        brighter than its local threshold (block size 21, offset 5), else 0.

    Raises:
        ValueError: if OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing
        # or cannot be decoded; fail with a message that names the file.
        raise ValueError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    T = threshold_local(gray, 21, offset = 5, method = "gaussian")
    return (gray > T).astype("uint8") * 255

def llm_postprocess(ocr_text, model='gpt-4-vision-preview', max_tokens=800):
    """Ask an OpenAI chat model to list potential allergens found in OCR text.

    Args:
        ocr_text: text extracted from a product's ingredient label.
        model: chat-completions model name. Defaults to the model this notebook
            was written against.
            NOTE(review): that default is a preview model name — confirm it is
            still served, or pass another model (e.g. 'gpt-4').
        max_tokens: upper bound on the length of the reply.

    Returns:
        The content of the first choice of the model's reply.
    """
    prompt = """
            Here is the extracted text from a product' ingredient part. 
            Could you please identify the ingredients and return the potential allergy ingredient? 
            Your return only includes the alleries with bullet points.
            """
    # OpenAI() picks up the API key from the OPENAI_API_KEY environment variable.
    client = OpenAI()
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "text", "text": ocr_text},
                ],
            }
        ],
        max_tokens=max_tokens,
    )
    allergies = response.choices[0].message.content

    return allergies


def easyocr_model(image_path, gpu):
    """OCR a binarized image with EasyOCR and return the LLM's allergen list.

    Args:
        image_path: path of the image to process.
        gpu: forwarded to ``easyocr.Reader`` to select GPU or CPU execution
            (previously accepted but ignored, so the reader always used its
            own default).

    Returns:
        The allergen text produced by ``llm_postprocess``.
    """
    reader = easyocr.Reader(['en'], gpu=gpu) # initialize OCR
    image = img_preprocess(image_path)
    # reader = easyocr.Reader(['en'], detection='DB', recognition = 'Transformer')
    result = reader.readtext(image) # input image
    # Each detection is indexed as (box, text, confidence); keep the text.
    ocr_text =  "\n".join([res[1] for res in result])
    allergies = llm_postprocess(ocr_text)
    return allergies

def tesseract3_model(image_path):
    """OCR a binarized image with Tesseract (``--oem 0``) and return the LLM's allergen list.

    Args:
        image_path: path of the image to process.

    Returns:
        The allergen text produced by ``llm_postprocess``.
    """
    # The image is read and binarized by img_preprocess; the former
    # Image.open() call was dead code that left a file handle open.
    image = img_preprocess(image_path)
    # Use tesseract to do OCR on the image
    ocr_text = pytesseract.image_to_string(image, config=r'--oem 0')

    allergies = llm_postprocess(ocr_text)
    return allergies

# def mmocr_model(image_path):
    
    
# def kerasocr_model(image_path):
    



def chatgpt_model(image_path):
    """Send the picture itself to the OpenAI vision model and return its allergen list.

    The file is base64-encoded with ``encode_image`` and embedded in a
    ``data:image/jpeg`` URL next to the text prompt.

    Returns:
        The content of the first choice of the model's reply.
    """
    prompt = """
            Here is the picture from a product' ingredient part. 
            Could you please identify the ingredients and return the potential allergy ingredient? 
            Your return only includes the alleries with bullet points.
            """
    encoded_picture = encode_image(image_path)

    # One user message made of two parts: the instructions and the picture.
    text_part = {"type": "text", "text": prompt}
    picture_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{encoded_picture}"},
    }
    user_message = {"role": "user", "content": [text_part, picture_part]}

    reply = OpenAI().chat.completions.create(
        model='gpt-4-vision-preview', #gpt-4
        messages=[user_message],
        max_tokens=800,
    )
    return reply.choices[0].message.content
    
# print(llm_postprocess(text))

# Smoke-test the EasyOCR + LLM pipeline on a single original (non-augmented) picture.
image_path = "/Users/lihongxuan/Desktop/AIPI/Courses/AIPI540/AIPI-540/JPEG_Dataset/9.jpg"

# Prints the allergen list returned by the LLM for that picture.
print(easyocr_model(image_path=image_path, gpu=False))

# print("-"*20)

# print(tesseract3_model(image_path=image_path))
# print("-"*20)

# print(chatgpt_model(image_path=image_path))

Based on the extracted text, the potential allergy ingredients are:
- CHEESE BLEND (PART SKIM MOZZARELLA CHEESE)
- CALCIUM LACTATE 

Note: The text is not clear and may contain other potential allergens that are not easily identifiable.


In [7]:
import mmocr

ModuleNotFoundError: No module named 'mmocr'

In [None]:
from mmocr.utils.ocr import MMOCR
# Build an MMOCR pipeline (TextSnake detector, SAR recognizer, SDMGR key-information extractor).
# NOTE(review): the variable name `mmocr` shadows the `mmocr` module imported in
# the previous cell (whose import failed with ModuleNotFoundError in the saved output).
mmocr = MMOCR(det='TextSnake', recog='SAR', kie='SDMGR')
# Run it on the demo image, printing the result and writing the annotated picture to `output`.
mmocr.readtext('demo/demo_kie.jpeg', print_result=True, output='outputs/demo_kie_pred.jpg')

In [None]:
from paddleocr import PaddleOCR

# The language model PaddleOCR uses is selected with the `lang` parameter,
# for example `ch`, `en`, `fr`, `german`, `korean`, `japan`.
ocr = PaddleOCR(use_angle_cls=True, lang="ch")  # need to run only once to download and load model into memory
img_path = "/Users/lihongxuan/Desktop/AIPI/Courses/AIPI540/AIPI-540/JPEG_Dataset/9.jpg"
result = ocr.ocr(img_path, cls=True)
# `result` holds one entry per input image; print every detected line of each.
for idx, res in enumerate(result):
    if not res:
        # NOTE(review): some PaddleOCR versions appear to return None (rather
        # than an empty list) for an image with no detected text — skip it
        # instead of failing in the inner loop.
        continue
    for line in res:
        print(line)

In [None]:
from openai import OpenAI
import base64
import json
import os
import pandas as pd
from PIL import Image
import wandb 
from tqdm import tqdm
from skimage.filters import threshold_local
import torch
import easyocr
import cv2
import pytesseract
from PIL import Image
import time


# OPENAI_API_KEY must already be set in the environment before this cell runs.
# (A literal key used to be written in this comment; it has been redacted and
# should be treated as compromised.)
# Use the first CUDA GPU when PyTorch detects one, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def encode_image(image_path):
    """Read the file at ``image_path`` in binary mode and return it base64-encoded as text."""
    with open(image_path, "rb") as source:
        payload = source.read()
    return base64.b64encode(payload).decode('utf-8')
    
def img_preprocess(image_path):
    """Read an image and binarize it with a local Gaussian threshold.

    Args:
        image_path: path of the image file to read with OpenCV.

    Returns:
        A ``uint8`` array of 0/255 values: 255 where the grayscale pixel is
        brighter than its local threshold (block size 21, offset 5), else 0.

    Raises:
        ValueError: if OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing
        # or cannot be decoded; fail with a message that names the file.
        raise ValueError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    T = threshold_local(gray, 21, offset = 5, method = "gaussian")
    return (gray > T).astype("uint8") * 255

def llm_postprocess(ocr_text, model='gpt-4-vision-preview', max_tokens=800):
    """Ask an OpenAI chat model to list potential allergens found in OCR text.

    Args:
        ocr_text: text extracted from a product's ingredient label.
        model: chat-completions model name. Defaults to the model this notebook
            was written against.
            NOTE(review): that default is a preview model name — confirm it is
            still served, or pass another model (e.g. 'gpt-4').
        max_tokens: upper bound on the length of the reply.

    Returns:
        The content of the first choice of the model's reply.
    """
    prompt = """
            Here is the extracted text from a product's ingredient part. 
            Could you please identify the ingredients and return the potential allergy ingredient? 
            Your return only includes the alleries with bullet points.
            """
    # OpenAI() picks up the API key from the OPENAI_API_KEY environment variable.
    client = OpenAI()
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "text", "text": ocr_text},
                ],
            }
        ],
        max_tokens=max_tokens,
    )
    allergies = response.choices[0].message.content
    return allergies


def easyocr_model(image_path, gpu):
    """OCR the raw image file with EasyOCR, then ask the LLM for allergens.

    Args:
        image_path: path of the image, passed to EasyOCR unchanged.
        gpu: forwarded to ``easyocr.Reader``.

    Returns:
        ``(ocr_text, allergies)``: the recognized text (one detection per line)
        and the reply of ``llm_postprocess`` for that text.
    """
    ocr_reader = easyocr.Reader(['en'], gpu=gpu)
    detections = ocr_reader.readtext(image_path)
    # Element 1 of every detection is the piece that gets joined into the text.
    recognized_lines = []
    for detection in detections:
        recognized_lines.append(detection[1])
    ocr_text = "\n".join(recognized_lines)
    return ocr_text, llm_postprocess(ocr_text)

def easyocr_model_w_pre(image_path, gpu):
    """Binarize the image with ``img_preprocess``, OCR it with EasyOCR, then ask the LLM for allergens.

    Args:
        image_path: path of the image to process.
        gpu: forwarded to ``easyocr.Reader``.

    Returns:
        ``(ocr_text, allergies)``: the recognized text (one detection per line)
        and the reply of ``llm_postprocess`` for that text.
    """
    ocr_reader = easyocr.Reader(['en'], gpu=gpu)
    binarized = img_preprocess(image_path)
    detections = ocr_reader.readtext(binarized)
    ocr_text = "\n".join(detection[1] for detection in detections)
    allergies = llm_postprocess(ocr_text)
    return ocr_text, allergies

def tesseract_model(image_path):
    """OCR the raw image with Tesseract, then ask the LLM for allergens.

    Args:
        image_path: path of the image to process.

    Returns:
        ``(ocr_text, allergies)``: the text Tesseract extracted and the reply
        of ``llm_postprocess`` for that text.
    """
    # Open the image as a context manager so its file handle is closed as soon
    # as OCR is done (this function is called once per file in a long loop).
    with Image.open(image_path) as image:
        ocr_text = pytesseract.image_to_string(image)
    allergies = llm_postprocess(ocr_text)
    return ocr_text, allergies

def tesseract_model_w_pre(image_path):
    """Binarize the image with ``img_preprocess``, OCR it with Tesseract, then ask the LLM for allergens.

    Args:
        image_path: path of the image to process.

    Returns:
        ``(ocr_text, allergies)``: the text Tesseract extracted and the reply
        of ``llm_postprocess`` for that text.
    """
    # The image is read and binarized by img_preprocess; the former
    # Image.open() call was dead code that left a file handle open.
    image = img_preprocess(image_path)
    # Use tesseract to do OCR on the image
    ocr_text = pytesseract.image_to_string(image)
    allergies = llm_postprocess(ocr_text)
    return ocr_text, allergies

# def mmocr_model(image_path):
    
    
# def kerasocr_model(image_path):


def chatgpt_model(image_path):
    """Send the picture itself to the OpenAI vision model and return its allergen list.

    The file is base64-encoded with ``encode_image`` and embedded in a
    ``data:image/jpeg`` URL next to the text prompt.

    Returns:
        The content of the first choice of the model's reply.
    """
    prompt = """
            Here is the picture from a product' ingredient part. 
            Could you please identify the ingredients and return the potential allergy ingredient? 
            Your return only includes the alleries with bullet points.
            """
    encoded_picture = encode_image(image_path)

    # One user message made of two parts: the instructions and the picture.
    text_part = {"type": "text", "text": prompt}
    picture_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{encoded_picture}"},
    }
    user_message = {"role": "user", "content": [text_part, picture_part]}

    reply = OpenAI().chat.completions.create(
        model='gpt-4-vision-preview', #gpt-4
        messages=[user_message],
        max_tokens=800,
    )
    return reply.choices[0].message.content
    



def _timed(fn, **kwargs):
    """Call ``fn(**kwargs)`` and return ``(result, elapsed_seconds)``."""
    started = time.perf_counter()  # monotonic clock meant for interval timing
    outcome = fn(**kwargs)
    return outcome, time.perf_counter() - started


data_dir = "/home/featurize/work/OCR_exper/JPEG_Dataset_synthetic/"
# Run every pipeline on every image of the directory and time each of them.
# To process only a subset, slice the list here, e.g. `sorted(...)[:10]`
# (an empty subscript `[]` is a syntax error).
for image_local in tqdm(sorted(os.listdir(data_dir))):
    image_path = os.path.join(data_dir, image_local)

    # Skip entries that cannot be opened as an image; `except Exception`
    # (rather than a bare `except`) still lets Ctrl-C interrupt the run.
    try:
        image_array = wandb.Image(Image.open(image_path))
    except Exception as err:
        print(f"Skipping {image_local}: {err}")
        continue

    print(image_path)
    (easyocr_text, easyocr_result), easyocr_time = _timed(
        easyocr_model, image_path=image_path, gpu=True
    )
    print(easyocr_result)
    print("\n")

    (easyocr_w_pre_text, easyocr_w_pre_result), easyocr_w_pre_time = _timed(
        easyocr_model_w_pre, image_path=image_path, gpu=True
    )
    print(easyocr_w_pre_result)
    print("\n")

    (tesseract_text, tesseract_result), tesseract_time = _timed(
        tesseract_model, image_path=image_path
    )
    print(tesseract_result)
    print("\n")

    (tesseract_w_pre_text, tesseract_w_pre_result), tesseract_w_pre_time = _timed(
        tesseract_model_w_pre, image_path=image_path
    )
    print(tesseract_w_pre_result)
    print("\n")

    chatgpt_result, chatgpt_time = _timed(chatgpt_model, image_path=image_path)
    print(chatgpt_result)
    print("\n")
        
        