In [24]:
## imports

import cv2
import pytesseract
import platform
import requests
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from pytesseract import Output
import re

In [9]:
## pytesseract setup

def init():
    if (platform.system()=="Darwin"):
        pytesseract.pytesseract.tesseract_cmd = r"/opt/homebrew/bin/tesseract"
init()

In [10]:
def create_file_from_link(image_link):   
    image_name = "image.jpg"

    try:
        # Download the image
        response = requests.get(image_link, stream=True)
        if response.status_code == 200:
            # Save the image locally
            with open(image_name, 'wb') as file:
                file.write(response.content)
            return image_name
        else:
            print(f"Failed to download image. Status code: {response.status_code}")
            return -1
        
    except Exception as e:
        print(f"An error occurred: {e}")
        return -1

In [11]:
## constants

# Entity map for dimensional indicators
entity_unit_map = {
    'width': {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'},
    'depth': {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'},
    'height': {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'},
    'item_weight': {'gram', 'kilogram', 'microgram', 'milligram', 'ounce', 'pound', 'ton'},
    'maximum_weight_recommendation': {'gram', 'kilogram', 'microgram', 'milligram', 'ounce', 'pound', 'ton'},
    'voltage': {'kilovolt', 'millivolt', 'volt'},
    'wattage': {'kilowatt', 'watt'},
    'item_volume': {'centilitre', 'cubic foot', 'cubic inch', 'cup', 'decilitre', 'fluid ounce', 'gallon', 'imperial gallon', 'litre', 'microlitre', 'millilitre', 'pint', 'quart'}
}

# Regex patterns for dimensions
patterns = {
    "weight": r'\b(\d+(\.\d+)?\s?(g|grams|kg|kilograms|lb|lbs|pounds))\b',
    "length": r'\b(\d+(\.\d+)?\s?(cm|centimeters|mm|meters|inches|feet|ft))\b',
    "voltage": r'\b(\d+(\.\d+)?\s?(V|volts|kV|kilovolts))\b',
    "wattage": r'\b(\d+(\.\d+)?\s?(W|watts|kW|kilowatts))\b',
    "volume": r'\b(\d+(\.\d+)?\s?(L|litres|ml|millilitres|gallon|cup|pint|quart))\b'
}

In [12]:
def rotate_image(image, angle):
    
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def get_words_with_metadata(image, orientation):

    data = pytesseract.image_to_data(image, output_type=Output.DICT)
    words = []
    for i in range(len(data['text'])):
        word = data['text'][i].strip()
        if word:
            x, y, w, h = data['left'][i], data['top'][i], data['width'][i], data['height'][i]
            confidence = data['conf'][i]
            words.append({
                'word': word,
                'position': {'x': x, 'y': y, 'width': w, 'height': h},
                'orientation': orientation,
                'confidence': confidence
            })

    return words

In [26]:
def extract_dimensional_words(word_list):

    extracted_words = {key: [] for key in patterns.keys()}

    for word_data in word_list:
        word = word_data['word']
        for key, pattern in patterns.items():
            matches = re.findall(pattern, word, re.IGNORECASE)
            if matches:
                extracted_words[key].append({
                    'word': matches[0][0],
                    'metadata': word_data
                })

    return extracted_words

def extract_and_classify(image):

    # Get words from the original image (0 degrees orientation)
    words_0_deg = get_words_with_metadata(image, orientation='0 degrees')

    # Rotate image +90 degrees and extract words
    image_90_deg = rotate_image(image, 90)
    words_90_deg = get_words_with_metadata(image_90_deg, orientation='+90 degrees')

    # Rotate image -90 degrees and extract words
    image_neg_90_deg = rotate_image(image, -90)
    words_neg_90_deg = get_words_with_metadata(image_neg_90_deg, orientation='-90 degrees')

    # Combine all words from all orientations
    all_words = words_0_deg + words_90_deg + words_neg_90_deg
    
    # Classify words into dimensional categories
    classified_words = extract_dimensional_words(all_words)

    return classified_words

In [16]:
def binarize_image(filename):
    image = cv2.imread(filename)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh_image = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
    
    # # Apply Adaptive Mean Thresholding
    # mean_thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
    #                                     cv2.THRESH_BINARY, 11, 2)
    
    # # Apply Adaptive Gaussian Thresholding
    # gaussian_thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    #                                         cv2.THRESH_BINARY, 11, 2)
    return thresh_image

In [31]:
train_data = pd.read_csv(r"dataset/train.csv")

In [32]:
train_data.head()

df = train_data.iloc[:50]

In [69]:
empty_data = {'weight': [], 'length': [], 'voltage': [], 'wattage': [], 'volume': []}

In [70]:
image_metadata = []
for index, row in df.iterrows():
    file_name = create_file_from_link(row['image_link'])

    if(file_name == -1):
        image_metadata.append(empty_data)
        continue
    
    preprocessed_image = binarize_image(file_name)
    a = extract_and_classify(preprocessed_image)
    print(a)
    image_metadata.append(a)
    os.remove(file_name)

{'weight': [], 'length': [], 'voltage': [], 'wattage': [], 'volume': []}
{'weight': [], 'length': [], 'voltage': [], 'wattage': [], 'volume': []}
{'weight': [{'word': '02g', 'metadata': {'word': '02g', 'position': {'x': 723, 'y': 698, 'width': 34, 'height': 22}, 'orientation': '0 degrees', 'confidence': 69}}, {'word': '0g', 'metadata': {'word': '0g', 'position': {'x': 289, 'y': 328, 'width': 17, 'height': 16}, 'orientation': '+90 degrees', 'confidence': 57}}, {'word': '7G', 'metadata': {'word': '7G', 'position': {'x': 499, 'y': 753, 'width': 17, 'height': 25}, 'orientation': '+90 degrees', 'confidence': 0}}, {'word': '4G', 'metadata': {'word': '4G', 'position': {'x': 439, 'y': 651, 'width': 17, 'height': 17}, 'orientation': '+90 degrees', 'confidence': 29}}, {'word': '02g', 'metadata': {'word': '02g', 'position': {'x': 361, 'y': 723, 'width': 22, 'height': 34}, 'orientation': '-90 degrees', 'confidence': 69}}], 'length': [], 'voltage': [], 'wattage': [], 'volume': []}
{'weight': [{'wor

In [74]:
df['image_data'] = image_metadata

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['image_data'] = image_metadata


In [75]:
req_df = df[['entity_name', 'entity_value', 'image_data', 'group_id']]

In [76]:
req_df.head()

Unnamed: 0,entity_name,entity_value,image_data,group_id
0,item_weight,500.0 gram,"{'weight': [], 'length': [], 'voltage': [], 'w...",748919
1,item_volume,1.0 cup,"{'weight': [], 'length': [], 'voltage': [], 'w...",916768
2,item_weight,0.709 gram,"{'weight': [{'word': '02g', 'metadata': {'word...",459516
3,item_weight,0.709 gram,"{'weight': [{'word': '2g', 'metadata': {'word'...",459516
4,item_weight,1400 milligram,"{'weight': [], 'length': [], 'voltage': [], 'w...",731432


In [77]:
for i in range(len(req_df)):
    req_df.loc[i, 'entity_name'] = req_df.loc[i, 'entity_name'].split('_')[-1] 

In [78]:
req_df

Unnamed: 0,entity_name,entity_value,image_data,group_id
0,weight,500.0 gram,"{'weight': [], 'length': [], 'voltage': [], 'w...",748919
1,volume,1.0 cup,"{'weight': [], 'length': [], 'voltage': [], 'w...",916768
2,weight,0.709 gram,"{'weight': [{'word': '02g', 'metadata': {'word...",459516
3,weight,0.709 gram,"{'weight': [{'word': '2g', 'metadata': {'word'...",459516
4,weight,1400 milligram,"{'weight': [], 'length': [], 'voltage': [], 'w...",731432
5,weight,1400 milligram,"{'weight': [], 'length': [], 'voltage': [], 'w...",731432
6,weight,1400 milligram,"{'weight': [{'word': '19g', 'metadata': {'word...",731432
7,weight,1400 milligram,"{'weight': [], 'length': [], 'voltage': [], 'w...",731432
8,weight,1400 milligram,"{'weight': [], 'length': [], 'voltage': [], 'w...",731432
9,weight,1400 milligram,"{'weight': [], 'length': [], 'voltage': [], 'w...",731432


In [79]:
for index, row in req_df.iterrows():
    key = row['entity_name']
    
    ## TODO: handle things here, like weight recomendation, etc..
    ## this is done very naively
    print(req_df.at[index, 'image_data'])
    print(key, type(key))
    req_df.at[index, 'image_data'] = row['image_data'].get(key, None)  

{'weight': [], 'length': [], 'voltage': [], 'wattage': [], 'volume': []}
weight <class 'str'>
{'weight': [], 'length': [], 'voltage': [], 'wattage': [], 'volume': []}
volume <class 'str'>
{'weight': [{'word': '02g', 'metadata': {'word': '02g', 'position': {'x': 723, 'y': 698, 'width': 34, 'height': 22}, 'orientation': '0 degrees', 'confidence': 69}}, {'word': '0g', 'metadata': {'word': '0g', 'position': {'x': 289, 'y': 328, 'width': 17, 'height': 16}, 'orientation': '+90 degrees', 'confidence': 57}}, {'word': '7G', 'metadata': {'word': '7G', 'position': {'x': 499, 'y': 753, 'width': 17, 'height': 25}, 'orientation': '+90 degrees', 'confidence': 0}}, {'word': '4G', 'metadata': {'word': '4G', 'position': {'x': 439, 'y': 651, 'width': 17, 'height': 17}, 'orientation': '+90 degrees', 'confidence': 29}}, {'word': '02g', 'metadata': {'word': '02g', 'position': {'x': 361, 'y': 723, 'width': 22, 'height': 34}, 'orientation': '-90 degrees', 'confidence': 69}}], 'length': [], 'voltage': [], 'wat

In [80]:
req_df

Unnamed: 0,entity_name,entity_value,image_data,group_id
0,weight,500.0 gram,[],748919
1,volume,1.0 cup,[],916768
2,weight,0.709 gram,"[{'word': '02g', 'metadata': {'word': '02g', '...",459516
3,weight,0.709 gram,"[{'word': '2g', 'metadata': {'word': '2g', 'po...",459516
4,weight,1400 milligram,[],731432
5,weight,1400 milligram,[],731432
6,weight,1400 milligram,"[{'word': '19g', 'metadata': {'word': '19g', '...",731432
7,weight,1400 milligram,[],731432
8,weight,1400 milligram,[],731432
9,weight,1400 milligram,[],731432


In [86]:
print(req_df.iloc[3]['image_data'])

[{'word': '2g', 'metadata': {'word': '2g', 'position': {'x': 457, 'y': 713, 'width': 30, 'height': 10}, 'orientation': '0 degrees', 'confidence': 12}}, {'word': '1g', 'metadata': {'word': 'unte4>1g)', 'position': {'x': 422, 'y': 827, 'width': 8, 'height': 46}, 'orientation': '0 degrees', 'confidence': 0}}, {'word': '281g', 'metadata': {'word': '281g', 'position': {'x': 348, 'y': 858, 'width': 9, 'height': 20}, 'orientation': '0 degrees', 'confidence': 42}}, {'word': '02g', 'metadata': {'word': '(02g', 'position': {'x': 554, 'y': 375, 'width': 26, 'height': 8}, 'orientation': '+90 degrees', 'confidence': 37}}, {'word': '0g', 'metadata': {'word': '0g', 'position': {'x': 375, 'y': 403, 'width': 9, 'height': 8}, 'orientation': '+90 degrees', 'confidence': 8}}, {'word': '1g', 'metadata': {'word': '1g', 'position': {'x': 690, 'y': 623, 'width': 18, 'height': 8}, 'orientation': '+90 degrees', 'confidence': 41}}, {'word': '220g', 'metadata': {'word': '220g', 'position': {'x': 689, 'y': 637, 'w

In [None]:
## model

In [89]:
req_df.to_csv('processed.csv')