In [41]:
import cv2
import pytesseract
import platform
import requests
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from pytesseract import Output
import re

In [42]:
## pytesseract setup

def init():
    """Point pytesseract at a Homebrew-installed tesseract binary on macOS.

    On any other platform, or when none of the known Homebrew locations holds
    the binary, ``tesseract_cmd`` is left untouched so pytesseract keeps using
    whatever it was already configured with (by default, a PATH lookup).
    """
    if platform.system() != "Darwin":
        return

    # Homebrew's default prefix is /opt/homebrew on Apple Silicon and
    # /usr/local on Intel Macs; take the first location that actually exists.
    candidate_paths = (
        r"/opt/homebrew/bin/tesseract",
        r"/usr/local/bin/tesseract",
    )
    for candidate in candidate_paths:
        if os.path.isfile(candidate):
            pytesseract.pytesseract.tesseract_cmd = candidate
            return
init()

In [43]:
def create_file_from_link(image_link, timeout=10):
    """Download the image at ``image_link`` and save it as ``image.jpg``.

    Args:
        image_link: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive host.

    Returns:
        The local file name (``"image.jpg"``) on success, or ``-1`` when the
        server answers with a non-200 status or any exception is raised.
        Failures are reported with ``print`` rather than raised.

    Note:
        The output name is fixed, so every call overwrites the previous
        download in the current working directory.
    """
    image_name = "image.jpg"

    try:
        # Download the image; the context manager releases the connection on
        # every path, including non-200 responses.
        with requests.get(image_link, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download image. Status code: {response.status_code}")
                return -1

            # Save the image locally
            with open(image_name, 'wb') as file:
                file.write(response.content)
            return image_name

    except Exception as e:
        # Deliberately broad: callers rely on the -1 sentinel, not exceptions.
        print(f"An error occurred: {e}")
        return -1

In [44]:
def binarize_image(filename, threshold=100):
    """Load an image from disk and convert it to a black-and-white image.

    Args:
        filename: Path of the image file to read.
        threshold: Gray-level cut-off passed to ``cv2.threshold``; pixels
            brighter than it become 255, all others 0. Defaults to 100.

    Returns:
        The thresholded single-channel image.

    Raises:
        ValueError: If OpenCV cannot read ``filename``.
    """
    image = cv2.imread(filename)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise ValueError(f"Could not read image file: {filename}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Alternative kept for experimentation: cv2.adaptiveThreshold(gray, 255,
    # cv2.ADAPTIVE_THRESH_MEAN_C or cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    # cv2.THRESH_BINARY, 11, 2).
    _, thresh_image = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    return thresh_image

In [45]:
## constants
# Entity map for dimensional indicators: entity name -> set of unit names
# listed for that entity.
entity_unit_map = {
    # The three linear dimensions share one list of length units
    # (each entity still gets its own set object).
    **{
        dimension: {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'}
        for dimension in ('width', 'depth', 'height')
    },
    # Both weight entities share one list of mass units.
    **{
        weight_entity: {'gram', 'kilogram', 'microgram', 'milligram', 'ounce', 'pound', 'ton'}
        for weight_entity in ('item_weight', 'maximum_weight_recommendation')
    },
    'voltage': {'kilovolt', 'millivolt', 'volt'},
    'wattage': {'kilowatt', 'watt'},
    'item_volume': {
        'centilitre', 'cubic foot', 'cubic inch', 'cup', 'decilitre',
        'fluid ounce', 'gallon', 'imperial gallon', 'litre', 'microlitre',
        'millilitre', 'pint', 'quart',
    },
}

# Regex patterns for dimensions: category -> pattern matching a number
# (optionally with a decimal part) followed by one of the listed unit
# spellings. Group 1 of every pattern is the full "<number><unit>" text.
patterns = dict(
    weight=r'\b(\d+(\.\d+)?\s?(g|grams|kg|kilograms|lb|lbs|pounds))\b',
    length=r'\b(\d+(\.\d+)?\s?(cm|centimeters|mm|meters|inches|feet|ft))\b',
    voltage=r'\b(\d+(\.\d+)?\s?(V|volts|kV|kilovolts))\b',
    wattage=r'\b(\d+(\.\d+)?\s?(W|watts|kW|kilowatts))\b',
    volume=r'\b(\d+(\.\d+)?\s?(L|litres|ml|millilitres|gallon|cup|pint|quart))\b',
)

In [46]:
def rotate_image(image, angle):
    """Rotate ``image`` counter-clockwise by ``angle`` degrees without cropping.

    Warping into a canvas of the original ``(w, h)`` size cuts off the ends of
    any non-square image once it is turned by 90 degrees, so text near those
    ends never reaches OCR. Here the output canvas always fits the whole
    rotated image.

    Args:
        image: Image as a NumPy array (``image.shape[:2]`` is ``(h, w)``).
        angle: Rotation in degrees; positive values rotate counter-clockwise
            (same convention as ``cv2.getRotationMatrix2D``).

    Returns:
        A new array holding the rotated image. For multiples of 90 degrees the
        result is an exact pixel permutation; for other angles the canvas is
        enlarged to the rotated bounding box.
    """
    quarter_turns, remainder = divmod(angle, 90)
    if remainder == 0:
        # Right-angle turns need no interpolation: np.rot90 is lossless and
        # swaps height/width as required. copy() yields a contiguous array.
        return np.rot90(image, k=int(quarter_turns) % 4).copy()

    (h, w) = image.shape[:2]
    center = (w / 2.0, h / 2.0)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)

    # Size of the bounding box of the rotated image.
    cos, sin = abs(M[0, 0]), abs(M[0, 1])
    new_w = int(round(h * sin + w * cos))
    new_h = int(round(h * cos + w * sin))

    # Shift so the original centre lands on the centre of the new canvas.
    M[0, 2] += new_w / 2.0 - center[0]
    M[1, 2] += new_h / 2.0 - center[1]

    return cv2.warpAffine(image, M, (new_w, new_h))

def get_words_with_metadata(image, orientation):
    """Run Tesseract on ``image`` and describe every non-blank token it reports.

    Args:
        image: Image handed to ``pytesseract.image_to_data``.
        orientation: Label stored unchanged on every returned entry
            (callers in this notebook pass e.g. ``'0 degrees'``).

    Returns:
        A list of dicts with the keys ``'word'`` (whitespace-stripped text),
        ``'position'`` (``x``, ``y``, ``width``, ``height`` taken from the
        ``left``/``top``/``width``/``height`` columns), ``'orientation'`` and
        ``'confidence'`` (the ``conf`` column). Entries whose text is empty
        after stripping are skipped. Positions are relative to the image that
        was passed in.
    """
    ocr = pytesseract.image_to_data(image, output_type=Output.DICT)

    def describe(index, token):
        # Gather the row `index` of the column-oriented OCR result.
        return {
            'word': token,
            'position': {
                'x': ocr['left'][index],
                'y': ocr['top'][index],
                'width': ocr['width'][index],
                'height': ocr['height'][index],
            },
            'orientation': orientation,
            'confidence': ocr['conf'][index],
        }

    stripped_tokens = ((index, raw.strip()) for index, raw in enumerate(ocr['text']))
    return [describe(index, token) for index, token in stripped_tokens if token]

In [47]:
def extract_dimensional_words(word_list):
    """Sort OCR words into the categories of ``patterns`` they match.

    Args:
        word_list: Iterable of word dicts, each with at least a ``'word'`` key
            (as produced by ``get_words_with_metadata``).

    Returns:
        A dict with one list per key of ``patterns``. Every word that matches
        a category's regex (case-insensitively) contributes
        ``{'word': <first matched "<number><unit>" text>, 'metadata': <the
        original word dict>}`` to that category's list. A word may appear in
        several categories.
    """
    buckets = {}
    for category in patterns.keys():
        buckets[category] = []

    for entry in word_list:
        text = entry['word']
        for category, regex in patterns.items():
            hit = re.search(regex, text, re.IGNORECASE)
            if hit is None:
                continue
            # Group 1 is the outermost group: the whole "<number><unit>" span
            # of the leftmost match in the word.
            buckets[category].append({
                'word': hit.group(1),
                'metadata': entry
            })

    return buckets

def extract_and_classify(image):
    """OCR ``image`` in three orientations and classify the words found.

    The image is read as given, then rotated by +90 and by -90 degrees. The
    words of all three passes are concatenated in that order and handed to
    ``extract_dimensional_words``.

    Args:
        image: Image accepted by ``get_words_with_metadata`` and
            ``rotate_image``.

    Returns:
        Whatever ``extract_dimensional_words`` returns for the combined words.
    """
    # (orientation label, rotation angle); None means "use the image as given".
    passes = (
        ('0 degrees', None),
        ('+90 degrees', 90),
        ('-90 degrees', -90),
    )

    all_words = []
    for label, angle in passes:
        view = image if angle is None else rotate_image(image, angle)
        all_words += get_words_with_metadata(view, orientation=label)

    # Classify words into dimensional categories
    return extract_dimensional_words(all_words)

In [48]:
# Result template used when no image could be downloaded: one empty
# candidate list per category.
empty_data = {
    category: []
    for category in ('weight', 'length', 'voltage', 'wattage', 'volume')
}


In [146]:
## download an image and return the dimensional words extracted from it
## (probabilities are attached later, in get_probabilities)

def get_data_from_image(image_link):
    """Download an image and extract its dimensional words.

    Args:
        image_link: URL passed to ``create_file_from_link``.

    Returns:
        A one-element list. The element is the result of
        ``extract_and_classify`` for the downloaded image, or a fresh copy of
        ``empty_data`` when the download failed (``create_file_from_link``
        returned ``-1``).

    Raises:
        Whatever ``binarize_image`` or ``extract_and_classify`` raise; the
        downloaded file is removed before the exception propagates.
    """
    # get the image from link
    file_name = create_file_from_link(image_link)

    if file_name == -1:
        # Return a copy so callers can never mutate the shared template.
        return [{category: list(found) for category, found in empty_data.items()}]

    try:
        # preprocess the image, then OCR + classify it
        preprocessed_image = binarize_image(file_name)
        return [extract_and_classify(preprocessed_image)]
    finally:
        # Delete the download even when preprocessing or OCR fails.
        os.remove(file_name)

In [93]:
## Sample of the dict returned by extract_and_classify (truncated):
##{'weight': 
## [{'word': '02g', 
##'metadata': {'word': '02g', 'position': {'x': 723, 'y': 698, 'width': 34, 'height': 22}, 'orientation': '0 degrees', 'confidence': 69}},
##  {'word': '0g', 
## 'metadata': {'word': '0g', 'position': {'x': 289, 'y': 328, 'width': 17, 'height': 16}, 'orientation': '+90 degrees', 'confidence': 57}}

In [114]:
def normalize_columns(columns):
    """Standardise the four numeric features with fixed means and deviations.

    Args:
        columns: Mapping with the keys ``'feature_0'`` .. ``'feature_4'``.
            Any other keys are ignored.

    Returns:
        A new dict in which ``feature_0`` .. ``feature_3`` are replaced by
        ``(value - mean) / std`` and ``feature_4`` is copied unchanged.
    """
    # Hard-coded (mean, std) per numeric feature -- presumably computed on the
    # data the model was trained with; TODO confirm where they come from.
    stats = {
        'feature_0': (np.float64(579.5818181818182), np.float64(428.7543841006432)),
        'feature_1': (np.float64(1007.4545454545455), np.float64(521.0780782535028)),
        'feature_2': (np.float64(40.836363636363636), np.float64(35.57764780826612)),
        'feature_3': (np.float64(46.345454545454544), np.float64(41.67476284978546)),
    }

    scaled = {}
    for name, (mean, std) in stats.items():
        scaled[name] = (columns[name] - mean) / std
    scaled['feature_4'] = columns['feature_4']

    return scaled

In [115]:
def format_features(columns):
    """Flatten a feature dict into the 7-value vector fed to the model.

    Args:
        columns: Mapping with ``'feature_0'`` .. ``'feature_3'`` (numeric) and
            ``'feature_4'`` (orientation label).

    Returns:
        A 1-D NumPy array: the four numeric features followed by three flags
        telling whether the orientation equals ``'0 degrees'``,
        ``'+90 degrees'`` or ``'-90 degrees'`` (all three are false for any
        other label).
    """
    numeric_keys = ('feature_0', 'feature_1', 'feature_2', 'feature_3')
    coordinates = np.array([[columns[key] for key in numeric_keys]])

    orientation_labels = ('0 degrees', '+90 degrees', '-90 degrees')
    one_hot = [columns['feature_4'] == label for label in orientation_labels]

    # np.append without an axis flattens both operands, so the (1, 4) array
    # and the three flags become one flat array of length 7.
    return np.append(coordinates, one_hot)

In [116]:
import joblib
import numpy as np

In [125]:
# Models already loaded in this session, keyed by file path.
_MODEL_CACHE = {}


def _load_model(model_path):
    """Return the model stored at ``model_path``, reading it from disk once."""
    if model_path not in _MODEL_CACHE:
        # NOTE(security): joblib.load unpickles the file and can therefore
        # execute arbitrary code -- only load model files you trust.
        _MODEL_CACHE[model_path] = joblib.load(model_path)
    return _MODEL_CACHE[model_path]


def get_probability_from_model(columns, model_path='weight_model.pkl'):
    """Score one candidate word with the saved model.

    Args:
        columns: Raw feature dict (``'feature_0'`` .. ``'feature_4'``) as
            built by ``get_probabilities``.
        model_path: Path of the joblib file holding the model. The file is
            loaded on first use and reused afterwards, so scoring many
            candidates no longer re-reads it for every word.

    Returns:
        The first row of ``model.predict_proba`` for this single candidate
        (presumably one probability per class -- depends on the saved model).
    """
    normalized = normalize_columns(columns)
    feature_vector = format_features(normalized)

    model = _load_model(model_path)

    feature_names = ['feature_0', 'feature_1', 'feature_2', 'feature_3', '0 degrees', '+90 degrees', '-90 degrees']

    # Wrap the vector in a single-row DataFrame with named columns.
    features = pd.DataFrame([feature_vector], columns=feature_names)

    predicted_proba = model.predict_proba(features)
    return predicted_proba[0]

In [135]:
def get_probabilities(image_link, category_id, entity_name):
    """Score every word extracted from an image for one category.

    Args:
        image_link: URL of the image to process.
        category_id: Passed along as ``'feature_5'`` of each feature dict.
        entity_name: Key of the extraction result to score (e.g.
            ``'weight'``); a key that is not present raises ``KeyError``.

    Returns:
        A list of ``{'word': ..., 'prob': ...}`` dicts, one per extracted
        candidate and in extraction order. ``prob`` is element 1 of the model
        output (presumably the positive-class probability -- TODO confirm).
    """
    extracted = get_data_from_image(image_link)[0]
    candidates = extracted[entity_name]

    def score(candidate):
        # Bounding box and orientation of the word are the model's inputs.
        word = candidate['word']
        meta = candidate['metadata']
        box = meta['position']
        model_input = {
            'feature_0': box['x'],
            'feature_1': box['y'],
            'feature_2': box['width'],
            'feature_3': box['height'],
            'feature_4': meta['orientation'],
            'feature_5': category_id,
        }
        probabilities = get_probability_from_model(model_input)
        return {'word': word, 'prob': probabilities[1]}

    return [score(candidate) for candidate in candidates]

In [143]:
##testing the model

def predictor(image_link, category_id, entity_name):
    """Return the most probable candidate word for an image, or ``None``.

    Args:
        image_link, category_id, entity_name: Forwarded unchanged to
            ``get_probabilities``.

    Returns:
        The ``'word'`` of the candidate with the highest ``'prob'``; the
        earliest candidate wins a tie. ``None`` when there is no candidate
        whose probability is greater than -1.
    """
    candidates = get_probabilities(image_link, category_id, entity_name)

    # -1 is the floor a probability has to beat to be considered at all.
    eligible = [candidate for candidate in candidates if candidate['prob'] > -1]
    if not eligible:
        return None

    # max() keeps the first of several equally probable candidates.
    winner = max(eligible, key=lambda candidate: candidate['prob'])
    return winner['word']

In [145]:
# Smoke test: run the whole pipeline on one product image and print the
# best 'weight' candidate (None when no candidate was found). Needs network
# access, because the image is downloaded on every run.
guess = predictor("https://m.media-amazon.com/images/I/81N73b5khVL.jpg", 639090,"weight")
# TODO: format output
print(guess)

metadata [{'weight': [{'word': '30KG', 'metadata': {'word': '30KG', 'position': {'x': 1386, 'y': 43, 'width': 187, 'height': 63}, 'orientation': '0 degrees', 'confidence': 91}}, {'word': '30KG', 'metadata': {'word': '30KG,', 'position': {'x': 549, 'y': 313, 'width': 121, 'height': 42}, 'orientation': '0 degrees', 'confidence': 92}}, {'word': '30KG', 'metadata': {'word': '30KG', 'position': {'x': 1495, 'y': 1386, 'width': 63, 'height': 187}, 'orientation': '-90 degrees', 'confidence': 92}}, {'word': '30KG', 'metadata': {'word': '30KG,', 'position': {'x': 1246, 'y': 549, 'width': 42, 'height': 121}, 'orientation': '-90 degrees', 'confidence': 92}}], 'length': [{'word': '5mm', 'metadata': {'word': '5mm', 'position': {'x': 541, 'y': 181, 'width': 93, 'height': 34}, 'orientation': '0 degrees', 'confidence': 95}}, {'word': '5mm', 'metadata': {'word': '5mm', 'position': {'x': 1386, 'y': 541, 'width': 34, 'height': 93}, 'orientation': '-90 degrees', 'confidence': 95}}], 'voltage': [], 'wattage