I have created some copies of this file, so feel free to edit it and make changes.

In [None]:
!pip install torch torchvision torchaudio
!pip install easyocr
!pip install transformers
!pip install opencv-python-headless


Collecting easyocr
  Downloading easyocr-1.7.1-py3-none-any.whl.metadata (11 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.6 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)
Downloading easyocr-1.7.1-py3-none-any.whl (2.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m43.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.2/307.2 kB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyclipper-1.3.0.post5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (

In [None]:
import os
import pandas as pd
import logging
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import urllib.request
import time

import easyocr  # Deep learning-based OCR
from transformers import pipeline  # NER from Transformers
import re  # Regular expressions for pattern matching



In [None]:
# Module-level setup: build the OCR reader and NER pipeline, configure logging,
# and load the train/test CSV files into DataFrames.

# Initialize EasyOCR reader
reader = easyocr.Reader(['en'])  # Initialize reader for English language

# Initialize NER Pipeline using Transformers
# NOTE(review): no `device` argument is passed; the recorded cell output reports
# that the model is placed on CPU even though a GPU was available.
ner = pipeline("ner", model="dslim/bert-base-NER")

# Setup logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Correct paths for the CSV files
train_csv_file_path = r'/content/train.csv'
test_csv_file_path = r'/content/test.csv'


# Load the Data
logging.info("Loading training and testing data from CSV files.")
# NOTE(review): train_df is not referenced by any other visible code; confirm it is needed.
train_df = pd.read_csv(train_csv_file_path)
test_df = pd.read_csv(test_csv_file_path)

# Function to create directory if it does not exist
def create_directory(directory_path):
    """Create ``directory_path`` (including parents) if it is missing.

    The directory is created with a single ``os.makedirs`` call instead of an
    ``os.path.exists`` check followed by a create, so concurrent callers
    cannot hit the check-then-create race that raises ``FileExistsError``.

    Args:
        directory_path: Path of the directory to ensure.
    """
    try:
        os.makedirs(directory_path)
    except FileExistsError:
        # Already present, or another caller created it first.
        logging.info(f"Directory already exists: {directory_path}")
    else:
        logging.info(f"Created directory: {directory_path}")

# Function to download images
def download_image(image_link, save_folder, retries=3, delay=3):
    """Download ``image_link`` into ``save_folder`` and return the local path.

    The file name is the last path component of the link. A file that is
    already present is reused. The download is written to a temporary
    ``.part`` file and renamed only once it has completed, so an interrupted
    transfer is never mistaken for a finished image on a later run.

    Args:
        image_link: URL of the image; non-string values are rejected.
        save_folder: Directory that receives the image; created if missing.
        retries: Maximum number of download attempts.
        delay: Seconds to wait between failed attempts.

    Returns:
        The path of the saved image, or ``None`` when the link is invalid or
        every attempt failed.
    """
    # exist_ok avoids a check-then-create race between worker threads.
    os.makedirs(save_folder, exist_ok=True)

    if not isinstance(image_link, str):
        logging.warning(f"Invalid image link: {image_link}")
        return None

    filename = Path(image_link).name
    if not filename:
        # Without a file name the save path would be the folder itself.
        logging.warning(f"Invalid image link: {image_link}")
        return None
    image_save_path = os.path.join(save_folder, filename)

    if os.path.exists(image_save_path):
        logging.info(f"Image already exists at {image_save_path}, skipping download.")
        return image_save_path

    partial_path = f"{image_save_path}.part"
    for attempt in range(retries):
        try:
            urllib.request.urlretrieve(image_link, partial_path)
            os.replace(partial_path, image_save_path)
        except Exception as e:  # best effort: any failure just triggers a retry
            logging.warning(f"Failed to download {image_link} on attempt {attempt + 1}. Error: {e}")
            try:
                os.remove(partial_path)
            except OSError:
                pass  # no partial file was left behind
            if attempt + 1 < retries:
                # No point waiting after the final attempt.
                time.sleep(delay)
        else:
            logging.info(f"Downloaded image: {image_save_path}")
            return image_save_path

    logging.error(f"Failed to download image after {retries} attempts: {image_link}")
    return None

# Optimized concurrent image download
def download_images_concurrently(image_links, save_folder):
    """Download every link in ``image_links`` into ``save_folder`` in parallel.

    Each link is handed to ``download_image`` on a pool of 10 worker threads.
    An exception raised by a worker is logged and does not stop the others.
    """
    with ThreadPoolExecutor(max_workers=10) as pool:
        link_by_task = {}
        for url in image_links:
            link_by_task[pool.submit(download_image, url, save_folder)] = url

        for task in as_completed(link_by_task):
            link = link_by_task[task]
            try:
                task.result()
            except Exception as exc:
                logging.error(f"Image download failed for {link}: {exc}")

# Function to detect and extract text from images
def extract_text_from_image(image_path):
    """Run the module-level OCR ``reader`` on ``image_path`` and return its text.

    The element at index 1 of every detection is taken as the recognised
    string (presumably EasyOCR's ``(bbox, text, confidence)`` layout; confirm),
    and the pieces are joined with single spaces.
    """
    detections = reader.readtext(image_path)
    return ' '.join(detection[1] for detection in detections)

# Regex patterns for extracting entity features
# The string literal below is an earlier version of the patterns (full unit
# names only). It is an inert expression kept for reference.
'''entity_patterns = {
    "item_weight": r'(\d+(\.\d+)?\s?(gram|kilogram|microgram|milligram|ounce|pound|ton))',
    "item_volume": r'(\d+(\.\d+)?\s?(centilitre|cubic foot|cubic inch|cup|decilitre|fluid ounce|gallon|imperial gallon|litre|microlitre|millilitre|pint|quart))',
    "height": r'(\d+(\.\d+)?\s?(centimetre|foot|inch|metre|millimetre|yard))',
    "width": r'(\d+(\.\d+)?\s?(centimetre|foot|inch|metre|millimetre|yard))',
    "dimension": r'(\d+(\.\d+)?\s?(mm|cm|m|in|ft|yd))',
    "voltage": r'(\d+(\.\d+)?\s?(volt|kilovolt|millivolt))',
    "wattage": r'(\d+(\.\d+)?\s?(watt|kilowatt))'
}'''

# Unit alternations that are shared by more than one entity.
_WEIGHT_UNITS = 'gram|kilogram|microgram|milligram|ounce|pound|ton|g|kg|mg|lb'
_VOLUME_UNITS = 'centilitre|cubic foot|cubic inch|cup|decilitre|fluid ounce|gallon|imperial gallon|litre|microlitre|millilitre|pint|quart|cl|ml|l|fl oz|gal'
_LENGTH_UNITS = 'centimetre|foot|inch|metre|millimetre|yard|cm|ft|in|mm|m|yd'


def _quantity_pattern(units):
    """Return the regex source for a number followed by one of ``units``.

    Group 1 is the whole quantity, group 2 the optional decimal part and
    group 3 the unit.
    """
    return r'(\d+(\.\d+)?\s?(' + units + r'))'


# Entity name -> regex source (a plain string, not a compiled pattern).
# NOTE(review): the patterns have no trailing word boundary, so with
# IGNORECASE a text such as "10 lights" yields "10 l" for item_volume;
# confirm whether that is intended.
entity_patterns = {
    "item_weight": _quantity_pattern(_WEIGHT_UNITS),
    "item_volume": _quantity_pattern(_VOLUME_UNITS),
    "height": _quantity_pattern(_LENGTH_UNITS),
    "width": _quantity_pattern(_LENGTH_UNITS),
    "depth": _quantity_pattern(_LENGTH_UNITS),
    "dimension": _quantity_pattern('mm|cm|m|in|ft|yd'),
    "voltage": _quantity_pattern('volt|kilovolt|millivolt|V|kV|mV'),
    "wattage": _quantity_pattern('watt|kilowatt|W|kW'),
    "maximum_weight_recommendation": _quantity_pattern(_WEIGHT_UNITS),
}

# Function to extract entities from text using NER
def extract_entities(text):
    """Group the words tagged by the module-level ``ner`` pipeline by label.

    Returns:
        A dict mapping each item's ``'entity'`` value to the list of its
        ``'word'`` values, in the order the pipeline produced them.
    """
    words_by_label = {}
    for token in ner(text):
        words_by_label.setdefault(token['entity'], []).append(token['word'])
    return words_by_label





Progress: |██████████████████████████████████████████████████| 100.0% Complete



Progress: |██████████████████████████████████████████████████| 100.0% Complete

  net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
  model.load_state_dict(torch.load(model_path, map_location=device))
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/829 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/59.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [None]:
# Combine text extraction and entity classification
def extract_features_from_image(image_path):
    """OCR ``image_path`` and collect quantity-with-unit strings per entity.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A ``(text, entity_features)`` tuple: the OCR text and a dict mapping
        every entity name in ``entity_patterns`` that matched to the list of
        matched substrings. Entities without a match are omitted.
    """
    text = extract_text_from_image(image_path)

    # NER is deliberately not run here: its output is not part of the
    # returned features and it costs one model call per image.
    entity_features = {}
    for entity_name, pattern in entity_patterns.items():
        # Group 1 of every pattern spans the whole "number + unit" match.
        found = [m.group(1) for m in re.finditer(pattern, text, re.IGNORECASE)]
        if found:
            entity_features[entity_name] = found

    return text, entity_features

# Predict for Test Data
def generate_predictions(test_df, save_folder, limit=1000):
    """Download test images and extract text/entity features for each row.

    Args:
        test_df: DataFrame with ``image_link``, ``entity_name`` and ``index``
            columns.
        save_folder: Directory where the images are stored.
        limit: Maximum number of unique images to download and of rows to
            process. ``None`` processes everything. It was previously an
            undefined name, which raised ``NameError`` on the first call.

    Returns:
        A DataFrame with ``index``, ``text`` and ``extracted_entities``
        columns. Rows whose image is missing or whose link is invalid are
        skipped.
    """
    image_links = test_df['image_link'].unique()
    rows = test_df
    if limit is not None:
        image_links = image_links[:limit]
        rows = test_df.head(limit)

    download_images_concurrently(image_links, save_folder)

    results = []
    # Rows may repeat an image link (hence unique() above); OCR each file once.
    features_by_path = {}
    for _, row in rows.iterrows():
        image_url = row['image_link']
        index = row['index']

        if not isinstance(image_url, str):
            # e.g. NaN from an empty CSV cell; Path() would raise on it.
            logging.warning(f"Invalid image link: {image_url}")
            continue

        image_path = os.path.join(save_folder, Path(image_url).name)
        if image_path not in features_by_path:
            if not os.path.exists(image_path):
                logging.warning(f"Image not found: {image_path}")
                continue
            features_by_path[image_path] = extract_features_from_image(image_path)

        text, entity_features = features_by_path[image_path]
        results.append({"index": index, "text": text, "extracted_entities": entity_features})

        logging.info(f"Processed index {index}: Extracted Text - {text}, Entities - {entity_features}")

    return pd.DataFrame(results)




In [None]:
# Run the prediction generation: ensure the image folder exists, build the
# predictions for the test set, and write them to CSV.
logging.info("Starting prediction generation for test data.")
save_folder = r'/content/Someresults'
create_directory(save_folder)  # Ensure save folder is created
predictions_df = generate_predictions(test_df, save_folder)
predictions_df.to_csv('output_advanced_features1.csv', index=False)
# Keep this message in sync with the file name passed to to_csv above.
logging.info("Predictions saved to output_advanced_features1.csv")

logging.info("Completed processing and saved the predictions.")

In [None]:
import re
import pandas as pd
import logging
import os
import time
import urllib.request
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from transformers import pipeline
import easyocr
from PIL import Image

# Module-level setup: build the OCR reader and NER pipeline, configure logging,
# and load the train/test CSV files into DataFrames.

# Initialize EasyOCR reader
reader = easyocr.Reader(['en'])  # Initialize reader for English language

# Initialize NER Pipeline using Transformers
# NOTE(review): no `device` argument is passed; the recorded cell output reports
# that the model is placed on CPU even though a GPU was available.
ner = pipeline("ner", model="dslim/bert-base-NER")

# Setup logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Correct paths for the CSV files
train_csv_file_path = r'/content/train.csv'
test_csv_file_path = r'/content/test.csv'

# Load the Data
logging.info("Loading training and testing data from CSV files.")
# NOTE(review): train_df is not referenced by any other visible code; confirm it is needed.
train_df = pd.read_csv(train_csv_file_path)
test_df = pd.read_csv(test_csv_file_path)

# Function to create directory if it does not exist
def create_directory(directory_path):
    """Create ``directory_path`` (including parents) if it is missing.

    The directory is created with a single ``os.makedirs`` call instead of an
    ``os.path.exists`` check followed by a create, so concurrent callers
    cannot hit the check-then-create race that raises ``FileExistsError``.

    Args:
        directory_path: Path of the directory to ensure.
    """
    try:
        os.makedirs(directory_path)
    except FileExistsError:
        # Already present, or another caller created it first.
        logging.info(f"Directory already exists: {directory_path}")
    else:
        logging.info(f"Created directory: {directory_path}")

# Function to download images
def download_image(image_link, save_folder, retries=3, delay=3):
    """Download ``image_link`` into ``save_folder`` and return the local path.

    The file name is the last path component of the link. A file that is
    already present is reused. The download is written to a temporary
    ``.part`` file and renamed only once it has completed, so an interrupted
    transfer is never mistaken for a finished image on a later run.

    Args:
        image_link: URL of the image; non-string values are rejected.
        save_folder: Directory that receives the image; created if missing.
        retries: Maximum number of download attempts.
        delay: Seconds to wait between failed attempts.

    Returns:
        The path of the saved image, or ``None`` when the link is invalid or
        every attempt failed.
    """
    # exist_ok avoids a check-then-create race between worker threads.
    os.makedirs(save_folder, exist_ok=True)

    if not isinstance(image_link, str):
        logging.warning(f"Invalid image link: {image_link}")
        return None

    filename = Path(image_link).name
    if not filename:
        # Without a file name the save path would be the folder itself.
        logging.warning(f"Invalid image link: {image_link}")
        return None
    image_save_path = os.path.join(save_folder, filename)

    if os.path.exists(image_save_path):
        logging.info(f"Image already exists at {image_save_path}, skipping download.")
        return image_save_path

    partial_path = f"{image_save_path}.part"
    for attempt in range(retries):
        try:
            urllib.request.urlretrieve(image_link, partial_path)
            os.replace(partial_path, image_save_path)
        except Exception as e:  # best effort: any failure just triggers a retry
            logging.warning(f"Failed to download {image_link} on attempt {attempt + 1}. Error: {e}")
            try:
                os.remove(partial_path)
            except OSError:
                pass  # no partial file was left behind
            if attempt + 1 < retries:
                # No point waiting after the final attempt.
                time.sleep(delay)
        else:
            logging.info(f"Downloaded image: {image_save_path}")
            return image_save_path

    logging.error(f"Failed to download image after {retries} attempts: {image_link}")
    return None

# Optimized concurrent image download
def download_images_concurrently(image_links, save_folder):
    """Download every link in ``image_links`` into ``save_folder`` in parallel.

    Each link is handed to ``download_image`` on a pool of 10 worker threads.
    An exception raised by a worker is logged and does not stop the others.
    """
    with ThreadPoolExecutor(max_workers=10) as pool:
        link_by_task = {}
        for url in image_links:
            link_by_task[pool.submit(download_image, url, save_folder)] = url

        for task in as_completed(link_by_task):
            link = link_by_task[task]
            try:
                task.result()
            except Exception as exc:
                logging.error(f"Image download failed for {link}: {exc}")

# Function to detect and extract text from images
def extract_text_from_image(image_path):
    """Run the module-level OCR ``reader`` on ``image_path`` and return its text.

    The element at index 1 of every detection is taken as the recognised
    string (presumably EasyOCR's ``(bbox, text, confidence)`` layout; confirm),
    and the pieces are joined with single spaces.
    """
    detections = reader.readtext(image_path)
    return ' '.join(detection[1] for detection in detections)

# Regex patterns for extracting entity features
# Each compiled pattern matches a number followed by a unit; group 1 is the
# whole quantity. Every entry except "dimension" is rebuilt from
# entity_unit_map by the loop further down.
entity_patterns = {
    "item_weight": re.compile(r'(\d+(\.\d+)?\s?(gram|kilogram|microgram|milligram|ounce|pound|ton))', re.IGNORECASE),
    "item_volume": re.compile(r'(\d+(\.\d+)?\s?(centilitre|cubic foot|cubic inch|cup|decilitre|fluid ounce|gallon|imperial gallon|litre|microlitre|millilitre|pint|quart))', re.IGNORECASE),
    "height": re.compile(r'(\d+(\.\d+)?\s?(centimetre|foot|inch|metre|millimetre|yard))', re.IGNORECASE),
    "width": re.compile(r'(\d+(\.\d+)?\s?(centimetre|foot|inch|metre|millimetre|yard))', re.IGNORECASE),
    "dimension": re.compile(r'(\d+(\.\d+)?\s?(mm|cm|m|in|ft|yd))', re.IGNORECASE),
    "voltage": re.compile(r'(\d+(\.\d+)?\s?(volt|kilovolt|millivolt))', re.IGNORECASE),
    "wattage": re.compile(r'(\d+(\.\d+)?\s?(watt|kilowatt))', re.IGNORECASE)
}

# Entity unit map: entity name -> set of unit names allowed for it.
entity_unit_map = {
    'width': {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'},
    'depth': {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'},
    'height': {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'},
    'item_weight': {'gram', 'kilogram', 'microgram', 'milligram', 'ounce', 'pound', 'ton'},
    'maximum_weight_recommendation': {'gram', 'kilogram', 'microgram', 'milligram', 'ounce', 'pound', 'ton'},
    'voltage': {'kilovolt', 'millivolt', 'volt'},
    'wattage': {'kilowatt', 'watt'},
    'item_volume': {'centilitre', 'cubic foot', 'cubic inch', 'cup', 'decilitre', 'fluid ounce', 'gallon', 'imperial gallon', 'litre', 'microlitre', 'millilitre', 'pint', 'quart'}
}

# Add all units from entity_unit_map to regex patterns
for entity, units in entity_unit_map.items():
    # Set iteration order differs between interpreter runs, so sort to make
    # the pattern text reproducible. Longest-first also stops a short unit
    # from shadowing a longer one that starts with it.
    ordered_units = sorted(units, key=lambda unit: (-len(unit), unit))
    pattern = r'(\d+(\.\d+)?\s?(' + '|'.join(ordered_units) + r'))'
    entity_patterns[entity] = re.compile(pattern, re.IGNORECASE)

# Function to extract entities from text using NER and regex
def extract_entities_advanced(text, nlp=None, patterns=None):
    """Collect quantity-with-unit strings from ``text`` for every entity.

    Two sources are combined: an optional spaCy-style NER model and the
    compiled regex patterns.

    Args:
        text: Text to scan.
        nlp: Optional callable returning an object with ``.ents`` whose items
            expose ``.text`` and ``.label_``. When omitted, a module-level
            ``nlp`` is used if one exists; otherwise the NER step is skipped
            (no such model is created in the visible code).
        patterns: Mapping of entity name to compiled regex whose group 1 is
            the whole quantity. Defaults to the module-level
            ``entity_patterns``.

    Returns:
        A dict mapping entity name to the list of distinct matches in
        first-seen order. Entities without a match are omitted.
    """
    if patterns is None:
        patterns = entity_patterns
    if nlp is None:
        nlp = globals().get("nlp")

    collected = {}

    # Extract entities based on NER (only when a model is available).
    if nlp is not None:
        for ent in nlp(text).ents:
            if ent.label_ not in {'QUANTITY', 'CARDINAL'}:
                continue
            for entity_name, pattern in patterns.items():
                # Keep the span when it matches that entity's own
                # number-plus-unit pattern.
                if pattern.search(ent.text):
                    collected.setdefault(entity_name, []).append(ent.text)

    # Also use regex to find matches directly in the text.
    for entity_name, pattern in patterns.items():
        for match in pattern.findall(text):
            collected.setdefault(entity_name, []).append(match[0])

    # dict.fromkeys removes duplicates while keeping a deterministic order.
    return {
        name: list(dict.fromkeys(values))
        for name, values in collected.items()
        if values
    }

# Function to extract entities from text using NER
def extract_entities(text):
    """Group the words tagged by the module-level ``ner`` pipeline by label.

    Returns:
        A dict mapping each item's ``'entity'`` value to the list of its
        ``'word'`` values, in the order the pipeline produced them.
    """
    words_by_label = {}
    for token in ner(text):
        words_by_label.setdefault(token['entity'], []).append(token['word'])
    return words_by_label

# Updated function to extract text and entities
def extract_features_from_image(image_path):
    """OCR ``image_path``, clean the text and extract entity features from it.

    Returns:
        A ``(cleaned_text, entity_features)`` tuple.
    """
    # NOTE(review): clean_text is not defined anywhere in the visible code;
    # confirm it is provided elsewhere before running this cell.
    cleaned_text = clean_text(extract_text_from_image(image_path))
    return cleaned_text, extract_entities_advanced(cleaned_text)

# Sample debug to see cleaned text and extracted entities
def debug_sample_extraction(image_path):
    """Log the cleaned OCR text and extracted entities for one image."""
    text, features = extract_features_from_image(image_path)
    for message in (f"Cleaned Text: {text}", f"Extracted Entities: {features}"):
        logging.info(message)

# Test with a sample image to check the extraction process




  net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
  model.load_state_dict(torch.load(model_path, map_location=device))
Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [None]:
# Disabled sample run, kept as an inert string literal; remove the surrounding
# quotes to execute it.
'''sample_image_path = '/content/Someresults/110EibNyclL.jpg'  # Replace with actual path
debug_sample_extraction(sample_image_path)
'''

"sample_image_path = '/content/Someresults/110EibNyclL.jpg'  # Replace with actual path\ndebug_sample_extraction(sample_image_path)\n"

In [None]:
def debug_sample_extraction(image_path):
    """Print the cleaned OCR text and extracted entities for one image."""
    text, features = extract_features_from_image(image_path)
    for line in (f"Cleaned Text: {text}", f"Extracted Entities: {features}"):
        print(line)


In [None]:
!pip install tqdm



In [None]:
import os
import pandas as pd
import logging
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import urllib.request
import time

import easyocr  # Deep learning-based OCR
from transformers import pipeline  # NER from Transformers
import re  # Regular expressions for pattern matching
from tqdm import tqdm  # For progress bar
from IPython.display import display, clear_output

# Module-level setup: build the OCR reader and NER pipeline, configure logging,
# and load the train/test CSV files into DataFrames.

# Initialize EasyOCR reader
reader = easyocr.Reader(['en'])  # Initialize reader for English language

# Initialize NER Pipeline using Transformers
# NOTE(review): no `device` argument is passed here; confirm which device the
# model should run on.
ner = pipeline("ner", model="dslim/bert-base-NER")

# Setup logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Correct paths for the CSV files
train_csv_file_path = r'/content/train.csv'
test_csv_file_path = r'/content/test.csv'

# Load the Data
logging.info("Loading training and testing data from CSV files.")
# NOTE(review): train_df is not referenced by any other visible code; confirm it is needed.
train_df = pd.read_csv(train_csv_file_path)
test_df = pd.read_csv(test_csv_file_path)

# Function to create directory if it does not exist
def create_directory(directory_path):
    """Create ``directory_path`` (including parents) if it is missing.

    The directory is created with a single ``os.makedirs`` call instead of an
    ``os.path.exists`` check followed by a create, so concurrent callers
    cannot hit the check-then-create race that raises ``FileExistsError``.

    Args:
        directory_path: Path of the directory to ensure.
    """
    try:
        os.makedirs(directory_path)
    except FileExistsError:
        # Already present, or another caller created it first.
        logging.info(f"Directory already exists: {directory_path}")
    else:
        logging.info(f"Created directory: {directory_path}")

# Function to download images
def download_image(image_link, save_folder, retries=3, delay=3):
    """Download ``image_link`` into ``save_folder`` and return the local path.

    The file name is the last path component of the link. A file that is
    already present is reused. The download is written to a temporary
    ``.part`` file and renamed only once it has completed, so an interrupted
    transfer is never mistaken for a finished image on a later run.

    Args:
        image_link: URL of the image; non-string values are rejected.
        save_folder: Directory that receives the image; created if missing.
        retries: Maximum number of download attempts.
        delay: Seconds to wait between failed attempts.

    Returns:
        The path of the saved image, or ``None`` when the link is invalid or
        every attempt failed.
    """
    # exist_ok avoids a check-then-create race between worker threads.
    os.makedirs(save_folder, exist_ok=True)

    if not isinstance(image_link, str):
        logging.warning(f"Invalid image link: {image_link}")
        return None

    filename = Path(image_link).name
    if not filename:
        # Without a file name the save path would be the folder itself.
        logging.warning(f"Invalid image link: {image_link}")
        return None
    image_save_path = os.path.join(save_folder, filename)

    if os.path.exists(image_save_path):
        logging.info(f"Image already exists at {image_save_path}, skipping download.")
        return image_save_path

    partial_path = f"{image_save_path}.part"
    for attempt in range(retries):
        try:
            urllib.request.urlretrieve(image_link, partial_path)
            os.replace(partial_path, image_save_path)
        except Exception as e:  # best effort: any failure just triggers a retry
            logging.warning(f"Failed to download {image_link} on attempt {attempt + 1}. Error: {e}")
            try:
                os.remove(partial_path)
            except OSError:
                pass  # no partial file was left behind
            if attempt + 1 < retries:
                # No point waiting after the final attempt.
                time.sleep(delay)
        else:
            logging.info(f"Downloaded image: {image_save_path}")
            return image_save_path

    logging.error(f"Failed to download image after {retries} attempts: {image_link}")
    return None

# Optimized concurrent image download
def download_images_concurrently(image_links, save_folder):
    """Download every link in ``image_links`` into ``save_folder`` in parallel.

    Each link is handed to ``download_image`` on a pool of 10 worker threads.
    An exception raised by a worker is logged and does not stop the others.
    """
    with ThreadPoolExecutor(max_workers=10) as pool:
        link_by_task = {}
        for url in image_links:
            link_by_task[pool.submit(download_image, url, save_folder)] = url

        for task in as_completed(link_by_task):
            link = link_by_task[task]
            try:
                task.result()
            except Exception as exc:
                logging.error(f"Image download failed for {link}: {exc}")

# Function to detect and extract text from images
def extract_text_from_image(image_path):
    """Run the module-level OCR ``reader`` on ``image_path`` and return its text.

    The element at index 1 of every detection is taken as the recognised
    string (presumably EasyOCR's ``(bbox, text, confidence)`` layout; confirm),
    and the pieces are joined with single spaces.
    """
    detections = reader.readtext(image_path)
    return ' '.join(detection[1] for detection in detections)

# Regex patterns for extracting entity features
# Unit alternations that are shared by more than one entity.
_WEIGHT_UNITS = 'gram|kilogram|microgram|milligram|ounce|pound|ton|g|kg|mg|lb'
_VOLUME_UNITS = 'centilitre|cubic foot|cubic inch|cup|decilitre|fluid ounce|gallon|imperial gallon|litre|microlitre|millilitre|pint|quart|cl|ml|l|fl oz|gal'
_LENGTH_UNITS = 'centimetre|foot|inch|metre|millimetre|yard|cm|ft|in|mm|m|yd'


def _quantity_pattern(units):
    """Return the regex source for a number followed by one of ``units``.

    Group 1 is the whole quantity, group 2 the optional decimal part and
    group 3 the unit.
    """
    return r'(\d+(\.\d+)?\s?(' + units + r'))'


# Entity name -> regex source (a plain string, not a compiled pattern).
# NOTE(review): the patterns have no trailing word boundary, so with
# IGNORECASE a text such as "10 lights" yields "10 l" for item_volume;
# confirm whether that is intended.
entity_patterns = {
    "item_weight": _quantity_pattern(_WEIGHT_UNITS),
    "item_volume": _quantity_pattern(_VOLUME_UNITS),
    "height": _quantity_pattern(_LENGTH_UNITS),
    "width": _quantity_pattern(_LENGTH_UNITS),
    "depth": _quantity_pattern(_LENGTH_UNITS),
    "dimension": _quantity_pattern('mm|cm|m|in|ft|yd'),
    "voltage": _quantity_pattern('volt|kilovolt|millivolt|V|kV|mV'),
    "wattage": _quantity_pattern('watt|kilowatt|W|kW'),
    "maximum_weight_recommendation": _quantity_pattern(_WEIGHT_UNITS),
}

# Function to extract entities from text using NER
def extract_entities(text):
    """Group the words tagged by the module-level ``ner`` pipeline by label.

    Returns:
        A dict mapping each item's ``'entity'`` value to the list of its
        ``'word'`` values, in the order the pipeline produced them.
    """
    words_by_label = {}
    for token in ner(text):
        words_by_label.setdefault(token['entity'], []).append(token['word'])
    return words_by_label

# Combine text extraction and entity classification
def extract_features_from_image(image_path):
    """OCR ``image_path`` and collect quantity-with-unit strings per entity.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A ``(text, entity_features)`` tuple: the OCR text and a dict mapping
        every entity name in ``entity_patterns`` that matched to the list of
        matched substrings. Entities without a match are omitted.
    """
    text = extract_text_from_image(image_path)

    # NER is deliberately not run here: its output is not part of the
    # returned features and it costs one model call per image.
    entity_features = {}
    for entity_name, pattern in entity_patterns.items():
        # Group 1 of every pattern spans the whole "number + unit" match.
        found = [m.group(1) for m in re.finditer(pattern, text, re.IGNORECASE)]
        if found:
            entity_features[entity_name] = found

    return text, entity_features

# Predict for Test Data
def generate_predictions(test_df, save_folder):
    """Download all test images and extract text/entity features per row.

    Args:
        test_df: DataFrame with ``image_link``, ``entity_name`` and ``index``
            columns.
        save_folder: Directory where the images are stored.

    Returns:
        A DataFrame with ``index``, ``text`` and ``extracted_entities``
        columns. Rows whose image is missing or whose link is invalid are
        skipped.
    """
    # Every unique image is downloaded; no row limit is applied here.
    image_links = test_df['image_link'].unique()

    # Download the images with progress
    logging.info("Downloading images...")
    download_images_concurrently(image_links, save_folder)

    results = []
    # Rows may repeat an image link (hence unique() above); OCR each file once.
    features_by_path = {}

    # Wrapping the iterator advances the bar for every row, including skipped
    # ones, so it can actually reach 100%.
    rows = tqdm(test_df.iterrows(), total=len(test_df), desc="Processing Images")
    for _, row in rows:
        image_url = row['image_link']
        index = row['index']

        if not isinstance(image_url, str):
            # e.g. NaN from an empty CSV cell; Path() would raise on it.
            logging.warning(f"Invalid image link: {image_url}")
            continue

        image_path = os.path.join(save_folder, Path(image_url).name)
        if image_path not in features_by_path:
            if not os.path.exists(image_path):
                logging.warning(f"Image not found: {image_path}")
                continue
            features_by_path[image_path] = extract_features_from_image(image_path)

        text, entity_features = features_by_path[image_path]
        results.append({"index": index, "text": text, "extracted_entities": entity_features})

        # Print current progress in Colab
        clear_output(wait=True)
        display(f"Processed index {index}: Extracted Text - {text}, Entities - {entity_features}")

    return pd.DataFrame(results)

# Run the prediction generation: ensure the image folder exists, build the
# predictions for the whole test set, and write them to CSV.
logging.info("Starting prediction generation for test data.")
save_folder = r'/content/Someresults'
create_directory(save_folder)  # Ensure save folder is created
predictions_df = generate_predictions(test_df, save_folder)
# index=False keeps the DataFrame's row index out of the CSV.
predictions_df.to_csv('output_advanced_features1.csv', index=False)
logging.info("Predictions saved to output_advanced_features1.csv")

logging.info("Completed processing and saved the predictions.")


"Processed index 16684: Extracted Text - 30cm/11.81in 40cm/15in 24pcs of twist pins, Entities - {'height': ['30cm', '11.81in', '40cm', '15in'], 'width': ['30cm', '11.81in', '40cm', '15in'], 'depth': ['30cm', '11.81in', '40cm', '15in'], 'dimension': ['30cm', '11.81in', '40cm', '15in']}"

Processing Images:  13%|█▎        | 16668/131187 [1:59:46<15:01:04,  2.12it/s]