In [1]:
import torch
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image
import cv2
import pytesseract


In [5]:
from torchvision.datasets.vision import VisionDataset
import requests
from PIL import Image
import torch
import pandas as pd
from io import BytesIO

class ProductDataset(VisionDataset):
    def __init__(self, csv_file, transforms=None):
        """
        Args:
            csv_file (str): Path to the CSV file containing image links, entity names, values, etc.
            transforms (callable, optional): Optional transform to be applied on a sample.
        """
        super(ProductDataset, self).__init__(root=None, transforms=transforms)
        self.data = pd.read_csv(csv_file)
        self.transforms = transforms

    def download_image(self, url):
        try:
            response = requests.get(url)
            if response.status_code == 200:
                img = Image.open(BytesIO(response.content)).convert("RGB")
                return img
            else:
                print(f"Failed to download image: {url}")
                return None
        except Exception as e:
            print(f"Error downloading image: {e}")
            return None

    def __getitem__(self, idx):
        # Access the row of the dataset
        row = self.data.iloc[idx]
        image_url = row['image_link']
        entity_name = row['entity_name']
        entity_value = row['entity_value']
        group_id = row['group_id']

        # Download the image
        img = self.download_image(image_url)
        if img is None:
            return None, None

        # Create a dummy target (adjust as needed for your use case)
        # Assuming you want to detect entities as boxes (which could be handled by object detection)
        target = {
            "entity_name": entity_name,
            "entity_value": entity_value,
            "group_id": group_id
        }

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.data)


In [6]:
# Load a pre-trained Faster R-CNN model and modify it for our use case
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the classifier with a new one (assuming we have N classes)
num_classes = 2  # background and entity (like weight, volume, etc.)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to C:\Users\abhis/.cache\torch\hub\checkpoints\fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100.0%


NameError: name 'torchvision' is not defined

In [4]:
import pandas as pd

# Load the dataset
df = pd.read_csv('dataset/train.csv')

# Display the first few rows
print(df.head())


                                          image_link  group_id  entity_name  \
0  https://m.media-amazon.com/images/I/61I9XdN6OF...    748919  item_weight   
1  https://m.media-amazon.com/images/I/71gSRbyXmo...    916768  item_volume   
2  https://m.media-amazon.com/images/I/61BZ4zrjZX...    459516  item_weight   
3  https://m.media-amazon.com/images/I/612mrlqiI4...    459516  item_weight   
4  https://m.media-amazon.com/images/I/617Tl40LOX...    731432  item_weight   

     entity_value  
0      500.0 gram  
1         1.0 cup  
2      0.709 gram  
3      0.709 gram  
4  1400 milligram  


In [None]:
import requests
from PIL import Image
from io import BytesIO

def download_image(image_url):
    try:
        response = requests.get(image_url)
        if response.status_code == 200:
            # Convert the downloaded content into an image
            img = Image.open(BytesIO(response.content))
            return img
        else:
            print(f"Failed to retrieve image from {image_url}")
            return None
    except Exception as e:
        print(f"Error downloading image: {e}")
        return None

# Example usage: download an image from the first row
image_url = df.iloc[0]['image_link']
img = download_image(image_url)

# Display the image (optional)
if img:
    img.show()


In [None]:
images = []
for index, row in df.iterrows():
    image_url = row['image_link']
    img = download_image(image_url)
    if img:
        images.append(img)
    else:
        print(f"Could not download image for row {index}")

# Now `images` contains all the downloaded images


In [None]:
def process_images_and_extract_entities(df):
    for index, row in df.iterrows():
        image_url = row['image_link']
        entity_name = row['entity_name']
        entity_value = row['entity_value']

        img = download_image(image_url)
        if img:
            # Apply OCR and entity extraction logic
            extracted_entities = extract_entity_values_from_image(img)
            print(f"Extracted entities for image {index}: {extracted_entities}")
        else:
            print(f"Image {index} could not be processed.")
