In [68]:
import numpy as np
import pandas as pd
import tensorflow as tf
from skimage import io

In [69]:
from keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
import requests
from io import BytesIO
from PIL import UnidentifiedImageError

In [70]:
# Load the training table into a DataFrame.
# NOTE(review): the absolute /content/ prefix ties this cell to one environment
# (it looks like a Colab upload directory) — confirm or make it configurable.
data = pd.read_csv('/content/train_amazon_ml.csv')

In [71]:
# Display the loaded table (columns: image_link, group_id, entity_name, entity_value).
data

Unnamed: 0,image_link,group_id,entity_name,entity_value
0,https://m.media-amazon.com/images/I/61I9XdN6OF...,748919,item_weight,500.0 gram
1,https://m.media-amazon.com/images/I/71gSRbyXmo...,916768,item_volume,1.0 cup
2,https://m.media-amazon.com/images/I/61BZ4zrjZX...,459516,item_weight,0.709 gram
3,https://m.media-amazon.com/images/I/612mrlqiI4...,459516,item_weight,0.709 gram
4,https://m.media-amazon.com/images/I/617Tl40LOX...,731432,item_weight,1400 milligram
...,...,...,...,...
263854,https://m.media-amazon.com/images/I/612J1R1xHl...,558806,height,5.0 centimetre
263855,https://m.media-amazon.com/images/I/61Blzh2+28...,470067,height,8.5 inch
263856,https://m.media-amazon.com/images/I/51MsegDL9V...,204245,height,43.2 centimetre
263857,https://m.media-amazon.com/images/I/510KhVw4VS...,752266,height,9.1 centimetre


In [72]:
import os
import pandas as pd
from tqdm import tqdm
from PIL import Image

In [73]:
from urllib.parse import urlparse
import requests

In [74]:
def download_images(image_links, output_dir, timeout=30):
    """Download every image in ``image_links`` into ``output_dir``.

    Each file is stored under the last path component of its URL (query string
    and fragment are ignored). Failures are reported on stdout and do not stop
    the remaining downloads.

    Args:
        image_links: Iterable of image URLs.
        output_dir: Directory to save into; created if it does not exist.
        timeout: Seconds to wait for the server before giving up on one link.
            Without a timeout a single stalled connection blocks the whole run.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    for link in image_links:
        try:
            response = requests.get(link, timeout=timeout)
            response.raise_for_status()
            image_name = os.path.basename(urlparse(link).path)
            image_path = os.path.join(output_dir, image_name)

            # Decoding through PIL rejects payloads that are not images before
            # anything is written to disk; the context manager releases the
            # decoder once the file has been saved.
            with Image.open(BytesIO(response.content)) as image:
                image.save(image_path)
            print(f"Downloaded and saved image: {image_path}")
        except Exception as e:
            # Deliberately best-effort: report the failure and keep going.
            print(f"Error downloading or saving image from {link}: {e}")

In [75]:
from collections import OrderedDict

# Map each distinct entity_name to the position of its first occurrence.
unique_values = OrderedDict()
for position, name in enumerate(data['entity_name']):
    # setdefault only stores the position the first time a name is seen.
    unique_values.setdefault(name, position)

# First-occurrence positions, in the order the names were encountered.
b = list(unique_values.values())

for name, position in unique_values.items():
    print(f"Value: {name}, Index: {position}")

Value: item_weight, Index: 0
Value: item_volume, Index: 1
Value: voltage, Index: 25
Value: wattage, Index: 36
Value: maximum_weight_recommendation, Index: 197
Value: height, Index: 536
Value: depth, Index: 670
Value: width, Index: 3205


In [76]:
# Show the first-occurrence row positions collected in the previous cell.
b

[0, 1, 25, 36, 197, 536, 670, 3205]

In [77]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [78]:
class ImageCaptionDataset(Dataset):
    """Dataset that pairs locally stored images with a label read from a CSV.

    Column 0 of the CSV holds an image link; only its file name is used to
    locate the image inside ``root_dir``. Column 1 is returned as the label.

    NOTE(review): in the CSV this notebook writes (image_link, group_id,
    entity_name, entity_value) column 1 is ``group_id`` — confirm that this is
    the intended training target.

    Args:
        csv_file: Path of the CSV file to read.
        root_dir: Directory containing the image files.
        transform: Optional callable applied to the image (as a PIL image).
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    @staticmethod
    def _image_name(link):
        """Return the file name under which ``link`` is stored on disk.

        Mirrors download_images(), which names files after the URL *path*, so
        a query string or fragment on the link does not end up in the name.
        """
        return os.path.basename(urlparse(str(link)).path)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        Returns ``(None, None)`` when the image file is missing. The default
        DataLoader collate function cannot batch ``None``, so callers that
        expect missing files need a collate_fn that filters such samples.
        """
        img_name = self._image_name(self.data.iloc[idx, 0])
        img_path = os.path.join(self.root_dir, img_name)

        try:
            image = io.imread(img_path)
        except FileNotFoundError:
            print(f"File {img_path} not found.")
            return None, None

        label = self.data.iloc[idx, 1]

        if self.transform:
            # Grayscale and RGBA files would otherwise reach a 3-channel
            # Normalize with 1 or 4 channels and fail; force RGB first.
            image = Image.fromarray(image).convert("RGB")
            image = self.transform(image)

        label = torch.tensor(label)

        return image, label

In [79]:
# Preprocessing pipeline handed to the dataset: resize to 224x224, convert the
# PIL image to a tensor, then normalise each of the three channels.
# NOTE(review): the mean/std values look like the usual ImageNet statistics
# paired with torchvision's pretrained models — confirm.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [80]:
class ImageCaptioningModel(nn.Module):
    """Frozen ResNet-50 features -> linear projection -> one-step LSTM -> class scores.

    Args:
        embed_size: Size of the projected image embedding fed to the LSTM.
        hidden_size: Hidden size of the LSTM.
        vocab_size: Number of output classes produced per image.
    """

    def __init__(self, embed_size=256, hidden_size=512, vocab_size=10000):
        super(ImageCaptioningModel, self).__init__()
        self.cnn_model = models.resnet50(pretrained=True)
        # Freeze the backbone; only the layers defined below are trained.
        # NOTE(review): requires_grad=False does not stop BatchNorm running
        # statistics from updating while the module is in train() mode.
        for param in self.cnn_model.parameters():
            param.requires_grad = False
        num_ftrs = self.cnn_model.fc.in_features
        # Drop the classification head so the backbone returns pooled features.
        self.cnn_model.fc = Identity()
        self.fc = nn.Linear(num_ftrs, embed_size)
        # batch_first=True makes the LSTM read its input as (batch, seq, feature).
        # forward() feeds (batch, 1, embed); without this flag the batch axis is
        # taken as the time axis and every sample's output depends on the
        # samples that precede it in the batch.
        self.lstm = nn.LSTM(embed_size, hidden_size, batch_first=True)
        self.fc_out = nn.Linear(hidden_size, vocab_size)

    def forward(self, images):
        """Return class scores of shape (batch, vocab_size) for a batch of images."""
        features = self.cnn_model(images)
        features = features.view(features.size(0), -1)
        embeddings = self.fc(features)
        # Each image becomes a length-1 sequence: (batch, 1, embed_size).
        lstm_out, _ = self.lstm(embeddings.unsqueeze(1))
        outputs = self.fc_out(lstm_out.squeeze(1))
        return outputs

class Identity(nn.Module):
    """Pass-through layer whose ``forward`` hands back its input untouched."""

    def forward(self, x):
        """Return ``x`` itself, without copying or modifying it."""
        return x

In [81]:
from torch.optim import Adam
import torch.nn.functional as F

In [82]:
# Work on a copy so the frame loaded above stays untouched.
data1 = data.copy()

In [83]:
# Display the copy.
data1

Unnamed: 0,image_link,group_id,entity_name,entity_value
0,https://m.media-amazon.com/images/I/61I9XdN6OF...,748919,item_weight,500.0 gram
1,https://m.media-amazon.com/images/I/71gSRbyXmo...,916768,item_volume,1.0 cup
2,https://m.media-amazon.com/images/I/61BZ4zrjZX...,459516,item_weight,0.709 gram
3,https://m.media-amazon.com/images/I/612mrlqiI4...,459516,item_weight,0.709 gram
4,https://m.media-amazon.com/images/I/617Tl40LOX...,731432,item_weight,1400 milligram
...,...,...,...,...
263854,https://m.media-amazon.com/images/I/612J1R1xHl...,558806,height,5.0 centimetre
263855,https://m.media-amazon.com/images/I/61Blzh2+28...,470067,height,8.5 inch
263856,https://m.media-amazon.com/images/I/51MsegDL9V...,204245,height,43.2 centimetre
263857,https://m.media-amazon.com/images/I/510KhVw4VS...,752266,height,9.1 centimetre


In [None]:
# Parallel accumulators, one per column, filled by the loop in the next cell.
# Re-run this cell before re-running that loop, otherwise the rows are appended twice.
d_image = []
group_id1 = []
entity_name1 = []
entity_value1 = []

In [None]:
# Copy the first-occurrence row of every entity_name into the column lists.
columns_and_sinks = (
  ('image_link', d_image),
  ('group_id', group_id1),
  ('entity_name', entity_name1),
  ('entity_value', entity_value1),
)
for row_label in b:
  for column, sink in columns_and_sinks:
    sink.append(data1[column][row_label])

In [None]:
# Assemble the collected columns into a small frame: one row per distinct entity_name.
df = pd.DataFrame({'image_link': d_image, 'group_id': group_id1, 'entity_name': entity_name1, 'entity_value': entity_value1})

In [None]:
# Display the reduced frame.
df

Unnamed: 0,image_link,group_id,entity_name,entity_value
0,https://m.media-amazon.com/images/I/61I9XdN6OF...,748919,item_weight,500.0 gram
1,https://m.media-amazon.com/images/I/71gSRbyXmo...,916768,item_volume,1.0 cup
2,https://m.media-amazon.com/images/I/71nywfWZUw...,179080,voltage,48.0 volt
3,https://m.media-amazon.com/images/I/61o2ntPNNg...,179080,wattage,800.0 watt
4,https://m.media-amazon.com/images/I/71d+dz7ogk...,150913,maximum_weight_recommendation,15 kilogram
5,https://m.media-amazon.com/images/I/610bLFQIS3...,442321,height,95.0 centimetre
6,https://m.media-amazon.com/images/I/51k7GMS8dg...,630869,depth,21.0 centimetre
7,https://m.media-amazon.com/images/I/51GzV0nG31...,675317,width,22.0 millimetre


In [None]:
# Persist the reduced frame without the index column, so image_link is column 0
# and group_id is column 1 when the file is read back.
df.to_csv('unique2.csv', index=False)

In [None]:
# Fetch the image of every selected row into the train_images5 directory.
download_images(df['image_link'], 'train_images5')

Downloaded and saved image: train_images5/61I9XdN6OFL.jpg
Downloaded and saved image: train_images5/71gSRbyXmoL.jpg
Downloaded and saved image: train_images5/71nywfWZUwL.jpg
Downloaded and saved image: train_images5/61o2ntPNNgL.jpg
Downloaded and saved image: train_images5/71d+dz7ogkL.jpg
Downloaded and saved image: train_images5/610bLFQIS3L.jpg
Downloaded and saved image: train_images5/51k7GMS8dgL.jpg
Downloaded and saved image: train_images5/51GzV0nG31L.jpg


In [None]:
# Read the CSV from the same relative path df.to_csv() wrote it to; the former
# absolute '/content/unique2.csv' only resolved when the working directory
# happened to be /content.
train_dataset = ImageCaptionDataset('unique2.csv', 'train_images5', transform=transform)
# Batches are drawn in random order for training.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
# Build the model with its default sizes (256-d embedding, 512-d LSTM, 10000 classes).
model = ImageCaptioningModel()
# All parameters are handed to Adam, including the backbone ones whose
# requires_grad was switched off in the model's constructor.
optimizer = Adam(model.parameters(), lr=1e-4)



In [84]:
def _labels_to_class_indices(labels, num_classes, device=None):
    """Turn raw labels into a long tensor of valid class indices.

    A label that cannot be read as a number, or that falls outside
    ``[0, num_classes)``, is mapped to class 0.
    """
    indices = []
    for label in labels:
        try:
            value = float(label)
        except ValueError:
            value = 0
        if not (0 <= value < num_classes):
            value = 0
        # int() truncates toward zero, matching a float -> long tensor cast.
        indices.append(int(value))
    return torch.tensor(indices, dtype=torch.long, device=device)


def train(model, train_loader, optimizer, num_epochs=10):
    """Train ``model`` with cross-entropy loss.

    Args:
        model: Module mapping a batch of images to scores of shape (batch, classes).
        train_loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer over the model's parameters.
        num_epochs: Number of passes over ``train_loader``.

    After every epoch the loss of the *last* batch is printed. An epoch in
    which the loader yields no batch is reported instead of raising.
    """
    model.train()
    criterion = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        loss = None  # stays None when the loader yields nothing this epoch
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            # Targets are created on the outputs' device so the loss also works
            # when the model lives on an accelerator.
            targets = _labels_to_class_indices(labels, outputs.shape[1], outputs.device)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
        if loss is None:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: n/a (train_loader yielded no batches)")
        else:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}")

In [85]:
# Train with the default number of epochs (10).
train(model, train_loader, optimizer)

Epoch [1/10], Loss: 6.69210147857666
Epoch [2/10], Loss: 6.327707767486572
Epoch [3/10], Loss: 5.934715747833252
Epoch [4/10], Loss: 5.527118682861328
Epoch [5/10], Loss: 5.079813480377197
Epoch [6/10], Loss: 4.630392074584961
Epoch [7/10], Loss: 4.214686393737793
Epoch [8/10], Loss: 3.826475143432617
Epoch [9/10], Loss: 3.366891622543335
Epoch [10/10], Loss: 3.0933613777160645


In [86]:
from difflib import get_close_matches

In [87]:
def find_closest_value(predicted_value, possible_values, entity_name):
    """Pick the entry of ``possible_values`` that best matches ``predicted_value``.

    If ``predicted_value`` is numeric, the entry whose leading number is
    nearest to it is returned (the first such entry on ties). Otherwise, or
    when no entry starts with a number, the closest fuzzy string match is
    returned, or ``"Unknown"`` when nothing matches.

    Args:
        predicted_value: Number or string produced by the model.
        possible_values: Strings such as ``"500.0 gram"``.
        entity_name: Accepted for interface compatibility; currently unused.
    """
    try:
        target = float(predicted_value)
    except ValueError:
        target = None

    if target is not None:
        # Keep each number together with the string it came from. Searching
        # for str(number) afterwards picked wrong entries ("1.0" is contained
        # in "21.0 centimetre") and failed for values like "15 kilogram".
        numeric_candidates = []
        for val in possible_values:
            tokens = val.split()
            if tokens and tokens[0].replace('.', '', 1).isdigit():
                try:
                    numeric_candidates.append((float(tokens[0]), val))
                except ValueError:
                    # isdigit() accepts a few characters float() rejects.
                    continue
        if numeric_candidates:
            return min(numeric_candidates, key=lambda pair: abs(pair[0] - target))[1]

    # str() keeps the fuzzy matcher usable when a numeric prediction falls
    # through because no candidate starts with a number.
    matches = get_close_matches(str(predicted_value), possible_values, n=1, cutoff=0.1)
    return matches[0] if matches else "Unknown"

In [91]:
def decode_outputs(outputs, possible_values, entity_names):
    """Translate each row of model scores into one of ``possible_values``.

    For every row the index of the highest score is taken and handed to
    find_closest_value() together with the entity name at the same position.
    Returns the chosen values as a list, in row order.
    """
    def _decode_row(position, scores):
        # torch.max along dim 0 returns (max value, index of that value).
        best_index = torch.max(scores, 0)[1].item()
        name = entity_names[position]
        return find_closest_value(best_index, possible_values, name)

    return [_decode_row(position, scores) for position, scores in enumerate(outputs)]

In [92]:
def predict_and_show(model, data_loader, df):
    """Run ``model`` over ``data_loader`` and print actual vs. extracted values.

    Predictions are matched to the rows of ``df`` purely by position, so the
    samples must arrive in the same order as the rows of ``df``.

    Args:
        model: Module mapping a batch of images to class scores.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        df: Frame with ``entity_name`` and ``entity_value`` columns, row-aligned
            with the loader's dataset.
    """
    model.eval()

    # A shuffling loader would pair each prediction with an unrelated row of
    # df, so iterate the same dataset sequentially instead.
    if isinstance(getattr(data_loader, 'sampler', None), torch.utils.data.RandomSampler):
        data_loader = DataLoader(
            data_loader.dataset,
            batch_size=data_loader.batch_size,
            shuffle=False,
            num_workers=data_loader.num_workers,
            collate_fn=data_loader.collate_fn,
        )

    idx_offset = 0
    possible_values = df['entity_value'].astype(str).unique().tolist()
    with torch.no_grad():
        for images, _ in data_loader:
            outputs = model(images)
            # Positional slice of the rows that belong to this batch.
            entity_names = df['entity_name'].iloc[idx_offset:idx_offset + len(images)].tolist()
            decoded_texts = decode_outputs(outputs, possible_values, entity_names)
            for i in range(len(decoded_texts)):
                entity_name = df.iloc[idx_offset + i]['entity_name']
                entity_value = df.iloc[idx_offset + i]['entity_value']
                print(f"Entity Name: {entity_name}, Actual Value: {entity_value}, Extracted Value: {decoded_texts[i]}")
            idx_offset += len(decoded_texts)

In [93]:
# Run inference over the training images and print actual vs. extracted values.
predict_and_show(model, train_loader, df)

Entity Name: item_weight, Actual Value: 500.0 gram, Extracted Value: 1.0 cup
Entity Name: item_volume, Actual Value: 1.0 cup, Extracted Value: 1.0 cup
Entity Name: voltage, Actual Value: 48.0 volt, Extracted Value: 1.0 cup
Entity Name: wattage, Actual Value: 800.0 watt, Extracted Value: 1.0 cup
Entity Name: maximum_weight_recommendation, Actual Value: 15 kilogram, Extracted Value: 1.0 cup
Entity Name: height, Actual Value: 95.0 centimetre, Extracted Value: 1.0 cup
Entity Name: depth, Actual Value: 21.0 centimetre, Extracted Value: 1.0 cup
Entity Name: width, Actual Value: 22.0 millimetre, Extracted Value: 1.0 cup
