In [115]:
import os
import pandas as pd
import requests
from io import BytesIO
from PIL import Image
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm
from urllib.parse import urlparse
from sklearn.model_selection import train_test_split
from skimage import io
from difflib import get_close_matches
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn.functional as F
from torch.optim import Adam

In [116]:
# Load the full training table from the CSV export.
TRAIN_CSV_PATH = '/content/train_amazon_ml.csv'
data = pd.read_csv(TRAIN_CSV_PATH)

In [117]:
# Display the loaded training table (columns: image_link, group_id, entity_name, entity_value).
data

Unnamed: 0,image_link,group_id,entity_name,entity_value
0,https://m.media-amazon.com/images/I/61I9XdN6OF...,748919,item_weight,500.0 gram
1,https://m.media-amazon.com/images/I/71gSRbyXmo...,916768,item_volume,1.0 cup
2,https://m.media-amazon.com/images/I/61BZ4zrjZX...,459516,item_weight,0.709 gram
3,https://m.media-amazon.com/images/I/612mrlqiI4...,459516,item_weight,0.709 gram
4,https://m.media-amazon.com/images/I/617Tl40LOX...,731432,item_weight,1400 milligram
...,...,...,...,...
263854,https://m.media-amazon.com/images/I/612J1R1xHl...,558806,height,5.0 centimetre
263855,https://m.media-amazon.com/images/I/61Blzh2+28...,470067,height,8.5 inch
263856,https://m.media-amazon.com/images/I/51MsegDL9V...,204245,height,43.2 centimetre
263857,https://m.media-amazon.com/images/I/510KhVw4VS...,752266,height,9.1 centimetre


In [118]:
def download_images(image_links, output_dir, timeout=30):
    """Download every image URL and save it inside ``output_dir``.

    Each file is named after the last path component of its URL. Downloading
    is best-effort: a failure for one link is reported with ``print`` and the
    loop moves on to the next link.

    Args:
        image_links: Iterable of image URLs.
        output_dir: Directory the images are written to; created (including
            parents) when it does not exist yet.
        timeout: Seconds to wait for the server on each request. Without a
            timeout a stalled connection would block the loop indefinitely.

    Returns:
        None. Progress and errors are printed.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(output_dir, exist_ok=True)

    for link in image_links:
        try:
            response = requests.get(link, timeout=timeout)
            response.raise_for_status()

            # Use only the URL path so query strings never end up in file names.
            image_name = os.path.basename(urlparse(link).path)
            image_path = os.path.join(output_dir, image_name)

            # Decoding through PIL validates that the payload really is an image;
            # the context manager releases the decoder's resources afterwards.
            with Image.open(BytesIO(response.content)) as image:
                image.save(image_path)
            print(f"Downloaded and saved image: {image_path}")
        except Exception as e:
            # Deliberately broad: one bad link must not abort the whole batch.
            print(f"Error downloading or saving image from {link}: {e}")

In [6]:
from collections import OrderedDict

# Map every distinct entity_name to the position where it first appears.
unique_values = OrderedDict()
for index, value in enumerate(data['entity_name']):
    # setdefault keeps the earliest position and ignores later repeats.
    unique_values.setdefault(value, index)

# `b` holds those first-occurrence positions in order of appearance.
b = list(unique_values.values())

for value, index in unique_values.items():
    print(f"Value: {value}, Index: {index}")

Value: item_weight, Index: 0
Value: item_volume, Index: 1
Value: voltage, Index: 25
Value: wattage, Index: 36
Value: maximum_weight_recommendation, Index: 197
Value: height, Index: 536
Value: depth, Index: 670
Value: width, Index: 3205


In [8]:
# Display the first-occurrence row positions, one per distinct entity_name.
b

[0, 1, 25, 36, 197, 536, 670, 3205]

In [119]:
class ImageCaptionDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a CSV file.

    The first CSV column holds an image link; only its base name is used to
    locate the already-downloaded file inside ``root_dir``. The label is read
    from ``label_col`` and converted to a float tensor.

    Args:
        csv_file: Path of the CSV file to load with pandas.
        root_dir: Directory containing the downloaded images.
        transform: Optional callable applied to the RGB PIL image.
        label_col: Column holding the label, either as a 0-based position or
            as a column name. Defaults to 1 (the second column) for backward
            compatibility. NOTE: in a CSV laid out as
            image_link, group_id, entity_name, entity_value the second column
            is ``group_id``, which is an identifier rather than a class index,
            so callers should normally pass the real label column here.
    """

    def __init__(self, csv_file, root_dir, transform=None, label_col=1):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        # Resolve a column name to its position once, so a typo fails here
        # (KeyError) instead of in the middle of an epoch.
        if isinstance(label_col, str):
            label_col = self.data.columns.get_loc(label_col)
        self.label_col = label_col

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        Returns ``(None, None)`` (after printing a message) when the image
        file is missing, so a collate function can drop the sample.

        Raises:
            ValueError: If the label value cannot be converted to ``float``.
        """
        img_name = os.path.basename(self.data.iloc[idx, 0])
        img_path = os.path.join(self.root_dir, img_name)

        if not os.path.isfile(img_path):
            print(f"File {img_path} not found.")
            return None, None

        # convert() returns a fully loaded copy, so the underlying file can be
        # closed right away instead of leaking one handle per sample.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = self.data.iloc[idx, self.label_col]

        if self.transform:
            image = self.transform(image)

        # Labels are delivered as float tensors; the loss casts them as needed.
        label = torch.tensor(float(label))

        return image, label

In [120]:
# Per-channel statistics used to normalise the image tensor (these are the
# values conventionally paired with ImageNet-pretrained torchvision models).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every image: fixed 224x224 size, tensor conversion,
# then channel-wise normalisation.
_preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
]
transform = transforms.Compose(_preprocess_steps)

In [121]:
def fine_tune_resnet(model):
    """Freeze ``model.cnn_model`` except for its ``layer4`` block.

    Only parameters that belong to ``model.cnn_model`` are touched; any other
    parameters of ``model`` keep their current ``requires_grad`` setting.
    """
    backbone = model.cnn_model
    # Turn gradients off for the whole backbone first ...
    backbone.requires_grad_(False)
    # ... then switch them back on for the last block only.
    backbone.layer4.requires_grad_(True)

In [122]:
class Identity(nn.Module):
    """Pass-through layer that hands its input back untouched.

    It has no parameters and relies on the default ``nn.Module`` constructor.
    """

    def forward(self, x):
        # Intentionally no computation.
        return x

class ImageCaptioningModel(nn.Module):
    """ResNet-50 feature extractor followed by a single LSTM step and a classifier.

    Args:
        embed_size: Size of the embedding the backbone features are projected to.
        hidden_size: Hidden size of the LSTM.
        vocab_size: Number of output logits per image.
    """

    def __init__(self, embed_size=256, hidden_size=512, vocab_size=10000):
        super().__init__()
        self.cnn_model = models.resnet50(weights='DEFAULT')
        # Freeze the backbone except layer4 (done before the head is swapped out).
        fine_tune_resnet(self)
        num_ftrs = self.cnn_model.fc.in_features
        # Drop the backbone's own classifier so it outputs pooled features.
        self.cnn_model.fc = Identity()
        self.fc = nn.Linear(num_ftrs, embed_size)
        # batch_first=True: forward() feeds (batch, 1, embed). Without it the
        # LSTM would read that tensor as ONE sequence of length `batch`, so
        # every sample's output would depend on the samples before it.
        self.lstm = nn.LSTM(embed_size, hidden_size, batch_first=True)
        self.fc_out = nn.Linear(hidden_size, vocab_size)
        # NOTE: built (and therefore part of the parameters / state_dict) but
        # not used by forward().
        self.attention = Attention(embed_size, hidden_size)

    def forward(self, images):
        """Return logits of shape ``(batch, vocab_size)`` for a batch of images."""
        features = self.cnn_model(images)
        features = features.view(features.size(0), -1)
        embeddings = self.fc(features)
        # Each image is a length-1 sequence: (batch, 1, embed).
        lstm_out, _ = self.lstm(embeddings.unsqueeze(1))
        # Remove the length-1 time axis -> (batch, hidden).
        outputs = self.fc_out(lstm_out.squeeze(1))
        return outputs

In [123]:
def collate_fn(batch):
    """Collate ``(image, label)`` samples, dropping incomplete ones.

    A sample is kept only when BOTH its image and its label are present, so
    the returned tensors always line up index by index.

    Args:
        batch: Sequence of samples; element 0 is the image tensor (or None)
            and element 1 is the label (or None).

    Returns:
        Tuple ``(images, labels)``. Images are stacked along a new first
        dimension. Labels are stacked the same way when they are tensors
        (any shape), otherwise converted with ``torch.tensor``. When no sample
        survives, both entries are empty tensors (``torch.empty(0)``).
    """
    # Filter pairs jointly: filtering images and labels separately could
    # silently pair an image with another sample's label.
    kept = [item for item in batch if item[0] is not None and item[1] is not None]
    if not kept:
        return torch.empty(0), torch.empty(0)

    images = torch.stack([item[0] for item in kept], dim=0)

    labels = [item[1] for item in kept]
    if all(isinstance(label, torch.Tensor) for label in labels):
        # stack keeps dtype and also works for non-scalar label tensors.
        labels = torch.stack(labels, dim=0)
    else:
        labels = torch.tensor(labels)

    return images, labels

In [124]:
class Attention(nn.Module):
    """Additive attention producing one weight per encoder step.

    NOTE(review): the shapes below are inferred from the indexing in this
    class; nothing in the visible notebook calls it, so confirm with a caller.
    Presumably ``hidden`` is ``(batch, hidden_size)`` and ``encoder_outputs``
    is ``(batch, steps, embed_size)``.
    """

    def __init__(self, embed_size, hidden_size):
        super().__init__()
        # Scores the concatenation of a hidden state and an encoder output.
        self.attn = nn.Linear(embed_size + hidden_size, hidden_size)
        # Learned vector that reduces each energy vector to a scalar.
        self.v = nn.Parameter(torch.rand(hidden_size))

    def forward(self, hidden, encoder_outputs):
        """Return softmax-normalised scores over dimension 1."""
        num_steps = encoder_outputs.size(1)
        # Copy the hidden state once per step, then put batch first.
        tiled_hidden = hidden.repeat(num_steps, 1, 1).transpose(0, 1)
        raw_scores = self.score(tiled_hidden, encoder_outputs)
        return F.softmax(raw_scores, dim=1)

    def score(self, hidden, encoder_outputs):
        """Return unnormalised scores for every (batch, step) pair."""
        joint = torch.cat([hidden, encoder_outputs], 2)
        # Swap the last two axes so bmm can contract over the hidden axis.
        energy = torch.tanh(self.attn(joint)).transpose(1, 2)
        batch_size = encoder_outputs.size(0)
        v = self.v.repeat(batch_size, 1).unsqueeze(1)
        return torch.bmm(v, energy).squeeze(1)

In [125]:
def custom_loss(outputs, labels, numeric_labels):
    """Cross-entropy between ``outputs`` logits and integer class ``labels``.

    Args:
        outputs: Logits; ``outputs.shape[1]`` is taken as the class count.
        labels: Class indices; cast to ``long`` when not already a LongTensor.
        numeric_labels: Accepted for call compatibility; not used.

    Raises:
        ValueError: If the largest label is not below the class count (the
            offending labels are printed first).
    """
    # The loss requires integer targets.
    labels = labels if isinstance(labels, torch.LongTensor) else labels.long()
    num_classes = outputs.shape[1]

    largest_label = torch.max(labels)
    if largest_label < num_classes:
        criterion = nn.CrossEntropyLoss()
        return criterion(outputs, labels)

    # Show exactly which labels are invalid before failing.
    print(f"Problematic labels: {labels[labels >= num_classes]}")
    raise ValueError(f"Labels contain values outside the valid range (0-{num_classes-1}).")

In [126]:
# Work on an independent (deep) copy so the loaded frame stays untouched.
data1 = data.copy(deep=True)

In [127]:
# Four parallel, initially empty lists: one per column of the sampled rows.
d_image1, group_id2, entity_name2, entity_value2 = [], [], [], []

In [128]:
# For each row position in `b`, copy that row's four fields into the parallel
# lists, filling one column at a time.
for column, bucket in (
    ('image_link', d_image1),
    ('group_id', group_id2),
    ('entity_name', entity_name2),
    ('entity_value', entity_value2),
):
    for row_label in b:
        bucket.append(data1[column][row_label])

In [24]:
# Assemble the sampled rows into a frame with the same column names as the source table.
df = pd.DataFrame(
    data={
        'image_link': d_image1,
        'group_id': group_id2,
        'entity_name': entity_name2,
        'entity_value': entity_value2,
    }
)

In [25]:
# Display the sampled frame (one row per distinct entity_name).
df

Unnamed: 0,image_link,group_id,entity_name,entity_value
0,https://m.media-amazon.com/images/I/61I9XdN6OF...,748919,item_weight,500.0 gram
1,https://m.media-amazon.com/images/I/71gSRbyXmo...,916768,item_volume,1.0 cup
2,https://m.media-amazon.com/images/I/71nywfWZUw...,179080,voltage,48.0 volt
3,https://m.media-amazon.com/images/I/61o2ntPNNg...,179080,wattage,800.0 watt
4,https://m.media-amazon.com/images/I/71d+dz7ogk...,150913,maximum_weight_recommendation,15 kilogram
5,https://m.media-amazon.com/images/I/610bLFQIS3...,442321,height,95.0 centimetre
6,https://m.media-amazon.com/images/I/51k7GMS8dg...,630869,depth,21.0 centimetre
7,https://m.media-amazon.com/images/I/51GzV0nG31...,675317,width,22.0 millimetre


In [111]:
# Persist the sampled rows (without the index column) for the dataset class to read back.
df.to_csv(path_or_buf='unique1.csv', index=False)

In [103]:
# Fetch the image of every sampled row into the local training folder.
download_images(image_links=df['image_link'], output_dir='train_images2')

Downloaded and saved image: train_images2/61I9XdN6OFL.jpg
Downloaded and saved image: train_images2/71gSRbyXmoL.jpg
Downloaded and saved image: train_images2/71nywfWZUwL.jpg
Downloaded and saved image: train_images2/61o2ntPNNgL.jpg
Downloaded and saved image: train_images2/71d+dz7ogkL.jpg
Downloaded and saved image: train_images2/610bLFQIS3L.jpg
Downloaded and saved image: train_images2/51k7GMS8dgL.jpg
Downloaded and saved image: train_images2/51GzV0nG31L.jpg


In [86]:
from torch.optim import Adam
import torch.nn.functional as F

In [39]:
# Build the network and (re)apply the backbone freezing policy.
model = ImageCaptioningModel()
fine_tune_resnet(model)

# Optimise only the parameters that are still trainable after freezing.
optimizer = Adam(
    (param for param in model.parameters() if param.requires_grad),
    lr=1e-4,
)
# Halve the learning rate once the monitored loss has not improved for 2 epochs.
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=2,
    verbose=True,
)



In [112]:
# Read the CSV through the same relative path it was written with
# (df.to_csv('unique1.csv')); the previous absolute '/content/...' path only
# resolved when the working directory happened to be /content.
# NOTE(review): ImageCaptionDataset takes its label from the CSV's second
# column by default, which is group_id in unique1.csv — that is where the
# out-of-range labels reported by the training run come from.
train_dataset = ImageCaptionDataset('unique1.csv', 'train_images2', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=collate_fn)



In [113]:
def train_optimized(model, train_loader, optimizer, scheduler, num_epochs=20):
    """Train ``model`` for ``num_epochs`` epochs and print the mean loss per epoch.

    Args:
        model: Module called as ``model(images)`` to produce logits.
        train_loader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer stepping the model's trainable parameters.
        scheduler: Scheduler whose ``step`` accepts the epoch's loss
            (e.g. ``ReduceLROnPlateau``).
        num_epochs: Number of passes over ``train_loader``.

    Batches without usable samples are skipped. The reported loss and the
    value handed to the scheduler are averaged over the batches that were
    actually trained on.
    """
    model.train()
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        batches_used = 0
        for images, labels in train_loader:
            if images is None or labels is None:
                continue
            # collate_fn reports "no usable sample" with EMPTY tensors, not
            # None, so the check above alone would let them reach the model.
            if images.numel() == 0 or labels.numel() == 0:
                continue

            optimizer.zero_grad()

            outputs = model(images)
            numerical_labels = labels.float()
            loss = custom_loss(outputs, labels, numerical_labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            batches_used += 1

        if batches_used == 0:
            # Nothing was trained: there is no loss to report or schedule on.
            print(f"Epoch [{epoch+1}/{num_epochs}], no usable batches; skipping scheduler step")
            continue

        # Average over trained batches only, so skipped batches do not
        # artificially lower the reported loss.
        mean_loss = epoch_loss / batches_used
        scheduler.step(mean_loss)

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss}")

In [129]:
# Launch training with the default number of epochs.
train_optimized(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    scheduler=scheduler,
)



Problematic labels: tensor([442321, 630869, 916768, 179080, 150913, 675317, 748919, 179080])


ValueError: Labels contain values outside the valid range (0-9999).