In [50]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import requests
import re
from torchvision import models
import torch.nn as nn
import torch.optim as optim

In [51]:
# Define entity type mapping
entity_type_mapping = {
    'width': 0,
    'depth': 1,
    'height': 2,
    'item_weight': 3,
    'maximum_weight_recommendation': 4,
    'voltage': 5,
    'wattage': 6,
    'item_volume': 7
}

In [52]:
# Dataset class for loading image and text data
class ImageTextDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        img_url = self.data_frame.iloc[idx, 0]
        entity_type = self.data_frame.iloc[idx, 2]
        entity_value = self.data_frame.iloc[idx, 3]

        # Load image
        image = Image.open(requests.get(img_url, stream=True).raw)

        if self.transform:
            image = self.transform(image)

        # Convert entity_type to numerical ID
        entity_type_id = torch.tensor(entity_type_mapping.get(entity_type, -1))

        # Extract numeric value from entity_value
        match = re.search(r'\d+(\.\d+)?', entity_value)  # Match float or integer numbers
        if match:
            value = float(match.group())  # Extract the numeric part as a float
        else:
            raise ValueError(f"No numeric value found in entity_value: {entity_value}")

        # Return image, entity_type_id, and entity_value
        return image, entity_type_id, torch.tensor(value)

In [53]:
# Define image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

In [54]:
# Load dataset
train_dataset = ImageTextDataset(csv_file='training_data.csv', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [55]:
# Define the model
class EntityAwareModel(nn.Module):
    def __init__(self, num_entity_types):
        super(EntityAwareModel, self).__init__()
        self.image_model = models.resnet18(pretrained=True)
        self.image_model.fc = nn.Linear(512, 256)  # Adjust according to the output size of ResNet
        self.entity_embedding = nn.Embedding(num_entity_types, 10)  # Example: embedding size 10
        self.fc = nn.Linear(256 + 10, 1)  # Adjust input size based on image_model output and entity embedding

    def forward(self, image, entity_type):
        image_features = self.image_model(image)
        entity_embedding = self.entity_embedding(entity_type)
        combined_features = torch.cat((image_features, entity_embedding), dim=1)
        output = self.fc(combined_features)
        return output

In [56]:
# Initialize model, criterion, and optimizer
num_entity_types = len(entity_type_mapping)
model = EntityAwareModel(num_entity_types)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)



In [57]:
# Training loop
for epoch in range(50):  # Example: Train for 10 epochs
    model.train()  # Set model to training mode
    for images, entity_types, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images, entity_types)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

Epoch 1, Loss: 275662.1875
Epoch 2, Loss: 238081.140625
Epoch 3, Loss: 584998.5625
Epoch 4, Loss: 14247.953125
Epoch 5, Loss: 22329.255859375
Epoch 6, Loss: 441958.40625
Epoch 7, Loss: 21525.373046875
Epoch 8, Loss: 64215.0
Epoch 9, Loss: 128520.0390625
Epoch 10, Loss: 120121.5703125
Epoch 11, Loss: 113012.390625
Epoch 12, Loss: 937122.3125
Epoch 13, Loss: 183559.765625
Epoch 14, Loss: 18277.12890625
Epoch 15, Loss: 2335899.5
Epoch 16, Loss: 225246.03125
Epoch 17, Loss: 105079.3515625
Epoch 18, Loss: 199756.59375
Epoch 19, Loss: 5682.1787109375
Epoch 20, Loss: 11033.0537109375


In [39]:
# Save the trained model
torch.save(model.state_dict(), 'trained_model.pth')

In [46]:
# Load the trained model
model = EntityAwareModel(num_entity_types)
model.load_state_dict(torch.load('trained_model.pth'))
model.eval()

  model.load_state_dict(torch.load('trained_model.pth'))


EntityAwareModel(
  (image_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=Tr

In [47]:
image = Image.open('images/73.jpg')
image = transform(image).unsqueeze(0) 

In [48]:
# Dummy entity type for inference
entity_type = 'height'  # Example entity type
entity_type_id = torch.tensor([entity_type_mapping.get(entity_type, -1)])

In [49]:
with torch.no_grad():
    prediction = model(image, entity_type_id)
print(f'Predicted Value: {prediction.item()}')

Predicted Value: -0.327099472284317
