In [5]:
import json

In [6]:
with open('train_no_dup_new_100.json', 'r') as f:
        outfits = json.load(f)


print(len(outfits))

16983


In [7]:
import torch
from torch_geometric.data import Data, Dataset
import os
import os.path as osp
from tqdm import tqdm

import torchvision.transforms as transforms
from torchvision.models import inception_v3
from PIL import Image

model = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)
model.fc = torch.nn.Identity()  # Replace the classification layer with an identity layer
model.eval() # Set the model to evaluation mode


class outfitsDataset(Dataset):
    def __init__(self, root, outfits,transform=None, pre_transform=None, pre_filter=None):
        self.outfits = outfits
        super().__init__(root, transform, pre_transform, pre_filter)

    @property
    def raw_file_names(self):
        return os.listdir('..\\Outfit-Recommender-GNN\\outfitsData')

    @property
    def processed_file_names(self):
        return [f'{file}' for file in os.listdir(self.processed_dir)]

    def preprocess_image(self, img_path):
        
        transform = transforms.Compose([
        transforms.Resize(299),
        transforms.CenterCrop(299),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        img = Image.open(img_path)
        if img.mode != 'RGB':
            #print('false')
            img = img.convert("RGB")
        img_tensor = transform(img).unsqueeze(0)  # Add batch dimension
        return img_tensor
    
    def extract_features(self, img_path, model):
        img_tensor = self.preprocess_image(img_path)
        with torch.no_grad():  # Disable gradient calculation for inference
            features = model(img_tensor)
        return features.squeeze()  # Remove the batch dimension
        
    def process_outfit(self, set_id, indexes):
        images = []
        for dirname in os.listdir('..\\Outfit-Recommender-GNN\\images'):
            if dirname == set_id:
                set_path = os.path.join('..\\Outfit-Recommender-GNN\\images', dirname)
                i = 0
                for image in os.listdir(set_path):
                    try:
                        if i in indexes:
                            img_path = os.path.join(set_path, image)
                            image_features = self.extract_features(img_path, model)
                            images.append(image_features)
                            #print(image_features)
                            #print('complete')
                    except Exception as e:
                        print(f"Error loading '{set_path}'\\{image}: {e}")
                    i += 1
                return images
            
        return "could not find"
        
    def process(self):
        idx = 0
        for outfit in tqdm(self.outfits):
            a_outift = self.process_outfit(outfit['set_id'], outfit['items_index'])
            edge_index = []
            for i in range(len(outfit)):
                for j in range(len(outfit)):
                    if i == j:
                        continue
                    edge_index.append([i, j])
            edge_index = torch.tensor(edge_index)
            x = a_outift
            data = Data(x=x, edge_index=edge_index.t().contiguous())
            torch.save(data, osp.join(self.processed_dir, f'data_{idx}.pt'))
            idx+=1

    def len(self):
        return len(self.processed_file_names)

    def get(self, idx):
        data = torch.load(osp.join(self.processed_dir, f'data_{idx}.pt'))
        return data

Using cache found in C:\Users\Admin/.cache\torch\hub\pytorch_vision_v0.10.0


In [8]:
base_path = os.path.join('..', 'Outfit-Recommender-GNN', 'outfitsData')

outfits = outfits[:1000]
outfits_graphs = outfitsDataset(base_path, outfits=outfits)
# outfits_graphs.process()
print(outfits_graphs[1].x[0].size())

In [28]:
from torch_geometric.nn import GraphSAGE, global_mean_pool, SAGEConv
from torch_geometric.loader import DataLoader
class GraphSAGE(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels, aggr='mean')  # First GraphSAGE layer
        self.conv2 = SAGEConv(hidden_channels, out_channels, aggr='mean')  # Second GraphSAGE layer

    def forward(self, x, edge_index, batch):
        # First GraphSAGE layer
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        # Second GraphSAGE layer
        x = self.conv2(x, edge_index)
        x = torch.relu(x)
        # Global mean pooling
        # x = global_mean_pool(x, batch)  # Optional: Aggregate node embeddings to graph-level embedding
        return x


In [29]:
def graphloss(node_embeddings, edge_index, num_negative_samples=5):
    """
    Implement the graph-based loss function as described.
    
    Parameters:
        node_embeddings: Tensor of shape [num_nodes, embedding_dim] containing the embeddings of the nodes.
        edge_index: Tensor of shape [2, num_edges] containing the indices of source and target nodes of each edge.
        num_negative_samples: The number of negative samples Q for each positive sample.
    
    Returns:
        loss: The computed loss value as a PyTorch scalar.
    """
    # Positive samples loss
    src_node_embeddings = node_embeddings[edge_index[0]]  # Source node embeddings
    target_node_embeddings = node_embeddings[edge_index[1]]  # Target node embeddings
    
    positive_score = torch.sum(src_node_embeddings * target_node_embeddings, dim=1)  # Dot product
    positive_loss = -torch.log(torch.sigmoid(positive_score)).mean()
    
    # Negative samples loss
    num_nodes, embedding_dim = node_embeddings.size()
    negative_loss = 0
    for _ in range(num_negative_samples):
        # Randomly sample negative targets for each source node
        negative_targets = torch.randint(0, num_nodes, (edge_index.size(1),), device=node_embeddings.device)
        negative_target_embeddings = node_embeddings[negative_targets]
        
        negative_score = torch.sum(src_node_embeddings * negative_target_embeddings, dim=1)  # Dot product
        negative_loss += -torch.log(torch.sigmoid(-negative_score)).mean()
    
    negative_loss /= num_negative_samples  # Average over all negative samples
    loss = positive_loss + negative_loss
    return loss

In [31]:
model = GraphSAGE(in_channels=2048, hidden_channels=512, out_channels=256)
print(model)
loader = DataLoader(outfits_graphs, batch_size=32, shuffle=True)

GraphSAGE(
  (conv1): SAGEConv(2048, 512, aggr=mean)
  (conv2): SAGEConv(512, 256, aggr=mean)
)


In [36]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
model.train()

for epoch in range(10):  # Number of epochs
    #last_loss = 0.
    for data in loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)
        # Calculate your loss here. For example, using a contrastive loss as described in the document.
        loss = graphloss(out, data.edge_index)
        loss.backward()
        optimizer.step()
            
        print(loss)


IndexError: list index out of range