In [13]:
import pandas as pd
import numpy as np

In [14]:
df = pd.read_csv('dataset.csv')
# Add ".jpg" extension to all filenames
df['Image name'] = df['Image name'].astype(str) + '.png'
df.head()

Unnamed: 0,Image name,Shirt,Vintage,Sweatshirt,T-shirt,Long Sleeve,Polo,Floral,Camo,Striped,...,Skirt,Mini skirt,Midi skirt,Maxi skirt,Dress,Mini dress,Midi dress,Maxi dress,Turtleneck,Leather
0,91da8ace712f42f7bac3bd4c9803ec020.png,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,b69efd7be15c4384bcd4642205d01cc11.png,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4c48eea103eb437e909b9fe5f25647222.png,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,5aec4aa440564f339f95ebf5d4ab913c3.png,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,6d79eae68ac24997941fa950ff68ac844.png,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
# Sum the occurrences of each label (excluding the 'Image name' column)
def label_counts(df):
    return df.drop(columns=['Image name']).sum()

from sklearn.model_selection import train_test_split

# Step 1: Split into train and temporary set
train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)  # 70% train, 30% temp

# Step 2: Split the temp set into validation and test
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)  # 15% val, 15% test

print(label_counts(df))
# print(label_counts(train_df))
# print(label_counts(val_df))
# print(label_counts(test_df))

Shirt          2594
Vintage        6876
Sweatshirt      402
T-shirt        1364
Long Sleeve     857
               ... 
Mini dress      205
Midi dress      201
Maxi dress      209
Turtleneck       37
Leather         553
Length: 66, dtype: int64


In [16]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision.models import resnet50, ResNet50_Weights

In [17]:
# Initialize the Weight Transforms
weights = ResNet50_Weights.DEFAULT
preprocess = weights.transforms()

In [18]:
from PIL import Image
import os
from torch.utils.data import Dataset

Image.init() # reinitialize the image library to clear the cache

class ClothingDataset(Dataset):
    def __init__(self, df, image_dir, transform=None):
        self.df = df
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        image_name = self.df.iloc[idx, 0]  # Assuming the first column is 'Image name'
        image_path = os.path.join(self.image_dir, image_name)
        image = Image.open(image_path)

        # Force initial conversion to RGB
        image = image.convert('RGB') 
        
        # Check for palette mode and transparency (even after RGB conversion)
        if image.mode == 'P' and image.info.get("transparency", None) is not None:
            image = image.convert('RGBA')  # Convert to RGBA if still needed
        
        # Use the preprocess transform
        image = preprocess(image)
        image.requires_grad = True

        # Convert labels to numeric format (int64 in this example)
        labels = torch.tensor(self.df.iloc[idx, 1:].values.astype(np.int64), dtype=torch.float32)

        if self.transform:
            image = self.transform(image) 

        return image, labels

In [19]:
from torch.utils.data import DataLoader

train_dataset = ClothingDataset(train_df, 'images', transform=None)  # No additional transforms
val_dataset = ClothingDataset(val_df, 'images', transform=None)
test_dataset = ClothingDataset(test_df, 'images', transform=None)

batch_size = 32

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [20]:
from torchvision.models import resnet50
# Load the pre-trained model
resnet50 = resnet50(weights=ResNet50_Weights.DEFAULT)

# Modify for your number of classes (28 in your example)
num_classes = 66 
resnet50.fc = nn.Sequential(
    nn.Linear(resnet50.fc.in_features, num_classes),
    nn.Sigmoid()  # Sigmoid for multi-label
)

In [21]:
for param in resnet50.parameters():
    param.requires_grad = False

In [22]:
import torch.optim as optim

criterion = nn.BCELoss()  # Binary Cross Entropy for multi-label
optimizer = optim.Adam(resnet50.fc.parameters(), lr=0.001)  # Only optimize the last layer

In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [24]:
from sklearn.metrics import accuracy_score, f1_score, hamming_loss


resnet50.to(device)

num_epochs = 10

best_val_loss = float('inf')  # Initialize with a high value
patience = 5  # Number of epochs to wait before stopping
epochs_without_improvement = 0
best_model_state = None  # To store the best model's state_dict

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

for epoch in range(num_epochs):
    resnet50.train()  # Set the model to training mode
    
    # Training Loop
    train_loss = 0.0  # Initialize training loss for the epoch
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = resnet50(images)
        loss = criterion(outputs, labels)
        train_loss += loss.item() * images.size(0)  # Accumulate loss
        loss.backward()
        optimizer.step()

    train_loss /= len(train_loader.dataset)  # Average training loss

    # Validation Loop (Evaluation Mode)
    resnet50.eval()  # Set the model to evaluation mode
    val_loss = 0.0  # Initialize validation loss
    all_preds = []
    all_labels = []
    with torch.no_grad():  # No gradients needed during evaluation
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = resnet50(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)

            # Convert outputs to binary predictions (threshold at 0.5)
            preds = (outputs > 0.5).float() 

            all_preds.append(preds.cpu().numpy())  # Accumulate predictions
            all_labels.append(labels.cpu().numpy())

    val_loss /= len(val_loader.dataset)  # Average validation loss
    scheduler.step()  # Update learning rate after the epoch
    
    # Concatenate predictions and labels
    all_preds = np.concatenate(all_preds, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_preds)  
    f1_micro = f1_score(all_labels, all_preds, average='micro')
    f1_macro = f1_score(all_labels, all_preds, average='macro')
    hamming = hamming_loss(all_labels, all_preds)

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {accuracy:.4f}, Val F1 (micro): {f1_micro:.4f}, Val F1 (macro): {f1_macro:.4f}, Val Hamming Loss: {hamming:.4f}')
    
    # Save the best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model_state = resnet50.state_dict()
        torch.save(best_model_state, 'best_model.pth')
        print(f'New best model saved with validation loss: {best_val_loss:.4f}')

    # Early stopping (optional) 
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print("Early stopping triggered!")
            break  # Stop training



Epoch 1/10, Train Loss: 0.6955, Val Loss: 0.6952, Val Accuracy: 0.0000, Val F1 (micro): 0.1081, Val F1 (macro): 0.0926, Val Hamming Loss: 0.4954
New best model saved with validation loss: 0.6952




Epoch 2/10, Train Loss: 0.6955, Val Loss: 0.6963, Val Accuracy: 0.0000, Val F1 (micro): 0.1071, Val F1 (macro): 0.0930, Val Hamming Loss: 0.4976




KeyboardInterrupt: 