![clothing_classification](clothing_classification.png)


Fashion Forward is a new AI-based e-commerce clothing retailer.
They want to use image classification to automatically categorize new product listings, making it easier for customers to find what they're looking for. It will also assist in inventory management by quickly sorting items.

As a data scientist tasked with implementing a garment classifier, your primary objective is to develop a machine learning model capable of accurately categorizing images of clothing items into distinct garment types such as shirts, trousers, shoes, etc.

In [17]:
# Run the cells below first

In [18]:
!pip install torchmetrics
!pip install torchvision

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [19]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchmetrics import Accuracy, Precision, Recall
import torch.nn.functional as F

In [20]:
# Load datasets
from torchvision import datasets
import torchvision.transforms as transforms

# Training pipeline: random flips and rotations augment the data, then every
# image is resized to 64x64, converted to a tensor and normalised with
# mean 0.5 / std 0.5 on its single channel.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(45),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Evaluation pipeline: deterministic on purpose. Random augmentation belongs
# to training only; applying it here would perturb the held-out images and
# make the reported metrics change from run to run.
test_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Fashion-MNIST train/test splits, downloaded to ./data when not present.
train_data = datasets.FashionMNIST(root='./data', train=True, download=True, transform=train_transform)
test_data = datasets.FashionMNIST(root='./data', train=False, download=True, transform=test_transform)

In [21]:
# print(len(train_data.classes))
# Dimension 1 of the first training sample's image tensor. The recorded
# batch output further down shows samples shaped [1, 64, 64], so this is the
# image side length; FashionNet and the evaluation cell both read it.
image_size = train_data[0][0].shape[1]

class FashionNet(nn.Module):
    """Small CNN classifier for square, single-channel images.

    Layers: Conv2d(1 -> 16, 3x3, padding 1) -> ReLU -> MaxPool2d(2, stride 2)
    -> Flatten -> Linear(num_classes). ``forward`` returns the raw output of
    the linear layer (no softmax is applied).

    Args:
        num_classes: Number of output classes (width of the final layer).
        input_size: Side length in pixels of the square input images. When
            omitted, the module-level ``image_size`` is used, which keeps
            existing ``FashionNet(num_classes)`` calls working unchanged.
    """

    def __init__(self, num_classes, input_size=None):
        super().__init__()
        if input_size is None:
            # Backward-compatible default: size the classifier from the
            # notebook-level value derived from the training data.
            input_size = image_size

        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()

        # The padded 3x3 convolution keeps the spatial size and the 2x2 pool
        # halves it, leaving 16 feature maps of side input_size // 2.
        self.fc1 = nn.Linear(16 * (input_size // 2) ** 2, num_classes)

    def forward(self, x):
        """Return class scores for a batch ``x`` of single-channel images."""
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.flatten(x)
        x = self.fc1(x)

        return x

In [22]:
# class FashionNet(nn.Module):
#     def __init__(self, num_classes):
#         super(FashionNet, self).__init__()
        
#         self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
#         self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
#         self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
#         self.conv4 = nn.Conv2d(128, 512, kernel_size=3, padding=1)
#         self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
#         self.dropout = nn.Dropout(0.5)
#         self.fc1 = nn.Linear(512*4*4, 1024)
#         self.fc2 = nn.Linear(1024, num_classes)
#         self.batch_norm1 = nn.BatchNorm2d(32)
#         self.batch_norm2 = nn.BatchNorm2d(64)
#         self.batch_norm3 = nn.BatchNorm2d(128)
#         self.batch_norm4 = nn.BatchNorm2d(512)
        
#     def forward(self, x):
#         x = self.pool(F.relu(self.batch_norm1(self.conv1(x))))
#         x = self.pool(F.relu(self.batch_norm2(self.conv2(x))))
#         x = self.pool(F.relu(self.batch_norm3(self.conv3(x))))
#         x = self.pool(F.relu(self.batch_norm4(self.conv4(x))))
#         x = x.view(-1, 512*4*4)
#         x = F.relu(self.fc1(x))
#         x = self.dropout(x)
#         x = self.fc2(x)
#         return x
    
#     # def _calculate_feature_size(self):
#     #     with torch.no_grad:
#     #         dummy = torch.zeros()

In [23]:
# # Start coding here
# # Use as many cells as you need 

# class FashionNet(nn.Module):
#     def __init__(self, num_classes):
#         super(FashionNet, self).__init__()
#         self.feature_extractor = nn.Sequential(
#             nn.Conv2d(1, 32, kernel_size=3, padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2),
#             nn.Conv2d(32, 64, kernel_size=3, padding=1),
#             nn.ELU(),
#             # nn.Conv2d(64, 128, kernel_size=3, padding=1),
#             # nn.ReLU(),
#             # nn.Conv2d(128, 512, kernel_size=3, padding=1),
#             # nn.ReLU(),
#             nn.Flatten(),
#         )
#         self._calculate_feature_size()
        
#         self.classifier = nn.Linear(self.feature_size, num_classes)
        
#     def forward(self, x):
#         x = self.feature_extractor(x)
#         x = self.classifier(x)
#         return x
    
#     def _calculate_feature_size(self):
#         # Use a dummy tensor to compute the output size of the feature extractor
#         with torch.no_grad():
#             dummy_input = torch.zeros(1, 1, 64, 64)  # Assuming input image size 
#             feature_size = self.feature_extractor(dummy_input).view(1, -1).size(1)
#             print(feature_size)                                                                   
#         self.feature_size = feature_size
    

In [24]:
# Training batches: 10 samples each, drawn in shuffled order.
train_loader = DataLoader(dataset=train_data, batch_size=10, shuffle=True)

In [25]:
# Model, loss and optimiser for the 10-class garment classifier.
num_classes = 10
net = FashionNet(num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.01)
# Plateau scheduler configured to scale the learning rate by 0.1.
# NOTE(review): no visible cell calls scheduler.step(...), so as written it
# never changes the learning rate.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=5,
)

def train_net(model, train_loader, criterion, optimizer, num_epochs):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Prints the mean per-batch loss of the final epoch once training ends.

    Args:
        model: Module to optimise; it is called on each batch of images.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimiser that updates ``model``'s parameters.
        num_epochs: Number of full passes over ``train_loader``.
    """
    # Make sure training-mode behaviour is active even if the model was
    # switched to eval mode by an earlier evaluation run.
    model.train()

    # Stays NaN when no batch is ever processed (zero epochs or an empty
    # loader) so the final report cannot fail on an undefined value.
    train_loss = float("nan")
    for _ in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            optimizer.zero_grad()
            # Use the model passed in, not a notebook-level global.
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        if num_batches:
            train_loss = running_loss / num_batches

    print(f"Training loss after {num_epochs} epochs is {train_loss}")

In [26]:
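# NOTE(review): training is disabled here, so the evaluation cells below score
# an untrained network (the recorded accuracy of about 0.087 is chance level
# for 10 classes). Re-enable this call before relying on the metrics.
# train_net(net, train_loader, criterion, optimizer, 5)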

In [27]:
# Sanity check: print the tensor shape of the first image in the first
# training batch, then stop iterating.
for images, labels in train_loader:
    print(images[0].shape)
    break

# print(len(test_loader))

torch.Size([1, 64, 64])


### Prediction

In [28]:
# Evaluation batches: 10 samples each, kept in dataset order.
test_loader = DataLoader(dataset=test_data, batch_size=10, shuffle=False)

In [29]:
# Macro-averaged metrics over the 10 classes, accumulated across all test
# batches and reduced once at the end with compute().
precision_macro = Precision(task='multiclass', num_classes=10, average='macro')
recall_macro = Recall(task='multiclass', num_classes=10, average='macro')
# NOTE(review): with average='macro' this reports the same value as
# recall_macro in the recorded output; confirm macro accuracy is intended.
accuracy_macro = Accuracy(task='multiclass', num_classes=10, average='macro')


predictions = []

net.eval()
with torch.no_grad():
    for images, labels in test_loader:
        # Call the module itself rather than .forward() so registered hooks
        # run. Batches are fed as loaded; the previous reshape left them
        # unchanged.
        outputs = net(images)
        preds = torch.argmax(outputs, dim=-1)
        predictions.extend(preds.cpu().tolist())
        # update() only accumulates state; the per-batch values the forward
        # call would also compute were never used.
        precision_macro.update(preds, labels)
        recall_macro.update(preds, labels)
        accuracy_macro.update(preds, labels)

precision = precision_macro.compute()
recall = recall_macro.compute()
accuracy = accuracy_macro.compute()

In [30]:
# Reduce each accumulated metric and convert the result to a plain float.
precision, recall, accuracy = (
    float(metric.compute())
    for metric in (precision_macro, recall_macro, accuracy_macro)
)

In [31]:
# Report the three metrics, one per line.
print(
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"Accuracy: {accuracy}",
    sep="\n",
)

Precision: 0.046081073582172394
Recall: 0.08709999918937683
Accuracy: 0.08709999918937683


In [32]:
# Dump every predicted class index, then display the first 20 as the cell's
# result.
print(predictions)
predictions[:20]

[2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 2, 2, 2, 9, 2, 2, 2, 2, 2, 6, 2, 9, 2, 2, 2, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 2, 2, 9, 2, 2, 2, 9, 2, 2, 9, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, 6, 2, 2, 6, 5, 2, 2, 9, 2, 2, 2, 2, 9, 2, 9, 2, 6, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 2, 2, 9, 9, 2, 2, 2, 2, 2, 6, 6, 2, 5, 2, 2, 2, 2, 9, 6, 9, 9, 2, 2, 9, 2, 2, 6, 2, 2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 9, 2, 2, 2, 9, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 6, 9, 2, 2, 2, 2, 2, 9, 2, 2, 2, 2, 2, 6, 6, 2, 2, 2, 2, 6, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 6, 2, 9, 6, 6, 2, 2, 9, 2, 9, 2, 2, 2, 2, 2, 2, 2, 6, 2, 6, 2, 2, 2, 2, 0, 2, 6, 2, 2, 6, 2, 6, 2, 2, 2, 2, 9, 2, 6, 6, 6, 6, 2, 2, 9, 2, 2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 2, 2, 2, 2, 2, 9, 2, 9, 2, 2, 2, 2, 6, 2, 9, 2, 2, 9, 2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 2, 9, 2, 2, 2, 2, 9, 9, 2, 2, 2, 2, 2, 2, 2, 2, 6, 2, 2, 6, 2, 2, 2, 2, 6, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 9, 

[2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 2, 2, 2, 9]