In [1]:
import numpy as np
import scipy.io
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm
from PIL import Image
import gc
import random
from matplotlib.pyplot import imshow
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader


In [2]:
scores = scipy.io.loadmat('Fashion144k_v1/feat/relvotes.mat')['X'][0]

In [3]:
photos_list = np.array(pd.read_csv('Fashion144k_v1/photos.txt', names=['Filename'])['Filename'])

In [4]:
split_file = scipy.io.loadmat('Fashion144k_v1/split.mat')
#splitting data into subsets according to split.mat file
train_paths = photos_list[split_file['trainids']][0]
train_labels = scores[split_file['trainids']][0]
test_paths = photos_list[split_file['testids']][0]
test_labels = scores[split_file['testids']][0]
val_paths = photos_list[split_file['validids']][0]
val_labels = scores[split_file['validids']][0]

In [5]:
# Class based on torch Dataset, specific to current Dataset
class FashionDataset(torch.utils.data.Dataset):
    def __init__(self, image_paths, labels, transforms=None):
        self.labels = labels
        self.image_paths = image_paths
        self.transforms = transforms
        
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, index):
        # .convert(mode='RGB') is necessary because 
        # singe channel pics are breaking model later
        image = Image.open(f'Fashion144k_v1/photos/{self.image_paths[index]}').convert(mode='RGB')
        label = self.labels[index]
        
        if self.transforms is not None:
            image = self.transforms(image)

        return image, label

In [6]:
# Image augmentation, may change later
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    # This transform changes images without augmenting them
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    
}

In [7]:
# Creating Datasets
train_data = FashionDataset(train_paths, train_labels, data_transforms['train'])
test_data = FashionDataset(test_paths, test_labels, data_transforms['test'])
val_data = FashionDataset(val_paths, val_labels, data_transforms['test'])

# Creating Dataloaders
batch_size = 100

train_loader = DataLoader(train_data, batch_size = batch_size, shuffle=True, num_workers=0)
test_loader = DataLoader(test_data, batch_size = batch_size, num_workers=0)
val_loader = DataLoader(val_data, batch_size = batch_size, num_workers=0)

In [8]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=24, kernel_size=3)
        self.fc1 = nn.Linear(220, 100)
        self.fc2 = nn.Linear(100, 84)
        self.fc3 = nn.Linear(84, 1)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Specifying device for pyTorch to train on
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = Net()
model = model.to(device)

In [9]:
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [10]:
%%time

num_epochs = 10

train_losses = []
val_losses = []

for epoch in range(1, num_epochs + 1):
    train_loss = 0.0
    val_loss = 0.0
    
    print('train')
    model.train()
    for data, labels in tqdm(train_loader):
        data = data.float().to(device)
        labels = labels.float().to(device)
        #print(data.shape, labels.shape)
        
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.size(0)
        torch.cuda.empty_cache()
    
    print('eval')
    model.eval()
    for data, labels in val_loader:
        data = data.to(device)
        labels = labels.to(device)
        output = model(data)
        loss = criterion(output, labels)
        val_loss += loss.item() * data.size(0)
        torch.cuda.empty_cache()
        
    train_loss = train_loss/len(train_loader.sampler)
    val_loss = val_loss/len(val_loader.sampler)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    
    print(f'Epoch: {epoch}/{num_epochs} | Train Loss:{train_loss} | Val Loss:{val_loss}')

train


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
100%|████████████████████████████████████████████████████████████████████████████████| 866/866 [09:30<00:00,  1.52it/s]


eval


RuntimeError: CUDA out of memory. Tried to allocate 170.00 MiB (GPU 0; 6.00 GiB total capacity; 4.15 GiB already allocated; 87.85 MiB free; 4.17 GiB reserved in total by PyTorch)