In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler
import os
from tqdm import tqdm_notebook as tqdm
from PIL import Image
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np


In [6]:
from dataset_utils import split_dataset_by_brand, split_dataset, reset_index
from sklearn.preprocessing import LabelEncoder

In [203]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:

print(device)

cpu


In [297]:
import pandas as pd
clf_dataset = pd.read_csv('./clf_data.csv')
lb = LabelEncoder()
lb.fit(list(set(clf_dataset['Brand'])))
print(lb.classes_)
clf_dataset['label'] = lb.transform(clf_dataset['Brand'])
print(clf_dataset)
train,val,test = split_dataset(split_dataset_by_brand(clf_dataset))
train = reset_index(train.sample(frac=1))
val = reset_index(val.sample(frac=1))
test = reset_index(test.sample(frac=1))


['audemarspiguet' 'breitling' 'cartier' 'gucci' 'iwc' 'movado' 'nomos'
 'omega' 'patekphilippe' 'rolex' 'seiko' 'zenith']
      Unnamed: 0                     img   Brand  label
0              0       0-iwc-13109.0.png     iwc      4
1              1        1-iwc-4899.0.png     iwc      4
2              2       2-iwc-10139.0.png     iwc      4
3              3        3-iwc-3239.0.png     iwc      4
4              4        4-iwc-8499.0.png     iwc      4
...          ...                     ...     ...    ...
3637        3637  3637-zenith-2750.0.png  zenith     11
3638        3638  3638-zenith-4895.0.png  zenith     11
3639        3639  3639-zenith-5950.0.png  zenith     11
3640        3640  3640-zenith-6409.0.png  zenith     11
3641        3641  3641-zenith-4999.0.png  zenith     11

[3642 rows x 4 columns]
242 272 303
243 273 304
246 276 308
242 272 303
240 270 301
245 275 307
245 275 307
240 270 300
242 272 303
242 272 303
240 270 300
242 272 303


In [298]:
train = reset_index(train.sample(25))

In [299]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [251]:
transform = transforms.Compose([
     transforms.Resize((300, 300)),
     transforms.CenterCrop((100, 100)),
     transforms.RandomCrop((80, 80)),
     transforms.RandomHorizontalFlip(p=0.5),
     transforms.RandomRotation(degrees=(-90, 90)),
     transforms.RandomVerticalFlip(p=0.5),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
     ])

In [300]:
class Watch_Dataset(Dataset):
    def __init__(self, img_data,img_path,transform=None):
        self.img_path = img_path
        self.transform = transform
        self.img_data = img_data
        
    def __len__(self):
        return len(self.img_data)
    
    def __getitem__(self, index):
        img_name = os.path.join(self.img_path,self.img_data.loc[index, 'img'])
        image = Image.open(img_name).convert('RGB')
        #image = image.convert('RGB')
#         image = image.resize((300,300))
        label = torch.tensor(self.img_data.loc[index, 'label'])
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [301]:
train = Watch_Dataset(train,'./scraper/images',transform)
val = Watch_Dataset(val,'./scraper/images',transform)
test = Watch_Dataset(test,'./scraper/images',transform)

In [302]:
train_loader = torch.utils.data.DataLoader(train, batch_size=1)
validation_loader = torch.utils.data.DataLoader(val, batch_size=1)

In [296]:
[i for i in enumerate(train_loader)]

KeyError: 0

In [303]:
import time
start = time.time()
for i, (data,labels) in enumerate(train_loader):
    print(data.size())
    break
    images = data
end = time.time()
time_spent = (end-start)/60
print(f"{time_spent:.3} minutes")

torch.Size([1, 3, 210, 210])
8.32e-05 minutes


In [91]:
def img_display(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    npimg = np.transpose(npimg, (1, 2, 0))
    return npimg

In [92]:
{ i:lb.classes_[i] for i in range(len(lb.classes_))}

{0: 'audemarspiguet',
 1: 'breitling',
 2: 'cartier',
 3: 'gucci',
 4: 'iwc',
 5: 'movado',
 6: 'nomos',
 7: 'omega',
 8: 'patekphilippe',
 9: 'rolex',
 10: 'seiko',
 11: 'zenith'}

In [93]:
dataiter = iter(train_loader)
images, labels = dataiter.next()
watch_types = { i:lb.classes_[i] for i in range(len(lb.classes_))}
# Viewing data examples used for training
fig, axis = plt.subplots(1, 5, figsize=(15, 10))
for i, ax in enumerate(axis.flat):
    with torch.no_grad():
#         print(i,images[i])
        image, label = images[i], labels[i]
        print(label.item())
        ax.set(title = f"{watch_types[label.item()]}") # add label
        ax.imshow(img_display(image)) # add image
        

8


IndexError: index 1 is out of bounds for dimension 0 with size 1

In [281]:
images[0].size()


torch.Size([3, 80, 80])

# MODEL


In [266]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 3 input image channel, 16 output channels, 3x3 square convolution kernel
        self.conv1 = nn.Conv2d(3,16,kernel_size=3,stride=2,padding=1)
        self.conv2 = nn.Conv2d(16, 32,kernel_size=3,stride=2, padding=1)
        self.conv3 = nn.Conv2d(32, 64,kernel_size=3,stride=2, padding=1)
        self.conv4 = nn.Conv2d(64, 64,kernel_size=3,stride=2, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout2d(0.4)
        self.batchnorm1 = nn.BatchNorm2d(16)
        self.batchnorm2 = nn.BatchNorm2d(32)
        self.batchnorm3 = nn.BatchNorm2d(64)
        self.neurons = self.linear_input_neurons()
        self.fc1 = nn.Linear(self.neurons,512 )
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 12)
        
    def forward(self, x):
        x = self.batchnorm1(F.relu(self.conv1(x)))
        x = self.batchnorm2(F.relu(self.conv2(x)))
        x = self.dropout(self.batchnorm2(self.pool(x)))
        x = self.batchnorm3(self.pool(F.relu(self.conv3(x))))
        x = self.dropout(self.conv4(x))
#         print(x.size())
        x = x.view(-1, self.neurons) # Flatten layer
        x = self.dropout(self.fc1(x))
        x = self.dropout(self.fc2(x))
        x = F.log_softmax(self.fc3(x),dim = 1)
        return x
    
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(3, 6, 5)
#         self.pool = nn.MaxPool2d(2, 2)
#         self.conv2 = nn.Conv2d(6, 16, 5)
#         self.neurons = self.linear_input_neurons()
#         print(self.neurons)
#         self.fc1 = nn.Linear(self.neurons, 120)
#         self.fc2 = nn.Linear(120, 84)
#         self.fc3 = nn.Linear(84, 12)

#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))
#         x = self.pool(F.relu(self.conv2(x)))
#         x = x.view(-1, self.neurons)
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = self.fc3(x)
#         return F.log_softmax(x, dim=1)
    
    def size_before_fc(self, x):
        x = self.batchnorm1(F.relu(self.conv1(x)))
        x = self.batchnorm2(F.relu(self.conv2(x)))
        x = self.dropout(self.batchnorm2(self.pool(x)))
        x = self.batchnorm3(self.pool(F.relu(self.conv3(x))))
        x = self.dropout(self.conv4(x))

        return x.size()


    # after obtaining the size in above method, we call it and multiply all elements of the returned size.
    def linear_input_neurons(self):
        size = self.size_before_fc(torch.rand(1, 3, 80, 80)) # image size: 64x32
        m = 1
        for i in size:
            m *= i

        return int(m)

    
net = Net()

In [267]:
model = Net()
model

Net(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout2d(p=0.4, inplace=False)
  (batchnorm1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=64, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=12, bias=True)
)

In [304]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [305]:
def accuracy(out, labels):
    _,pred = torch.max(out, dim=1)
    return torch.sum(pred==labels).item()

In [312]:
val_loss = []
val_acc = []
train_loss = []
train_acc = []
for epoch in range(20):  # loop over the dataset multiple times
    correct = 0
    total = 0
    running_loss = 0.0
    for i, (data_, target_) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(data_)
        target_ = target_.type(torch.long)
        
        loss = criterion(outputs, target_)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        _,pred = torch.max(outputs, dim=1)
#         print(target_,outputs,pred)        
        correct += torch.sum(pred==target_).item()
        total += target_.size(0)
        
        if i % 10 == 9:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
    print(correct,total)

print('Finished Training')

tensor([8]) tensor([[ 0.0329, -1.5087,  0.2718,  0.3310,  0.4800,  0.1562,  0.9815,  0.0684,
          0.2694, -0.7234,  0.0360, -0.0122]], grad_fn=<AddmmBackward>) tensor([6])
0 1
tensor([0]) tensor([[ 0.0162, -1.5447,  0.3379,  0.2992,  0.5106,  0.1440,  0.9635, -0.0275,
          0.2659, -0.6923,  0.0279,  0.0031]], grad_fn=<AddmmBackward>) tensor([6])
0 2
tensor([7]) tensor([[ 4.8567e-02, -1.5910e+00,  3.5351e-01,  3.5385e-01,  4.9607e-01,
          1.8748e-01,  9.9809e-01,  8.7646e-02,  1.8058e-01, -7.8823e-01,
          4.6541e-02,  4.4763e-04]], grad_fn=<AddmmBackward>) tensor([6])
0 3
tensor([3]) tensor([[ 0.0533, -1.5342,  0.2600,  0.3185,  0.5126,  0.1669,  0.9776,  0.0158,
          0.2495, -0.7305,  0.0212, -0.0052]], grad_fn=<AddmmBackward>) tensor([6])
0 4
tensor([10]) tensor([[ 0.0669, -1.4532,  0.2619,  0.3516,  0.4394,  0.1755,  0.9381,  0.0555,
          0.2278, -0.6528, -0.0231,  0.0019]], grad_fn=<AddmmBackward>) tensor([6])
0 5
tensor([3]) tensor([[ 0.0621, -1.5634

In [308]:
import torchvision.models as models
model = models.resnet18()
model.fc = nn.Linear(512, 12)
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [307]:
n_epochs = 12
print_every = 10
valid_loss_min = np.Inf
val_loss = []
val_acc = []
train_loss = []
train_acc = []
total_step = len(train_loader)
for epoch in range(1, n_epochs+1):
    running_loss = 0.0
    # scheduler.step(epoch)
    correct = 0
    total=0
    print(f'Epoch {epoch}\n')
    for batch_idx, (data_, target_) in enumerate(train_loader):
        #data_, target_ = data_.to(device), target_.to(device)# on GPU
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(data_)
        target_ = target_.type(torch.long)
        loss = criterion(outputs, target_)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        _,pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred==target_).item()
        total += target_.size(0)
        if (batch_idx) % 20 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch, n_epochs, batch_idx, total_step, loss.item()))
    train_acc.append(100 * correct / total)
    train_loss.append(running_loss/total_step)
    print(f'\ntrain loss: {np.mean(train_loss):.4f}, train acc: {(100 * correct / total):.4f}')
    batch_loss = 0
    total_t=0
    correct_t=0
    with torch.no_grad():
        model.eval()
        for data_t, target_t in (validation_loader):
            #data_t, target_t = data_t.to(device), target_t.to(device)# on GPU
            outputs_t = model(data_t)
            target_t = target_t.type(torch.long)
            loss_t = criterion(outputs_t, target_t)
            batch_loss += loss_t.item()
            _,pred_t = torch.max(outputs_t, dim=1)
            correct_t += torch.sum(pred_t==target_t).item()
            total_t += target_t.size(0)
        val_acc.append(100 * correct_t / total_t)
        val_loss.append(batch_loss/len(validation_loader))
        network_learned = batch_loss < valid_loss_min
        print(f'validation loss: {np.mean(val_loss):.4f}, validation acc: {(100 * correct_t / total_t):.4f}\n')
        # Saving the best weight 
        if network_learned:
            valid_loss_min = batch_loss
            torch.save(model.state_dict(), 'model_classification_tutorial.pt')
            print('Detected network improvement, saving current model')
    model.train()

Epoch 1

Epoch [1/12], Step [0/25], Loss: 3.8131
Epoch [1/12], Step [20/25], Loss: 3.0835

train loss: 2.6356, train acc: 20.0000
validation loss: 2.5681, validation acc: 8.3333

Detected network improvement, saving current model
Epoch 2

Epoch [2/12], Step [0/25], Loss: 3.8131
Epoch [2/12], Step [20/25], Loss: 3.0835

train loss: 2.6356, train acc: 20.0000
validation loss: 2.5789, validation acc: 8.3333

Epoch 3

Epoch [3/12], Step [0/25], Loss: 3.8131
Epoch [3/12], Step [20/25], Loss: 3.0835

train loss: 2.6356, train acc: 20.0000
validation loss: 2.6020, validation acc: 8.8889

Epoch 4

Epoch [4/12], Step [0/25], Loss: 3.8131
Epoch [4/12], Step [20/25], Loss: 3.0835

train loss: 2.6356, train acc: 20.0000


KeyboardInterrupt: 