In [46]:
#lets try and use torch instead
from PIL import Image
import pathlib
import scipy.io
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import SGD
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from torch.utils.data import DataLoader

In [47]:
NUM_CLASSES = 196 #num classes in stanford cars dataset


In [48]:
transforms = transforms.Compose(
[
    transforms.ToTensor(),
    transforms.Resize((224, 224))
])
train = datasets.StanfordCars("cars", split="train", transform=transforms) #download=True if running repo for the first time
test = datasets.StanfordCars("cars", split="test", transform=transforms)#download=True if running repo for the first time

In [49]:
train_loader = DataLoader(train, batch_size = 10, shuffle = True)
test_loader = DataLoader(test, batch_size = 10, shuffle = True)

In [50]:
resnet = models.resnet18(weights= "DEFAULT")
for param in resnet.parameters():
    param.requires_grad = False #freeze all layers for training

In [51]:
#lets set the output layer to be our 196 classes
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 
resnet.fc = nn.Linear(512, NUM_CLASSES)  #lets just do a basic linear classification head as a baseline and move from there
resnet.to(device)
optimizer = SGD(resnet.parameters(), lr=0.0001, momentum=0.9)

#device = torch.device('cpu')
criterion = nn.CrossEntropyLoss()

In [52]:
print(device)

cuda:0


In [55]:
BATCH_SIZE = 30
EPOCHS = 50
print_every = 10
valid_loss_min = np.Inf
val_loss = []
val_acc = []
train_loss = []
train_acc = []
total_step = len(train_loader)

for epoch in range(1, EPOCHS+1):
    running_loss = 0.0
    correct = 0
    total=0
    print(f'Epoch {epoch}\n')
    for batch_idx, (data_, target_) in enumerate(train_loader):
        data_, target_ = data_.to(device), target_.to(device)
        optimizer.zero_grad()
        outputs = resnet(data_)
        loss = criterion(outputs, target_)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _,pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred==target_).item()
        total += target_.size(0)
        if (batch_idx) % 20 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch, EPOCHS, batch_idx, total_step, loss.item()))
    train_acc.append(100 * correct / total)
    train_loss.append(running_loss/total_step)
    print(f'\ntrain-loss: {np.mean(train_loss):.4f}, train-acc: {(100 * correct/total):.4f}')
    batch_loss = 0
    total_t=0
    correct_t=0
    with torch.no_grad():
        resnet.eval()
        for data_t, target_t in (test_loader):
            data_t, target_t = data_t.to(device), target_t.to(device)
            outputs_t = resnet(data_t)
            loss_t = criterion(outputs_t, target_t)
            batch_loss += loss_t.item()
            _,pred_t = torch.max(outputs_t, dim=1)
            correct_t += torch.sum(pred_t==target_t).item()
            total_t += target_t.size(0)
        val_acc.append(100 * correct_t/total_t)
        val_loss.append(batch_loss/len(test_loader))
        network_learned = batch_loss < valid_loss_min
        print(f'validation loss: {np.mean(val_loss):.4f}, validation acc: {(100 * correct_t/total_t):.4f}\n')

        
        if network_learned:
            valid_loss_min = batch_loss
            torch.save(resnet.state_dict(), 'resnet.pt')
            print('Improvement-Detected, save-model')
    resnet.train()

Epoch 1

Epoch [1/50], Step [0/815], Loss: 5.4213
Epoch [1/50], Step [20/815], Loss: 5.3592
Epoch [1/50], Step [40/815], Loss: 5.3224
Epoch [1/50], Step [60/815], Loss: 5.2684
Epoch [1/50], Step [80/815], Loss: 5.4403
Epoch [1/50], Step [100/815], Loss: 5.4976
Epoch [1/50], Step [120/815], Loss: 5.6152
Epoch [1/50], Step [140/815], Loss: 5.1116
Epoch [1/50], Step [160/815], Loss: 5.4164
Epoch [1/50], Step [180/815], Loss: 5.3592
Epoch [1/50], Step [200/815], Loss: 5.1971
Epoch [1/50], Step [220/815], Loss: 5.0910
Epoch [1/50], Step [240/815], Loss: 5.2815
Epoch [1/50], Step [260/815], Loss: 5.5753
Epoch [1/50], Step [280/815], Loss: 5.0966
Epoch [1/50], Step [300/815], Loss: 5.0963
Epoch [1/50], Step [320/815], Loss: 5.3696
Epoch [1/50], Step [340/815], Loss: 5.2692
Epoch [1/50], Step [360/815], Loss: 5.4746
Epoch [1/50], Step [380/815], Loss: 5.3711
Epoch [1/50], Step [400/815], Loss: 5.2659
Epoch [1/50], Step [420/815], Loss: 5.2017
Epoch [1/50], Step [440/815], Loss: 5.2233
Epoch [1

best achieved validation accuracy of 28.4%, pretty good for just a single linear classification head ontop of such a small network (resnet 18)