In [1]:
data_path = '../dataset'

In [2]:
import os

In [3]:
for path in os.listdir(data_path):
    if os.path.isdir(os.path.join(data_path, path)):
        print(os.path.join(data_path, path))

../dataset/test
../dataset/train


In [4]:
train_data_path = os.path.join(data_path, 'train')
test_data_path = os.path.join(data_path, 'test')

In [5]:
train_classes = dict()

for path in sorted(os.listdir(train_data_path)):
    if os.path.isdir(os.path.join(train_data_path, path)):
        train_classes.setdefault(len(train_classes), path)
        
train_classes

{0: 'chicken_curry',
 1: 'chicken_wings',
 2: 'fried_rice',
 3: 'grilled_salmon',
 4: 'hamburger',
 5: 'ice_cream',
 6: 'pizza',
 7: 'ramen',
 8: 'steak',
 9: 'sushi'}

In [6]:
test_classes = dict()

for path in sorted(os.listdir(test_data_path)):
    if os.path.isdir(os.path.join(test_data_path, path)):
        test_classes.setdefault(len(test_classes), path)
        
test_classes

{0: 'chicken_curry',
 1: 'chicken_wings',
 2: 'fried_rice',
 3: 'grilled_salmon',
 4: 'hamburger',
 5: 'ice_cream',
 6: 'pizza',
 7: 'ramen',
 8: 'steak',
 9: 'sushi'}

In [7]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [8]:
torch.cuda.get_device_name(0)

'GeForce GTX 1070'

In [9]:
torch.cuda.empty_cache()

In [10]:
from torchvision import transforms as T

In [11]:
train_transform = T.Compose([
    T.Resize((256,256)),
    T.CenterCrop((224,224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [12]:
val_transform = T.Compose([
    T.Resize((256,256)),
    T.CenterCrop((224,224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [13]:
test_transform = T.Compose([
    T.Resize((256,256)),
    T.CenterCrop((224,224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [14]:
from torchvision.datasets import ImageFolder

In [15]:
train_dataset = ImageFolder(
    root=train_data_path,
    transform=train_transform
)

In [16]:
train_dataset

Dataset ImageFolder
    Number of datapoints: 750
    Root location: ../dataset/train
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=PIL.Image.BILINEAR)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [17]:
val_dataset = ImageFolder(
    root=train_data_path,
    transform=val_transform
)

In [18]:
val_dataset

Dataset ImageFolder
    Number of datapoints: 750
    Root location: ../dataset/train
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=PIL.Image.BILINEAR)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [19]:
test_dataset = ImageFolder(
    root=test_data_path,
    transform=val_transform
)

In [20]:
test_dataset

Dataset ImageFolder
    Number of datapoints: 2500
    Root location: ../dataset/test
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=PIL.Image.BILINEAR)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [21]:
import numpy as np

In [22]:
# Reference: https://gist.github.com/kevinzakka/d33bf8d6c7f06a9d8c76d97a7879f5cb

val_size = .2

num_train = len(train_dataset)
indices = list(range(num_train))
split = int(np.floor(val_size * num_train))

random_seed = 42 # FYI: https://en.wikipedia.org/wiki/Phrases_from_The_Hitchhiker%27s_Guide_to_the_Galaxy#On_the_Internet_and_in_software

np.random.seed(random_seed)
np.random.shuffle(indices)

In [23]:
from torch.utils.data.sampler import SubsetRandomSampler

In [24]:
train_idx, val_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

In [25]:
from torch.utils.data import DataLoader

In [26]:
BATCH_SIZE = 16

In [27]:
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=0,
    sampler=train_sampler
)

In [28]:
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=0,
    sampler=val_sampler
)

In [29]:
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=0,
    shuffle=False
)

In [30]:
from torchvision import models

In [31]:
model = models.resnet18(pretrained=True)

In [32]:
for param in model.parameters():
    param.requires_grad = False

In [33]:
import torch.nn as nn

In [34]:
n_inputs = model.fc.in_features
n_outputs = 10

In [35]:
# sequential_layers = nn.Sequential(
#     nn.Linear(n_inputs, 256),
#     nn.ReLU(),
#     nn.Dropout(.2),
#     nn.Linear(256, 128),
#     nn.ReLU(),
#     nn.Dropout(.2),
#     nn.Linear(128, n_classes),
#     nn.LogSoftmax(dim=1)
# )

In [36]:
sequential_layers = nn.Linear(n_inputs, n_outputs)

In [37]:
model.fc = sequential_layers

In [38]:
model = model.to('cuda')

In [39]:
import torch.optim as optim

In [40]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=.9)

scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=.1)

In [41]:
loaders = {
    'train': train_loader,
    'val': val_loader,
    'test': test_loader
}

In [42]:
dataset_sizes = {
    'train': len(train_dataset),
    'val': len(val_dataset),
    'test': len(test_dataset)
}

In [43]:
from copy import deepcopy

In [44]:
%%time

EPOCHS = 5

for epoch in range(1, EPOCHS+1):
    running_loss = .0
    running_corrects = .0
    best_acc = .0
    print(f"\nEpoch {epoch}/{EPOCHS}\n{'='*25}")
    for phase in ['train', 'val']:
        if phase == 'train': model.train()
        if phase == 'val': model.eval()
        for inputs, labels in loaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels)
        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]
        if phase == 'train': scheduler.step()
        if phase == 'val' and epoch_acc > best_acc:
            best_corrects = epoch_acc
            best_model_weights = deepcopy(model.state_dict())
        print(f"Loss ({phase}): {epoch_loss}, Acc ({phase}): {epoch_acc}")


Epoch 1/5
Loss (train): 1.7744100519816082, Acc (train): 0.16799999999999998
Loss (val): 2.1630811049143475, Acc (val): 0.24533333333333332

Epoch 2/5
Loss (train): 1.4199799766540528, Acc (train): 0.3733333333333333
Loss (val): 1.7442166748046875, Acc (val): 0.484

Epoch 3/5
Loss (train): 1.1640930048624674, Acc (train): 0.48133333333333334
Loss (val): 1.4456421149571737, Acc (val): 0.5986666666666667

Epoch 4/5
Loss (train): 0.9879970855712891, Acc (train): 0.5386666666666666
Loss (val): 1.2655967524846394, Acc (val): 0.6533333333333333

Epoch 5/5
Loss (train): 0.8841626841227214, Acc (train): 0.5733333333333334
Loss (val): 1.135031644185384, Acc (val): 0.6946666666666667
CPU times: user 1min 26s, sys: 71.7 ms, total: 1min 26s
Wall time: 17.7 s


In [45]:
torch.save(best_model_weights, 'foodnet.pth')

In [46]:
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
%%time

for inputs, labels in test_loader:
    inputs, labels = inputs.to(device), labels.to(device)

    with torch.no_grad():
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

    running_loss += loss.item() * inputs.size(0)
    running_corrects += torch.sum(preds == labels)
    
loss = running_loss / dataset_sizes['test']
acc = running_corrects.double() / dataset_sizes['test']

In [None]:
print(f"Test Loss: {epoch_loss}, Test Accuracy: {epoch_acc}")