In [34]:
import torch
import torchvision
import matplotlib.pyplot as plt
import os

from torch import nn
from torchvision import transforms
from torchinfo import summary
from datetime import datetime

import data_setup, engine, helper_functions, utils

In [2]:
# Prefer the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
# Dataset root plus its train/test sub-folders (kept as plain "/"-joined strings).
image_path = "data/pizza_steak_sushi"
train_dir = f"{image_path}/train"
test_dir = f"{image_path}/test"

In [4]:
# Per-channel normalisation. NOTE(review): these look like the usual ImageNet
# channel statistics, presumably chosen to match the pretrained weights — confirm.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Hand-written preprocessing pipeline: resize to 224x224, convert to a tensor,
# then normalise.
manual_transforms = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor(), normalize]
)

In [5]:
# Build the train/test dataloaders (and the class-name list) using the
# hand-written transform pipeline.
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=manual_transforms,
    batch_size=32,
)

In [6]:
# Default pretrained weights for EfficientNet-B0 and the transforms they expose.
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
automatic_transforms = weights.transforms()

# Rebuild the dataloaders with the weight-provided transforms. This rebinds
# train_dataloader / test_dataloader / class_names from the previous cell.
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=automatic_transforms,
    batch_size=32,
)

In [7]:
# Instantiate EfficientNet-B0 with the pretrained weights selected above,
# move it to the target device, and display its architecture.
model = torchvision.models.efficientnet_b0(weights=weights)
model = model.to(device)
model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [8]:
# Freeze every parameter of the feature extractor so only the classifier head
# receives gradient updates.
model.features.requires_grad_(False)

In [9]:
# Seed before creating the new layers (presumably so the head's initial
# weights are reproducible — helper_functions is not visible here).
helper_functions.set_seeds(seed=42)

# Swap in a fresh classifier head with one output per class name.
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=len(class_names)),
)
model.classifier.to(device)

In [10]:
# Layer-by-layer summary of the modified model for a batch of 32 RGB 224x224
# images, including which layers are trainable.
summary(
    model,
    input_size=(32, 3, 224, 224),
    verbose=0,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

In [11]:
# Cross-entropy loss for the classification head.
loss_fn = nn.CrossEntropyLoss()

# Adam over all model parameters, with a small weight decay.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=1e-4,
)

In [12]:
# TensorBoard writer class, used here and by create_writer() further down.
from torch.utils.tensorboard import SummaryWriter
# Default-constructed writer.
# NOTE(review): `writer` is not passed to any call visible in this notebook —
# confirm whether engine.train relies on it or whether it is unused.
writer = SummaryWriter()

In [13]:
# Reseed, then train the frozen-backbone model for 5 epochs and keep whatever
# engine.train returns in `results`.
helper_functions.set_seeds()
results = engine.train(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=5,
    device=device,
    model_experiment=True,
)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0901 | train_acc: 0.4102 | test_loss: 0.8925 | test_acc: 0.6714
Epoch: 2 | train_loss: 0.8940 | train_acc: 0.6562 | test_loss: 0.8082 | test_acc: 0.7746
Epoch: 3 | train_loss: 0.7455 | train_acc: 0.8398 | test_loss: 0.7417 | test_acc: 0.7642
Epoch: 4 | train_loss: 0.7817 | train_acc: 0.6953 | test_loss: 0.6820 | test_acc: 0.8144
Epoch: 5 | train_loss: 0.6320 | train_acc: 0.7734 | test_loss: 0.6400 | test_acc: 0.8665


In [None]:
# %load_ext tensorboard
# %tensorboard --logdir runs

In [20]:
def create_writer(experiment_name: str,
                  model_name: str,
                  extra: str = None):
    """Create a SummaryWriter that logs to a dated, per-experiment directory.

    The log directory is ``runs/<YYYY-MM-DD>/<experiment_name>/<model_name>``,
    with ``<extra>`` appended as a final path component when it is given.

    Args:
        experiment_name: Name of the experiment (directory level under the date).
        model_name: Name of the model (directory level under the experiment).
        extra: Optional final directory level; skipped when None or empty.

    Returns:
        A SummaryWriter whose ``log_dir`` is the path described above.
    """
    # Only the calendar date is used, so runs on the same day share a parent folder.
    run_date = datetime.now().strftime("%Y-%m-%d")

    path_parts = ["runs", run_date, experiment_name, model_name]
    if extra:
        path_parts.append(extra)

    return SummaryWriter(log_dir=os.path.join(*path_parts))

In [21]:
# Try the helper once and display the writer it returns.
example_writer = create_writer(
    experiment_name="data_10_percent",
    model_name="effnetb0",
    extra="5 epochs",
)
example_writer

<torch.utils.tensorboard.writer.SummaryWriter at 0x296214610>

In [25]:
# Training folders for the two dataset sizes being compared.
train_dir_10_percent = "data/pizza_steak_sushi/train"
train_dir_20_percent = "data/pizza_steak_sushi_20_percent/train"

# Both training sets use the same test folder.
test_dir = "data/pizza_steak_sushi/test"


In [26]:
BATCH_SIZE = 32

# Dataloaders for the 10% training split; the test loader reads test_dir.
train_dataloader_10_percent, test_dataloader_10_percent, class_names = data_setup.create_dataloaders(
    train_dir=train_dir_10_percent,
    test_dir=test_dir,
    transform=manual_transforms,
    batch_size=BATCH_SIZE,
)

# Dataloaders for the 20% training split; same test_dir and transform, so
# class_names is simply rebound here.
train_dataloader_20_percent, test_dataloader_20_percent, class_names = data_setup.create_dataloaders(
    train_dir=train_dir_20_percent,
    test_dir=test_dir,
    transform=manual_transforms,
    batch_size=BATCH_SIZE,
)

In [27]:
# Default pretrained weights for EfficientNet-B2.
effnetb2_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
# Unmodified EfficientNet-B2 (original classifier, not moved to `device`),
# created so its layout can be inspected with summary() below.
effnetb2 = torchvision.models.efficientnet_b2(weights=effnetb2_weights)

Downloading: "https://download.pytorch.org/models/efficientnet_b2_rwightman-c35c1473.pth" to /Users/bilensezgin/.cache/torch/hub/checkpoints/efficientnet_b2_rwightman-c35c1473.pth
100%|██████████| 35.2M/35.2M [00:02<00:00, 16.1MB/s]


In [28]:
# Summarise the stock EfficientNet-B2 for a batch of 32 RGB 224x224 images.
summary(
    model=effnetb2,
    input_size=(32, 3, 224, 224),
    verbose=0,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 1000]           --                   True
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1408, 7, 7]     --                   True
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   True
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   864                  True
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   64                   True
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 16, 112

In [30]:
# Output size for the new classifier heads: one unit per entry in class_names.
# Read as a module-level global by create_effnetb0() and create_effnetb2().
OUT_FEATURES = len(class_names)

def create_effnetb0():
    """Build an EfficientNet-B0 feature extractor with a new classifier head.

    Loads the default pretrained weights, moves the model to ``device``,
    freezes every parameter under ``model.features`` and replaces the
    classifier with Dropout(p=0.2) + Linear(1280 -> OUT_FEATURES).

    Returns:
        The modified model, with its ``name`` attribute set to "effnetb0".
    """
    pretrained = torchvision.models.EfficientNet_B0_Weights.DEFAULT
    net = torchvision.models.efficientnet_b0(weights=pretrained).to(device)

    # Freeze the backbone so only the new head is trainable.
    net.features.requires_grad_(False)

    # Seed right before the head is created (presumably so its initial
    # weights are reproducible — helper_functions is not visible here).
    helper_functions.set_seeds()
    head = nn.Sequential(
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(in_features=1280, out_features=OUT_FEATURES),
    )
    net.classifier = head.to(device)

    net.name = "effnetb0"
    return net


def create_effnetb2():
    """Build an EfficientNet-B2 feature extractor with a new classifier head.

    Loads the default pretrained weights, moves the model to ``device``,
    freezes every parameter under ``model.features`` and replaces the
    classifier with Dropout(p=0.3) + Linear(1408 -> OUT_FEATURES).

    Returns:
        The modified model, with its ``name`` attribute set to "effnetb2".
    """
    pretrained = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    net = torchvision.models.efficientnet_b2(weights=pretrained).to(device)

    # Freeze the backbone so only the new head is trainable.
    net.features.requires_grad_(False)

    # Seed right before the head is created (presumably so its initial
    # weights are reproducible — helper_functions is not visible here).
    helper_functions.set_seeds()
    head = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),
        nn.Linear(in_features=1408, out_features=OUT_FEATURES),
    )
    net.classifier = head.to(device)

    net.name = "effnetb2"
    return net

In [31]:
# Build one EfficientNet-B2 via the factory so it can be inspected with
# summary() in the next cell.
test_model = create_effnetb2()

In [32]:
# Summarise the factory-built EfficientNet-B2 to check the frozen backbone
# and the new classifier head.
summary(
    model=test_model,
    input_size=(32, 3, 224, 224),
    verbose=0,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1408, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

In [33]:
# Experiment grid: epoch budgets, model names, and training sets keyed by the
# label used in log directories and saved-model filenames.
num_epochs = [5, 10]
models = ["effnetb0", "effnetb2"]
train_dataloaders = {
    "data_10_percent": train_dataloader_10_percent,
    "data_20_percent": train_dataloader_20_percent,
}

In [35]:
%%time
helper_functions.set_seeds(seed=42)

experiment_number = 0 

for dataloader_name, train_dataloader in train_dataloaders.items():
    for epoch in num_epochs:
        for model_name in models:
            experiment_number += 1
            print(f"Experiment Number: {experiment_number}")
            
            if model_name == "effnetb0":
                model = create_effnetb2()
            else:
                model = create_effnetb2()
            
            loss_fn = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
            
            engine.train(model=model,
                         train_dataloader=train_dataloader,
                         test_dataloader=test_dataloader,
                         optimizer=optimizer,
                         loss_fn=loss_fn,
                         epochs=epoch,
                         device=device,
                         writer=create_writer(experiment_name=dataloader_name,
                                              model_name=model_name,
                                              extra=f"{epoch}_epochs"))
            
            save_filepath = f"07_{model_name}_{dataloader_name}_{epoch}_epochs.pth"
            utils.save_model(model=model,
                             target_dir="models",
                             model_name=save_filepath)

Experiment Number: 1


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0917 | train_acc: 0.3984 | test_loss: 0.9333 | test_acc: 0.7216
Epoch: 2 | train_loss: 0.9300 | train_acc: 0.6562 | test_loss: 0.8814 | test_acc: 0.7339
Epoch: 3 | train_loss: 0.7961 | train_acc: 0.8398 | test_loss: 0.7367 | test_acc: 0.8864
Epoch: 4 | train_loss: 0.7130 | train_acc: 0.7734 | test_loss: 0.6998 | test_acc: 0.8864
Epoch: 5 | train_loss: 0.6842 | train_acc: 0.7812 | test_loss: 0.7204 | test_acc: 0.8561
Experiment Number: 2


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0917 | train_acc: 0.3984 | test_loss: 0.9333 | test_acc: 0.7216
Epoch: 2 | train_loss: 0.9300 | train_acc: 0.6562 | test_loss: 0.8814 | test_acc: 0.7339
Epoch: 3 | train_loss: 0.7961 | train_acc: 0.8398 | test_loss: 0.7367 | test_acc: 0.8864
Epoch: 4 | train_loss: 0.7130 | train_acc: 0.7734 | test_loss: 0.6998 | test_acc: 0.8864
Epoch: 5 | train_loss: 0.6842 | train_acc: 0.7812 | test_loss: 0.7204 | test_acc: 0.8561
Experiment Number: 3


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0917 | train_acc: 0.3984 | test_loss: 0.9333 | test_acc: 0.7216
Epoch: 2 | train_loss: 0.9300 | train_acc: 0.6562 | test_loss: 0.8814 | test_acc: 0.7339
Epoch: 3 | train_loss: 0.7961 | train_acc: 0.8398 | test_loss: 0.7367 | test_acc: 0.8864
Epoch: 4 | train_loss: 0.7130 | train_acc: 0.7734 | test_loss: 0.6998 | test_acc: 0.8864
Epoch: 5 | train_loss: 0.6842 | train_acc: 0.7812 | test_loss: 0.7204 | test_acc: 0.8561
Epoch: 6 | train_loss: 0.6164 | train_acc: 0.8086 | test_loss: 0.6291 | test_acc: 0.8759
Epoch: 7 | train_loss: 0.5576 | train_acc: 0.7969 | test_loss: 0.6368 | test_acc: 0.8551
Epoch: 8 | train_loss: 0.5705 | train_acc: 0.7891 | test_loss: 0.5849 | test_acc: 0.8655
Epoch: 9 | train_loss: 0.4783 | train_acc: 0.9258 | test_loss: 0.5748 | test_acc: 0.9072
Epoch: 10 | train_loss: 0.5241 | train_acc: 0.8164 | test_loss: 0.5913 | test_acc: 0.8968
Experiment Number: 4


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0917 | train_acc: 0.3984 | test_loss: 0.9333 | test_acc: 0.7216
Epoch: 2 | train_loss: 0.9300 | train_acc: 0.6562 | test_loss: 0.8814 | test_acc: 0.7339
Epoch: 3 | train_loss: 0.7961 | train_acc: 0.8398 | test_loss: 0.7367 | test_acc: 0.8864
Epoch: 4 | train_loss: 0.7130 | train_acc: 0.7734 | test_loss: 0.6998 | test_acc: 0.8864
Epoch: 5 | train_loss: 0.6842 | train_acc: 0.7812 | test_loss: 0.7204 | test_acc: 0.8561
Epoch: 6 | train_loss: 0.6164 | train_acc: 0.8086 | test_loss: 0.6291 | test_acc: 0.8759
Epoch: 7 | train_loss: 0.5576 | train_acc: 0.7969 | test_loss: 0.6368 | test_acc: 0.8551
Epoch: 8 | train_loss: 0.5705 | train_acc: 0.7891 | test_loss: 0.5849 | test_acc: 0.8655
Epoch: 9 | train_loss: 0.4783 | train_acc: 0.9258 | test_loss: 0.5748 | test_acc: 0.9072
Epoch: 10 | train_loss: 0.5241 | train_acc: 0.8164 | test_loss: 0.5913 | test_acc: 0.8968
Experiment Number: 5


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9804 | train_acc: 0.5396 | test_loss: 0.8039 | test_acc: 0.8258
Epoch: 2 | train_loss: 0.7556 | train_acc: 0.7875 | test_loss: 0.6704 | test_acc: 0.8561
Epoch: 3 | train_loss: 0.6433 | train_acc: 0.7896 | test_loss: 0.5794 | test_acc: 0.8759
Epoch: 4 | train_loss: 0.5265 | train_acc: 0.8729 | test_loss: 0.5700 | test_acc: 0.8769
Epoch: 5 | train_loss: 0.4353 | train_acc: 0.9062 | test_loss: 0.5135 | test_acc: 0.8864
Experiment Number: 6


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9804 | train_acc: 0.5396 | test_loss: 0.8039 | test_acc: 0.8258
Epoch: 2 | train_loss: 0.7556 | train_acc: 0.7875 | test_loss: 0.6704 | test_acc: 0.8561
Epoch: 3 | train_loss: 0.6433 | train_acc: 0.7896 | test_loss: 0.5794 | test_acc: 0.8759
Epoch: 4 | train_loss: 0.5265 | train_acc: 0.8729 | test_loss: 0.5700 | test_acc: 0.8769
Epoch: 5 | train_loss: 0.4353 | train_acc: 0.9062 | test_loss: 0.5135 | test_acc: 0.8864
Experiment Number: 7


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9804 | train_acc: 0.5396 | test_loss: 0.8039 | test_acc: 0.8258
Epoch: 2 | train_loss: 0.7556 | train_acc: 0.7875 | test_loss: 0.6704 | test_acc: 0.8561
Epoch: 3 | train_loss: 0.6433 | train_acc: 0.7896 | test_loss: 0.5794 | test_acc: 0.8759
Epoch: 4 | train_loss: 0.5265 | train_acc: 0.8729 | test_loss: 0.5700 | test_acc: 0.8769
Epoch: 5 | train_loss: 0.4353 | train_acc: 0.9062 | test_loss: 0.5135 | test_acc: 0.8864
Epoch: 6 | train_loss: 0.4612 | train_acc: 0.8417 | test_loss: 0.4703 | test_acc: 0.8968
Epoch: 7 | train_loss: 0.3964 | train_acc: 0.8646 | test_loss: 0.4786 | test_acc: 0.9072
Epoch: 8 | train_loss: 0.3438 | train_acc: 0.9208 | test_loss: 0.4295 | test_acc: 0.8864
Epoch: 9 | train_loss: 0.3425 | train_acc: 0.9125 | test_loss: 0.4308 | test_acc: 0.8968
Epoch: 10 | train_loss: 0.3291 | train_acc: 0.8875 | test_loss: 0.4508 | test_acc: 0.8864
Experiment Number: 8


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9804 | train_acc: 0.5396 | test_loss: 0.8039 | test_acc: 0.8258
Epoch: 2 | train_loss: 0.7556 | train_acc: 0.7875 | test_loss: 0.6704 | test_acc: 0.8561
Epoch: 3 | train_loss: 0.6433 | train_acc: 0.7896 | test_loss: 0.5794 | test_acc: 0.8759
Epoch: 4 | train_loss: 0.5265 | train_acc: 0.8729 | test_loss: 0.5700 | test_acc: 0.8769
Epoch: 5 | train_loss: 0.4353 | train_acc: 0.9062 | test_loss: 0.5135 | test_acc: 0.8864
Epoch: 6 | train_loss: 0.4612 | train_acc: 0.8417 | test_loss: 0.4703 | test_acc: 0.8968
Epoch: 7 | train_loss: 0.3964 | train_acc: 0.8646 | test_loss: 0.4786 | test_acc: 0.9072
Epoch: 8 | train_loss: 0.3438 | train_acc: 0.9208 | test_loss: 0.4295 | test_acc: 0.8864
Epoch: 9 | train_loss: 0.3425 | train_acc: 0.9125 | test_loss: 0.4308 | test_acc: 0.8968
Epoch: 10 | train_loss: 0.3291 | train_acc: 0.8875 | test_loss: 0.4508 | test_acc: 0.8864
CPU times: user 2h 7min 47s, sys: 14min 10s, total: 2h 21min 57s
Wall time: 54min 56s
