# 0. Now we have chose FoodVision from PyTorvh torchvision to work with

We need to:
1. Prepare our data from torch vision
2. Build a model( we will use pre-build model to make comparison)
    * 1. Choose an optimizer and loss function
    * 2. Design a training and testing loop
3. Fit the model to the data and make a prediction
4. Evaluate the model
5. Improve through experiment
6. Save model

## 5. Improve through experiment

In [1]:
# Prepare the data

# Set image path
from pathlib import Path
from scripts import data_setup

image_data = Path("data/") / "food_10_percent"

train_dir = image_data / "train"
test_dir = image_data / "test"

# Setup train and test transform
from torchvision import transforms

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])

train_transform = transforms.Compose([
    transforms.Resize(size=(224,224)),
    transforms.TrivialAugmentWide(num_magnitude_bins=31),
    transforms.ToTensor(),
    normalize
])

test_transform = transforms.Compose([
    transforms.Resize(size=(224,224)),
    transforms.ToTensor(),
    normalize
])

# create dataloader
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir,
                                                                               test_dir=test_dir,
                                                                               train_transforms=train_transform,
                                                                               test_transforms=test_transform,
                                                                               batch_size=32,
                                                                               )

train_dataloader, test_dataloader, len(class_names)

(<torch.utils.data.dataloader.DataLoader at 0x2884ec6c390>,
 <torch.utils.data.dataloader.DataLoader at 0x2884ecda010>,
 101)

In [2]:
import torch as T
import torchvision
from torchinfo import summary
import torch.nn as nn
# device agnostic code
device = "cuda" if T.cuda.is_available() else "cpu"

# create effnet b0
b0_weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
Effnet_b0 = torchvision.models.efficientnet_b0(weights=b0_weights)

# Freeze the feature layer
for params in Effnet_b0.features.parameters():
    params.requires_grad = False

# Change the classifier

## Before changeing we need to know effnet_b0 info by using summary
"""summary(model=Effnet_b0,
        input_size=(1,3,224,224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        row_settings=["var_names"])"""

## From the above we can know Linear layer takes in 1280 and out put 1000
# Change classifer to own preference

Effnet_b0.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=len(class_names))
)

## Print out to make sure
summary(model=Effnet_b0,
        input_size=(1,3,224,224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        row_settings=["var_names"])



Layer (type (var_name))                                      Input Shape               Output Shape              Param #                   Trainable
EfficientNet (EfficientNet)                                  [1, 3, 224, 224]          [1, 101]                  --                        Partial
├─Sequential (features)                                      [1, 3, 224, 224]          [1, 1280, 7, 7]           --                        False
│    └─Conv2dNormActivation (0)                              [1, 3, 224, 224]          [1, 32, 112, 112]         --                        False
│    │    └─Conv2d (0)                                       [1, 3, 224, 224]          [1, 32, 112, 112]         (864)                     False
│    │    └─BatchNorm2d (1)                                  [1, 32, 112, 112]         [1, 32, 112, 112]         (64)                      False
│    │    └─SiLU (2)                                         [1, 32, 112, 112]         [1, 32, 112, 112]         --         

## 5.1 Let's improve our train() from `engine.py` with Summary Writer

In [3]:
from scripts.engine import train_step, test_step
from torch.utils.tensorboard import SummaryWriter
from tqdm.auto import tqdm
import torch.nn as nn


def train(model:nn.Module,
          train_dataloader:T.utils.data.DataLoader,
          test_dataloader:T.utils.data.DataLoader,
          loss_fn:nn.Module,
          optimizer:T.optim.Optimizer,
          epochs:int,
          device:T.device,
          writer:T.utils.tensorboard.SummaryWriter):
    for epoch in tqdm(range(epochs)):
        # Create empty results dictionary
        results = {"train_loss": [],
                "train_acc": [],
                "test_loss": [],
                "test_acc": []
        }
        model.to(device)
        train_loss, train_acc = train_step(model=model,
                                        train_dataloader=train_dataloader,
                                        loss_fn=loss_fn,
                                        optimizer=optimizer,
                                        device=device)
        test_loss, test_acc = test_step(model=model,
                                        test_dataloader=test_dataloader,
                                        loss_fn=loss_fn,
                                        device=device)
        ### EXPERIMENT ###
        writer.add_scalars(main_tag="Accuracy",
                          tag_scalar_dict={"train_acc":train_acc,
                                           "test_acc" :test_acc},
                            global_step = epoch)
        writer.add_scalars(main_tag="Loss",
                          tag_scalar_dict={"train_loss": train_loss,
                                           "test_loss": test_loss},
                                           global_step=epoch)
        writer.add_graph(model=model,
                         input_to_model=T.rand(32,3,224,224).to(device))
        writer.close()

        ### END ###
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )
        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

    return results


Code is writtern lets replace it into our engine.py

In [4]:
from torch.utils.tensorboard.writer import SummaryWriter
def create_writer(experiment_name: str,
                  model_name: str,
                  extra: str = None):
    """ 
    Creates a torch.utils.tensorboard.writer.SummaryWriter() instance tracking to a specific
    """
    from datetime import datetime
    import os

    # Get timestamp of current date in reverse order
    timestamp = datetime.now().strftime("%Y-%m-%d")

    if extra:
        # Create log directory
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name, extra)
    else:
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name)
    print(f"[INFO] Created SummaryWriter saving to {log_dir}")
    return SummaryWriter(log_dir=log_dir)

In [6]:
# Train our effnet_b0 with our data
## Setup loss and optimizer
from scripts import engine
loss_fn = nn.CrossEntropyLoss()
optimizer = T.optim.Adam(params=Effnet_b0.parameters(),
                         lr=0.1)

# Get result
Effnetb0_results = engine.train(model=Effnet_b0,
                         train_dataloader=train_dataloader,
                         test_dataloader=test_dataloader,
                         loss_fn=loss_fn,
                         optimizer=optimizer,
                         epochs=5,
                         device=device,
                         writer=create_writer(experiment_name="10%_data",
                                              model_name="Effnet_b0",
                                              extra="5_epochs"))

[INFO] Created SummaryWriter saving to runs\2024-10-20\10%_data\Effnet_b0\5_epochs


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 25.2052 | train_acc: 0.1502 | test_loss: 22.5275 | test_acc: 0.3171
Epoch: 2 | train_loss: 25.7342 | train_acc: 0.2738 | test_loss: 26.2076 | test_acc: 0.3186
Epoch: 3 | train_loss: 25.7159 | train_acc: 0.3297 | test_loss: 27.5323 | test_acc: 0.3501
Epoch: 4 | train_loss: 26.1466 | train_acc: 0.3588 | test_loss: 28.5846 | test_acc: 0.3631
Epoch: 5 | train_loss: 26.6033 | train_acc: 0.3766 | test_loss: 29.9920 | test_acc: 0.3724


### 5.2 it seems our model is training but could we use other effnet to train better?

In [16]:
# create function to build effnet_b0, efffnet_b2, effnet_b4

def create_b0():
    # create effnet b0
    weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
    model = torchvision.models.efficientnet_b0(weights=weights).to(device)

    # Freeze the feature layer
    for params in model.features.parameters():
        params.requires_grad = False


    # Change classifer to own preference

    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(in_features=1280, out_features=len(class_names))
    ).to(device)

    # Give the model a name
    model.name = "effnetb0"
    print(f"[INFO] Created new {model.name} model...")
    return model

def create_b2():
    # create effnet b0
    b2_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    model = torchvision.models.efficientnet_b2(weights=b2_weights)

    # Freeze the feature layer
    for params in model.features.parameters():
        params.requires_grad = False


    # Change classifer to own preference

    model.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),
        nn.Linear(in_features=1408, out_features=len(class_names))
    )
    # Give the model a name
    model.name = "effnetb2"
    print(f"[INFO] Created new {model.name} model...")
    return model

def create_b4():
    # create effnet b0
    b4_weights = torchvision.models.EfficientNet_B4_Weights.DEFAULT
    model = torchvision.models.efficientnet_b4(weights=b4_weights)

    # Freeze the feature layer
    for params in model.features.parameters():
        params.requires_grad = False


    # Change classifer to own preference

    model.classifier = nn.Sequential(
        nn.Dropout(p=0.4, inplace=True),
        nn.Linear(in_features=1792, out_features=len(class_names))
    )
    # Give the model a name
    model.name = "effnetb4"
    print(f"[INFO] Created new {model.name} model...")
    return model


In [17]:
dummy = create_b4()
summary(model=dummy,
        input_size=(1,3,224,224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        row_settings=["var_names"])

[INFO] Created new effnetb4 model...


Layer (type (var_name))                                      Input Shape               Output Shape              Param #                   Trainable
EfficientNet (EfficientNet)                                  [1, 3, 224, 224]          [1, 101]                  --                        Partial
├─Sequential (features)                                      [1, 3, 224, 224]          [1, 1792, 7, 7]           --                        False
│    └─Conv2dNormActivation (0)                              [1, 3, 224, 224]          [1, 48, 112, 112]         --                        False
│    │    └─Conv2d (0)                                       [1, 3, 224, 224]          [1, 48, 112, 112]         (1,296)                   False
│    │    └─BatchNorm2d (1)                                  [1, 48, 112, 112]         [1, 48, 112, 112]         (96)                      False
│    │    └─SiLU (2)                                         [1, 48, 112, 112]         [1, 48, 112, 112]         --         

In [21]:
#%%time
from scripts.save_model import save_model

# hyperparameters
num_epochs = [5, 10]
models = ["effnetb0","effnetb2","effnetb4"]
train_dataloader_set ={"train_dataloader":train_dataloader}

# Keep track of experiment numbers
experiment_number = 0

# Loop through each dataloader
for dataloader_name, train_dataloader in train_dataloader_set.items():
    # Loop through the epochs
    for epochs in num_epochs:
        # Loop through each model name and create a new model instance
        for model_name in models:
            #Print out info
            experiment_number +=1
            print(f"[INFO] Experiment number: {experiment_number}")
            print(f"[INFO] Model: {model_name}")
            print(f"[INFO] DataLoader: {dataloader_name}")
            print(f"[INFO] Number of epochs: {epochs}")

            # Select and create the model
            if model_name == "effnetb0":
                model=create_b0()
            elif model_name == "effnetb2" :
                model=create_b2()
            else:
                model=create_b4()
            
            # Create a new loss and optimizer for every model
            loss_fn = nn.CrossEntropyLoss()
            optimizer = T.optim.Adam(params=model.parameters(),
                                     lr=0.001)
            
            # Train target model with target dataloader and track experiment
            # Note: using train() rather engine,.train() coz this train has been modified
            engine.train(model=model,
                  train_dataloader=train_dataloader,
                  test_dataloader=test_dataloader,
                  loss_fn=loss_fn,
                  optimizer=optimizer,
                  epochs=epochs,
                  device=device,
                  writer=create_writer(experiment_name=dataloader_name,
                                       model_name=model_name,
                                       extra=f"{epochs}_epochs"))
            
            #Save the model to file so we can import it later if need be
            save_filepath = f"07_{model_name}_{dataloader_name}_{epochs}_epochs.pth"

            save_model(model=model,
                       target_dir="models",
                       model_name=save_filepath)
            print("-"*50 +"\n")

[INFO] Experiment number: 1
[INFO] Model: effnetb0
[INFO] DataLoader: train_dataloader
[INFO] Number of epochs: 5
[INFO] Created new effnetb0 model...
[INFO] Created SummaryWriter saving to runs\2024-10-20\train_dataloader\effnetb0\5_epochs


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 3.9300 | train_acc: 0.1764 | test_loss: 3.0561 | test_acc: 0.3726
Epoch: 2 | train_loss: 2.9651 | train_acc: 0.3601 | test_loss: 2.5536 | test_acc: 0.4300
Epoch: 3 | train_loss: 2.5814 | train_acc: 0.4227 | test_loss: 2.3522 | test_acc: 0.4550
Epoch: 4 | train_loss: 2.3640 | train_acc: 0.4543 | test_loss: 2.2351 | test_acc: 0.4744
Epoch: 5 | train_loss: 2.2183 | train_acc: 0.4736 | test_loss: 2.1843 | test_acc: 0.4729
[INFO] Saving model to: models\07_effnetb0_train_dataloader_5_epochs.pth
--------------------------------------------------

[INFO] Experiment number: 2
[INFO] Model: effnetb2
[INFO] DataLoader: train_dataloader
[INFO] Number of epochs: 5
[INFO] Created new effnetb2 model...
[INFO] Created SummaryWriter saving to runs\2024-10-20\train_dataloader\effnetb2\5_epochs


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 4.0215 | train_acc: 0.1655 | test_loss: 3.3010 | test_acc: 0.3412


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x0000028847E72E80>
Traceback (most recent call last):
  File "d:\Study\ML_learning\myenv\Lib\site-packages\torch\utils\data\dataloader.py", line 1604, in __del__
    self._shutdown_workers()
  File "d:\Study\ML_learning\myenv\Lib\site-packages\torch\utils\data\dataloader.py", line 1562, in _shutdown_workers
    if self._persistent_workers or self._workers_status[worker_id]:
                                   ^^^^^^^^^^^^^^^^^^^^
AttributeError: '_MultiProcessingDataLoaderIter' object has no attribute '_workers_status'


Epoch: 2 | train_loss: 3.0812 | train_acc: 0.3520 | test_loss: 2.7496 | test_acc: 0.4071
Epoch: 3 | train_loss: 2.6735 | train_acc: 0.4071 | test_loss: 2.5293 | test_acc: 0.4288
Epoch: 4 | train_loss: 2.4354 | train_acc: 0.4470 | test_loss: 2.3890 | test_acc: 0.4451
Epoch: 5 | train_loss: 2.3010 | train_acc: 0.4650 | test_loss: 2.2860 | test_acc: 0.4569
[INFO] Saving model to: models\07_effnetb2_train_dataloader_5_epochs.pth
--------------------------------------------------

[INFO] Experiment number: 3
[INFO] Model: effnetb4
[INFO] DataLoader: train_dataloader
[INFO] Number of epochs: 5
[INFO] Created new effnetb4 model...
[INFO] Created SummaryWriter saving to runs\2024-10-20\train_dataloader\effnetb4\5_epochs


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 4.3121 | train_acc: 0.1390 | test_loss: 3.9117 | test_acc: 0.3114
Epoch: 2 | train_loss: 3.7023 | train_acc: 0.2975 | test_loss: 3.4195 | test_acc: 0.3628
Epoch: 3 | train_loss: 3.2916 | train_acc: 0.3588 | test_loss: 3.1136 | test_acc: 0.3758
Epoch: 4 | train_loss: 3.0175 | train_acc: 0.3839 | test_loss: 2.8812 | test_acc: 0.4060
Epoch: 5 | train_loss: 2.8101 | train_acc: 0.4072 | test_loss: 2.7065 | test_acc: 0.4167
[INFO] Saving model to: models\07_effnetb4_train_dataloader_5_epochs.pth
--------------------------------------------------

[INFO] Experiment number: 4
[INFO] Model: effnetb0
[INFO] DataLoader: train_dataloader
[INFO] Number of epochs: 10
[INFO] Created new effnetb0 model...
[INFO] Created SummaryWriter saving to runs\2024-10-20\train_dataloader\effnetb0\10_epochs


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 3.9332 | train_acc: 0.1805 | test_loss: 3.0786 | test_acc: 0.3674
Epoch: 2 | train_loss: 2.9614 | train_acc: 0.3647 | test_loss: 2.5675 | test_acc: 0.4334
Epoch: 3 | train_loss: 2.5864 | train_acc: 0.4209 | test_loss: 2.3436 | test_acc: 0.4508
Epoch: 4 | train_loss: 2.3771 | train_acc: 0.4563 | test_loss: 2.2539 | test_acc: 0.4701
Epoch: 5 | train_loss: 2.2165 | train_acc: 0.4845 | test_loss: 2.1796 | test_acc: 0.4748
Epoch: 6 | train_loss: 2.1147 | train_acc: 0.5026 | test_loss: 2.1286 | test_acc: 0.4827
Epoch: 7 | train_loss: 2.0137 | train_acc: 0.5163 | test_loss: 2.1111 | test_acc: 0.4880
Epoch: 8 | train_loss: 1.9492 | train_acc: 0.5245 | test_loss: 2.1005 | test_acc: 0.4819
Epoch: 9 | train_loss: 1.8942 | train_acc: 0.5447 | test_loss: 2.0860 | test_acc: 0.4845
Epoch: 10 | train_loss: 1.8274 | train_acc: 0.5554 | test_loss: 2.0737 | test_acc: 0.4906
[INFO] Saving model to: models\07_effnetb0_train_dataloader_10_epochs.pth
-----------------------------------

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 4.0228 | train_acc: 0.1615 | test_loss: 3.2933 | test_acc: 0.3405
Epoch: 2 | train_loss: 3.0580 | train_acc: 0.3541 | test_loss: 2.7603 | test_acc: 0.4075
Epoch: 3 | train_loss: 2.6597 | train_acc: 0.4102 | test_loss: 2.5125 | test_acc: 0.4353
Epoch: 4 | train_loss: 2.4374 | train_acc: 0.4506 | test_loss: 2.3893 | test_acc: 0.4433
Epoch: 5 | train_loss: 2.2938 | train_acc: 0.4702 | test_loss: 2.2944 | test_acc: 0.4543
Epoch: 6 | train_loss: 2.1730 | train_acc: 0.4903 | test_loss: 2.2408 | test_acc: 0.4622
Epoch: 7 | train_loss: 2.0998 | train_acc: 0.4991 | test_loss: 2.2041 | test_acc: 0.4669
Epoch: 8 | train_loss: 2.0502 | train_acc: 0.5033 | test_loss: 2.1883 | test_acc: 0.4662
Epoch: 9 | train_loss: 1.9690 | train_acc: 0.5241 | test_loss: 2.1498 | test_acc: 0.4764
Epoch: 10 | train_loss: 1.9394 | train_acc: 0.5283 | test_loss: 2.1268 | test_acc: 0.4781
[INFO] Saving model to: models\07_effnetb2_train_dataloader_10_epochs.pth
-----------------------------------

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 4.3118 | train_acc: 0.1420 | test_loss: 3.9183 | test_acc: 0.3126
Epoch: 2 | train_loss: 3.7014 | train_acc: 0.3075 | test_loss: 3.4325 | test_acc: 0.3588
Epoch: 3 | train_loss: 3.2946 | train_acc: 0.3501 | test_loss: 3.1097 | test_acc: 0.3715
Epoch: 4 | train_loss: 3.0182 | train_acc: 0.3847 | test_loss: 2.8863 | test_acc: 0.3964
Epoch: 5 | train_loss: 2.8034 | train_acc: 0.4178 | test_loss: 2.7225 | test_acc: 0.4190
Epoch: 6 | train_loss: 2.6646 | train_acc: 0.4260 | test_loss: 2.6176 | test_acc: 0.4291
Epoch: 7 | train_loss: 2.5489 | train_acc: 0.4443 | test_loss: 2.5255 | test_acc: 0.4349
Epoch: 8 | train_loss: 2.4497 | train_acc: 0.4593 | test_loss: 2.4487 | test_acc: 0.4460
Epoch: 9 | train_loss: 2.3905 | train_acc: 0.4637 | test_loss: 2.3688 | test_acc: 0.4571
Epoch: 10 | train_loss: 2.2917 | train_acc: 0.4708 | test_loss: 2.3411 | test_acc: 0.4492
[INFO] Saving model to: models\07_effnetb4_train_dataloader_10_epochs.pth
-----------------------------------

In [23]:
import tensorboard

In [27]:
%load_ext tensorboard
%tensorboard --logdir runs/train_dataloader