# Training with EfficientNet-B1

In [20]:
# Imports all the stuff needed
from utils import *

In [21]:
# Load EfficientNet-B1 with torchvision's default pretrained weights, and keep
# the preprocessing pipeline bundled with those weights so the dataloaders can
# apply it to the images.
weights = torchvision.models.EfficientNet_B1_Weights.DEFAULT
auto_transforms = weights.transforms()
model = torchvision.models.efficientnet_b1(weights=weights)

Downloading: "https://download.pytorch.org/models/efficientnet_b1-c27df63c.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b1-c27df63c.pth
100%|██████████| 30.1M/30.1M [00:00<00:00, 142MB/s] 


In [22]:
# Adapt the pretrained network to a 10-class problem:
#   1. freeze the feature extractor so only the classifier is trained,
#   2. replace the final classifier layer with a fresh 10-output linear layer,
#   3. wrap the model in DataParallel when several GPUs are visible,
#   4. move everything to `device`.
#
# The order matters. The head is replaced on the *underlying* network before
# wrapping (a DataParallel wrapper exposes the network as `.module`, not
# `.classifier`) and before `.to(device)`, so the new layer ends up on the
# same device as the rest of the model on both the single- and multi-GPU paths.
NUM_CLASSES = 10

# Unwrap first so this cell can be re-run without stacking DataParallel wrappers.
base_model = model.module if isinstance(model, nn.DataParallel) else model

# Freeze every parameter of the feature layers.
for param in base_model.features.parameters():
    param.requires_grad = False

# Swap the last classifier layer; the new layer's parameters are trainable by default.
in_features = base_model.classifier[-1].in_features
base_model.classifier[-1] = nn.Linear(in_features, NUM_CLASSES)

# Use all visible GPUs if there is more than one.
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs!")
    model = nn.DataParallel(base_model)
else:
    model = base_model

# NOTE(review): `device` is not defined in this notebook; presumably it comes
# from the `utils` star import — confirm.
model = model.to(device)

In [23]:
# Build the train/test dataloaders, applying the preprocessing bundled with the
# pretrained weights. 32 is the batch size (see the printed batch summary);
# the trailing 2 is presumably the number of loader workers — confirm against
# utils.setup_dataloaders.
train_dataloader, test_dataloader = setup_dataloaders(auto_transforms, 32, 2)

# Cross-entropy objective, optimised with Adam at a learning rate of 1e-3.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

Files already downloaded and verified
Files already downloaded and verified
Train dataloader: 1563 batches of size 32
Test dataloader: 313 batches of size 32
Training data length: 50000, Testing data length: 10000


In [24]:
# Per-layer summary of the model for a dummy batch shaped (32, 3, 224, 224).
# Use it to verify which layers are trainable and that the final output has
# the expected number of classes.
# NOTE(review): the transforms bundled with the pretrained weights may produce
# a different image resolution than 224x224 — confirm if exact shapes matter.
summary(
    model=model,
    input_size=(32, 3, 224, 224),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 10]             --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

In [25]:
# Train for a fixed number of epochs, recording per-epoch accuracy/loss on both
# splits and the total wall-clock time of the whole run.
train_time_start_on_gpu = timer()
epochs = 5

# Per-epoch history; one value is appended to each list every epoch.
train_accs, test_accs = [], []
train_losses, test_losses = [], []

for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch+1}\n----------")

    # One pass over the training data, then one pass over the test data.
    # Both helpers return an (accuracy, loss) pair.
    train_acc, train_loss = train_step(
        data_loader=train_dataloader,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        accuracy_fn=accuracy_fn,
    )
    test_acc, test_loss = test_step(
        data_loader=test_dataloader,
        model=model,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn,
    )

    # Store plain Python numbers (the metrics are presumably 0-d tensors,
    # hence `.item()`), so the history lists do not hold on to tensors.
    for history, metric in (
        (train_accs, train_acc),
        (train_losses, train_loss),
        (test_accs, test_acc),
        (test_losses, test_loss),
    ):
        history.append(metric.item())

train_time_end_on_gpu = timer()
total_train_time = print_train_time(
    start=train_time_start_on_gpu,
    end=train_time_end_on_gpu,
    device=device,
)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1
----------
Train loss: 0.94712 | Train accuracy: 71.68%
Test loss: 0.63439 | Test accuracy: 79.16%

Epoch: 2
----------
Train loss: 0.70403 | Train accuracy: 76.27%
Test loss: 0.57749 | Test accuracy: 80.49%

Epoch: 3
----------
Train loss: 0.66899 | Train accuracy: 76.96%
Test loss: 0.55465 | Test accuracy: 81.37%

Epoch: 4
----------
Train loss: 0.65285 | Train accuracy: 77.57%
Test loss: 0.53796 | Test accuracy: 81.72%

Epoch: 5
----------
Train loss: 0.64055 | Train accuracy: 77.88%
Test loss: 0.52333 | Test accuracy: 82.29%

Train time on cuda: 514.222 seconds
