In [1]:
import torch
import torchvision
import os

In [2]:
# --- Experiment configuration -------------------------------------------
# Training schedule.
NUM_EPOCHS = 20
BATCH_SIZE = 16

# Number of output units of the classification head (one per class).
OUTPUT_SHAPE = 5

# Label used for the experiment name and for the saved checkpoint file name.
MODEL = "efficientnet_v2_L"

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:0')
else:
    DEVICE = torch.device('cpu')

# The image folder is expected in "Dataset" under the current working directory.
dataset_path = os.path.join(os.getcwd(), "Dataset")

In [3]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Seed for the train/val/test partition. Fixing it keeps the three subsets
# identical across kernel restarts, so reported validation/test metrics are
# reproducible and always refer to the same images.
SPLIT_SEED = 42

# Every image is resized to 224x224 and converted to a tensor.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# ImageFolder derives the class label from each sub-directory of dataset_path.
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# 80 % train / 10 % validation / remainder test. The test split absorbs the
# rounding remainder so the three sizes always sum to len(dataset).
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
assert train_size + val_size + test_size == len(dataset)

# A dedicated, seeded generator makes the split deterministic without
# touching the global RNG state.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

# Only the training loader shuffles; evaluation loaders keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [4]:
import engine
import utils
from adabelief_pytorch import AdaBelief
import ensembleModel
def _load_backbone(checkpoint_path):
    """Build an EfficientNetV2-S with an OUTPUT_SHAPE-way head and load its weights.

    Args:
        checkpoint_path: Path to a state-dict file saved for this architecture.

    Returns:
        The model with the checkpoint loaded, moved to DEVICE.
    """
    backbone = torchvision.models.efficientnet_v2_s()
    # Replace the stock classifier with one sized for this task so the
    # state-dict keys and shapes match the fine-tuned checkpoint.
    backbone.classifier = torch.nn.Sequential(
        torch.nn.Dropout(p=0.2, inplace=True),
        torch.nn.Linear(in_features=1280, out_features=OUTPUT_SHAPE, bias=True),
    )
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine
    # (DEVICE falls back to 'cpu' when CUDA is unavailable).
    state_dict = torch.load(checkpoint_path, map_location=DEVICE, weights_only=True)
    backbone.load_state_dict(state_dict)
    return backbone.to(DEVICE)


for x in range(0, 1):
    # os.path.join keeps the checkpoint paths portable instead of hard-coding
    # a Windows backslash separator.
    # NOTE(review): the file names start with ".fficientnet" — this looks like
    # a garbled "efficientnet"; confirm against the files on disk.
    model1 = _load_backbone(os.path.join("Models", ".fficientnet_v2_s_0.pt"))
    model2 = _load_backbone(os.path.join("Models", ".fficientnet_v2_s_1.pt"))

    model = ensembleModel.AdaptiveEnsembleModel(
        model1=model1, model2=model2, num_classes=OUTPUT_SHAPE
    ).to(DEVICE)

    # Freeze the convolutional feature extractors of both members; their
    # classifier heads and the ensemble's adaptive layer stay trainable.
    # NOTE(review): requires_grad=False does not stop BatchNorm layers from
    # updating running statistics if engine.train puts the model in train
    # mode — confirm, since the logged val/test losses are very unstable.
    for member in (model.model1, model.model2):
        for param in member.features.parameters():
            param.requires_grad = False

    # requires_grad_() works for both an nn.Module and a Parameter; assigning
    # `.requires_grad = True` on a Module would only set a plain attribute.
    model.adaptive_layer.requires_grad_(True)

    loss_fn = torch.nn.CrossEntropyLoss()
    # Hand only the trainable parameters to the optimizer so no optimizer
    # state is kept for the frozen feature extractors.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = AdaBelief(params=trainable_params)

    # NOTE(review): model_name is the literal string "x", not the loop index —
    # confirm this is intended.
    writer = engine.create_writer(experiment_name=MODEL,
                                  model_name="x",
                                  extra="")
    engine.train(model=model,
                 train_dataloader=train_loader,
                 val_dataloader=val_loader,
                 test_dataloader=test_loader,
                 loss_fn=loss_fn,
                 optimizer=optimizer,
                 epochs=NUM_EPOCHS,
                 writer=writer,
                 device=DEVICE)

    # Persist the trained ensemble as Models/<MODEL>_<x>.pt.
    utils.save_model(model=model,
                     target_dir="Models/",
                     model_name=f"{MODEL}_{x}.pt")

[31mPlease check your arguments if you have upgraded adabelief-pytorch from version 0.0.5.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    rectify
-----------------------  -----  -----------------  ---------
adabelief-pytorch=0.0.5  1e-08  False              False
>=0.1.0 (Current 0.2.0)  1e-16  True               True
[34mSGD better than Adam (e.g. CNN for Image Classification)    Adam better than SGD (e.g. Transformer, GAN)
----------------------------------------------------------  ----------------------------------------------
Recommended eps = 1e-8                                      Recommended eps = 1e-16
[34mFor a complete table of recommended hyperparameters, see
[34mhttps://github.com/juntang-zhuang/Adabelief-Optimizer
[32mYou can disable the log message by setting "print_change_log = False", though it is recommended to keep as a reminder.
[0m
Weight decoupling enabled in AdaBelief
Rectification enabled in AdaBelief
[INF

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.6346 | train_acc: 0.2514 | val_loss: 1.5918 | val_acc: 0.3092 | test_loss: 1.5891 | test_acc: 0.3203
Epoch: 2 | train_loss: 1.4803 | train_acc: 0.3771 | val_loss: 4.4904 | val_acc: 0.4699 | test_loss: 6.7856 | test_acc: 0.3828
Epoch: 3 | train_loss: 1.2605 | train_acc: 0.4625 | val_loss: 13.9433 | val_acc: 0.4810 | test_loss: 22.3837 | test_acc: 0.4688
Epoch: 4 | train_loss: 1.1919 | train_acc: 0.4969 | val_loss: 1.5492 | val_acc: 0.5480 | test_loss: 1.4369 | test_acc: 0.4453
Epoch: 5 | train_loss: 1.0280 | train_acc: 0.5410 | val_loss: 6.5305 | val_acc: 0.5513 | test_loss: 2.0853 | test_acc: 0.4844
Epoch: 6 | train_loss: 1.0149 | train_acc: 0.5642 | val_loss: 2.1367 | val_acc: 0.4475 | test_loss: 2.3913 | test_acc: 0.4922
Epoch: 7 | train_loss: 0.9522 | train_acc: 0.5913 | val_loss: 3.1634 | val_acc: 0.4420 | test_loss: 2.5016 | test_acc: 0.5156
Epoch: 8 | train_loss: 0.8260 | train_acc: 0.5990 | val_loss: 1.2523 | val_acc: 0.4732 | test_loss: 1.0565 | test_ac