In [10]:
import os

# import hydra
import timm
import torch
import torch.nn as nn
import torch.optim as optim
from dotenv import load_dotenv
from torch.utils.data import DataLoader

from src.data.make_dataset import make_dataset

# current_script_directory = os.path.dirname(os.path.abspath("C:\Users\dchro\Documents\MLOps\mlops_project\notebooks\check_parallel_data_loading.ipynb"))
# parent_directory = os.path.abspath(os.path.join(current_script_directory, ".."))
# root_directory = os.path.abspath(os.path.join(current_script_directory, ".."))
save_path = os.path.join("C:/Users/dchro/Documents/MLOps/mlops_project", "models", "mobilenetv3_fine_tuned.pth")

config = """train_transforms:
  resize:
    _target_: torchvision.transforms.Resize
    size: [256, 256]
  random_crop:
    _target_: torchvision.transforms.RandomResizedCrop
    size: [224, 224]
  random_hor_flip:
    _target_: torchvision.transforms.RandomHorizontalFlip
  random_rot:
    _target_: torchvision.transforms.RandomRotation
    degrees: 30
  random_affine:
    _target_: torchvision.transforms.RandomAffine
    degrees: 0
    translate: [0.1, 0.1]
  random_persp:
    _target_: torchvision.transforms.RandomPerspective
    distortion_scale: 0.5
    p: 0.5
  to_tensor:
    _target_: torchvision.transforms.ToTensor
  norma:
    _target_: torchvision.transforms.Normalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]"""

# @hydra.main(config_path="../../config/", config_name="main.yaml")

def train_model(num_workers = 0):
    # Data Load
    print("Loading data...")
    num_classes = 38
    trainset = make_dataset(config, dataset_type="train")
    val_set = make_dataset(config, dataset_type="val")
    train_loader = DataLoader(
        trainset,
        batch_size=32,
        shuffle=True,
        num_workers=num_workers,
    )
    val_loader = DataLoader(
        val_set, batch_size=32, shuffle=False, num_workers=num_workers
    )

    print("Defining model...")
    # Model definition
    model = timm.create_model(
        "mobilenetv3_large_100", pretrained=True, num_classes=num_classes
    )
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    best_val_loss = float("inf")
    patience = 2  # Number of epochs to wait for improvement
    epochs_without_improvement = 0

    print("Training start...")
    # Training loop
    for ep in range(10):
        total_loss = 0
        num_correct = 0

        for batch_idx, (inputs, labels) in enumerate(train_loader):
            model.train()
            optimizer.zero_grad()
            y_hat = model(inputs)
            batch_loss = criterion(y_hat, labels)
            batch_loss.backward()
            optimizer.step()

            total_loss += float(batch_loss)
            num_correct += int(torch.sum(torch.argmax(y_hat, dim=1) == labels))

            print(
                "EPOCH: {:5}    BATCH: {:5}/{:5}    LOSS: {:.3f}".format(
                    ep, batch_idx, len(train_loader), batch_loss
                )
            )

        epoch_loss = total_loss / len(trainset)
        epoch_accuracy = num_correct / len(trainset)

        print(
            "EPOCH: {:5}    LOSS: {:.3f}    ACCURACY: {:.3f}".format(
                ep, epoch_loss, epoch_accuracy
            )
        )

        model.eval()
        total_val_loss = 0
        num_val_correct = 0

        with torch.no_grad():
            for batch_idx, (val_inputs, val_labels) in enumerate(val_loader):
                val_outputs = model(val_inputs)
                val_loss = criterion(val_outputs, val_labels)

                total_val_loss += float(val_loss)
                num_val_correct += int(
                    torch.sum(torch.argmax(val_outputs, dim=1) == val_labels)
                )

        val_epoch_loss = total_val_loss / len(val_set)
        val_epoch_accuracy = num_val_correct / len(val_set)
        print(
            "EPOCH: {:5}    VAL LOSS: {:.3f}    VAL ACCURACY: {:.3f}".format(
                ep, val_epoch_loss, val_epoch_accuracy
            )
        )

        if val_epoch_loss < best_val_loss:
            best_val_loss = val_epoch_loss
            epochs_without_improvement = 0

            # save the best model checkpoint here
            torch.save(model.state_dict(), save_path)
        else:
            epochs_without_improvement += 1

        # Check if we should stop training
        if epochs_without_improvement >= patience:
            print(f"Early stopping after {ep + 1} epochs")
            break

    # Save the trained model

    print("Best Model saved!")
    print("Save path: ", save_path)


# Run training, save model and print metrics
@hydra.main(config_path="../config/", config_name="main.yaml")
def main(cfg):

    import time
    for i in [1, 2, 4]:
        # Print time for each run
        print(f"Running with {i} workers")
        start = time.time()
        train_model(cfg, i)
        end = time.time()
        print(f"Time taken: {round(end - start, 2)}")


main()


The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  @hydra.main(config_path="../config/", config_name="main.yaml")
usage: ipykernel_launcher.py [--help] [--hydra-help] [--version]
                             [--cfg {job,hydra,all}] [--resolve]
                             [--package PACKAGE] [--run] [--multirun]
                             [--shell-completion] [--config-path CONFIG_PATH]
                             [--config-name CONFIG_NAME]
                             [--config-dir CONFIG_DIR]
                             [--experimental-rerun EXPERIMENTAL_RERUN]
                             [--info [{all,config,defaults,defaults-tree,plugins,searchpath}]]
                             [overrides ...]
ipykernel_launcher.py: error: argument --shell-completion/-sc: ignored explicit argument '9034'


AttributeError: 'tuple' object has no attribute 'tb_frame'