In [1]:
import torch, torchvision
import sys
from pathlib import Path

sys.path.append("/Users/arjunlfc/Documents/workspace/_mlmodels/")
from torchinfo import summary
from torch.utils.tensorboard import SummaryWriter
from torch import nn
from utils import data_setup, training
from tqdm.auto import tqdm

In [2]:
def get_device():
    if torch.cuda.is_available():
        return 'cuda'
    elif torch.backends.mps.is_available():
        return 'mps'
    return 'cpu'

def set_seeds(seed: int=42):
    """Sets random sets for torch operations.

    Args:
        seed (int, optional): Random seed to set. Defaults to 42.
    """
    # Set the seed for general torch operations
    torch.manual_seed(seed)
    # Set the seed for CUDA torch operations (ones that happen on the GPU)
    torch.cuda.manual_seed(seed)

device = get_device()
set_seeds()


### Intro to Tracking Experiments

There are as many different ways to track machine learning experiments as there are experiments to run. 
- TensorBoard: Extensions built into PyTorch, widely recognized and used, easily scales.
- Weights & Biases: install wandb, make an account, Incredible user experience, make experiments public, tracks almost anything.
- MLFlow: install mlflow and start tracking, Fully open-source MLOps lifecycle management, many integrations.

## Data and Model

In [3]:
def download_data(dir_path, filename):
    # Setup path to data folder
    data_path = Path(dir_path)
    image_path = data_path / filename
    
    # If the image folder doesn't exist, download it and prepare it... 
    if image_path.is_dir():
        print(f"{image_path} directory exists.")
    else:
        print(f"Did not find {image_path} directory, creating one...")
        image_path.mkdir(parents=True, exist_ok=True)
        
        # Download pizza, steak, sushi data
        with open(data_path / f"{filename}.zip", "wb") as f:
            request = requests.get("https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip")
            print("Downloading pizza, steak, sushi data...")
            f.write(request.content)
    
        # Unzip pizza, steak, sushi data
        with zipfile.ZipFile(data_path / f"{filename}.zip", "r") as zip_ref:
            print("Unzipping pizza, steak, sushi data...") 
            zip_ref.extractall(image_path)

DATASET_PATH="../2-NLP-CV-Basics/datasets/"
FILENAME="pizza_steak_sushi"
download_data(DATASET_PATH, FILENAME)

# Setup train and testing paths
train_dir = f"{DATASET_PATH}{FILENAME}/train"
test_dir = f"{DATASET_PATH}{FILENAME}/test"

train_dir, test_dir

../2-NLP-CV-Basics/datasets/pizza_steak_sushi directory exists.


('../2-NLP-CV-Basics/datasets/pizza_steak_sushi/train',
 '../2-NLP-CV-Basics/datasets/pizza_steak_sushi/test')

In [4]:
# Setup pretrained weights (plenty of these available in torchvision.models)
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT

# Get transforms from weights (these are the transforms that were used to obtain the weights)
automatic_transforms = weights.transforms() 
print(f"Automatically created transforms: {automatic_transforms}")

# Create data loaders
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=automatic_transforms, # use automatic created transforms
    batch_size=32
)

train_dataloader, test_dataloader, class_names

Automatically created transforms: ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)


(<torch.utils.data.dataloader.DataLoader at 0x10a9149a0>,
 <torch.utils.data.dataloader.DataLoader at 0x10aa65b80>,
 ['pizza', 'steak', 'sushi'])

In [5]:
def get_model():
    # Download the pretrained weights for EfficientNet_B0
    weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT # NEW in torchvision 0.13, "DEFAULT" means "best weights available"
    
    # Setup the model with the pretrained weights and send it to the target device
    model = torchvision.models.efficientnet_b0(weights=weights).to(device)
    
    for param in model.features.parameters():
        param.requires_grad = False
        
    # Since we're creating a new layer with random weights (torch.nn.Linear), 
    # let's set the seeds
    set_seeds() 
    
    # Update the classifier head to suit our problem
    model.classifier = torch.nn.Sequential(
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(in_features=1280, 
                  out_features=len(class_names),
                  bias=True).to(device))
    return model

## Model Training

We can use PyTorch's `torch.utils.tensorboard.SummaryWriter()` class to save various parts of our model's training progress to file. The default location for log_dir is under runs/CURRENT_DATETIME_HOSTNAME, where the HOSTNAME is the name of your computer. But of course, you can change where your experiments are tracked (the filename is as customisable as you'd like). The outputs of the SummaryWriter() are saved in TensorBoard format.



In [6]:
def create_writer(experiment_name: str, 
                  model_name: str, 
                  extra: str=None) -> torch.utils.tensorboard.writer.SummaryWriter():
    """Creates a torch.utils.tensorboard.writer.SummaryWriter() instance saving to a specific log_dir.

    Example usage:
        # Create a writer saving to "runs/2022-06-04/data_10_percent/effnetb2/5_epochs/"
        writer = create_writer(experiment_name="data_10_percent",
                               model_name="effnetb2",
                               extra="5_epochs")
        # The above is the same as:
        writer = SummaryWriter(log_dir="runs/2022-06-04/data_10_percent/effnetb2/5_epochs/")
    """
    from datetime import datetime
    import os

    # Get timestamp of current date (all experiments on certain day live in same folder)
    timestamp = datetime.now().strftime("%Y-%m-%d") # returns current date in YYYY-MM-DD format

    if extra:
        # Create log directory path
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name, extra)
    else:
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name)
        
    print(f"[INFO] Created SummaryWriter, saving to: {log_dir}...")
    return SummaryWriter(log_dir=log_dir)

We'll add the ability for our train() function to log our model's training and test loss and accuracy values. We can do this with writer.add_scalars(main_tag, tag_scalar_dict), where:
- main_tag (string) - the name for the scalars being tracked (e.g. "Accuracy")
- tag_scalar_dict (dict) - a dictionary of the values being tracked (e.g. {"train_loss": 0.3454})

Once we've finished tracking values, we'll call writer.close() to tell the writer to stop looking for values to track.
The `torch.utils.tensorboard.SummaryWriter()` class also has many different methods to track different things about your model/data, such as `add_graph()` which tracks the computation graph of your model. For more options, check the SummaryWriter() documentation.

In [7]:
def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device,
          writer        
         ):
    # Create empty results dictionary
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []
    }

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = training.train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer,
                                           device=device)
        test_loss, test_acc = training.test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn,
                                        device=device)

        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        if writer:
            # Add loss results to SummaryWriter
            writer.add_scalars(main_tag="Loss", 
                               tag_scalar_dict={"train_loss": train_loss,
                                                "test_loss": test_loss},
                               global_step=epoch)
    
            # Add accuracy results to SummaryWriter
            writer.add_scalars(main_tag="Accuracy", 
                               tag_scalar_dict={"train_acc": train_acc,
                                                "test_acc": test_acc}, 
                               global_step=epoch)
            
            # Track the PyTorch model architecture
            writer.add_graph(model=model, 
                             # Pass in an example input
                             input_to_model=torch.randn(32, 3, 224, 224).to(device))
        
            writer.close()
            
            
    
    return results
    

### Demo Experiment and Tensorboard Calling

In [9]:
loss_fn = nn.CrossEntropyLoss()
model1 = get_model()
optimizer = torch.optim.Adam(model1.parameters(), lr=0.01)

writer = create_writer(experiment_name="1", model_name="efficient_net")
training_results = train(model1, train_dataloader, test_dataloader, optimizer, loss_fn, 5, device, writer)

[INFO] Created SummaryWriter, saving to: runs/2024-11-04/1/efficient_net...


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0175 | train_acc: 0.5078 | test_loss: 0.3972 | test_acc: 0.8561
Epoch: 2 | train_loss: 0.7793 | train_acc: 0.7148 | test_loss: 0.2660 | test_acc: 0.9384
Epoch: 3 | train_loss: 0.6636 | train_acc: 0.7617 | test_loss: 0.4326 | test_acc: 0.7737
Epoch: 4 | train_loss: 0.4103 | train_acc: 0.8008 | test_loss: 0.3452 | test_acc: 0.8456
Epoch: 5 | train_loss: 0.3308 | train_acc: 0.8711 | test_loss: 0.7270 | test_acc: 0.7131


In [None]:
%load_ext tensorboard
%tensorboard --logdir runs