In [1]:
%reset -f

%pip install -q timm


import os, re, torch, PIL
import numpy as np

from torch.optim.lr_scheduler import OneCycleLR
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import Compose, RandomResizedCrop, Resize, ToTensor

from accelerate import Accelerator
from accelerate.utils import set_seed
from timm import create_model



# Device configuration (use GPU if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


# Download the dataset
!wget -nc https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz

# Extract the tar.gz file
!tar -xzf images.tar.gz


import os
data_dir = "./images"
fnames = os.listdir(data_dir)
fname = fnames[0]
print(fname)


import re
def extract_label(fname):
    stem = fname.split(os.path.sep)[-1]
    return re.search(r"^(.*)_\d+\.jpg$", stem).groups()[0]

extract_label(fname)



class PetsDataset(Dataset):
    def __init__(self, file_names, image_transform=None, label_to_id=None):
        self.file_names = file_names
        self.image_transform = image_transform
        self.label_to_id = label_to_id

    def __len__(self):
        return len(self.file_names)

    def __getitem__(self, idx):
        fname = self.file_names[idx]
        raw_image = PIL.Image.open(fname)
        image = raw_image.convert("RGB")
        if self.image_transform is not None:
            image = self.image_transform(image)
        label = extract_label(fname)
        if self.label_to_id is not None:
            label = self.label_to_id[label]
        return {"image": image, "label": label}



# Grab all the image filenames
fnames = [
    os.path.join(data_dir, fname)
    for fname in fnames
    if fname.endswith(".jpg")
]

# Build the labels
all_labels = [
    extract_label(fname)
    for fname in fnames
]
id_to_label = list(set(all_labels))
id_to_label.sort()
label_to_id = {lbl: i for i, lbl in enumerate(id_to_label)}



def get_dataloaders(batch_size:int=64):
    "Builds a set of dataloaders with a batch_size"
    random_perm = np.random.permutation(len(fnames))
    cut = int(0.8 * len(fnames))
    train_split = random_perm[:cut]
    eval_split = random_perm[:cut]

    # For training we use a simple RandomResizedCrop
    train_tfm = Compose([
        RandomResizedCrop((224, 224), scale=(0.5, 1.0)),
        ToTensor()
    ])
    train_dataset = PetsDataset(
        [fnames[i] for i in train_split],
        image_transform=train_tfm,
        label_to_id=label_to_id
    )

    # For evaluation we use a deterministic Resize
    eval_tfm = Compose([
        Resize((224, 224)),
        ToTensor()
    ])
    eval_dataset = PetsDataset(
        [fnames[i] for i in eval_split],
        image_transform=eval_tfm,
        label_to_id=label_to_id
    )

    # Instantiate dataloaders
    train_dataloader = DataLoader(
        train_dataset,
        shuffle=True,
        batch_size=batch_size,
        num_workers=4
    )
    eval_dataloader = DataLoader(
        eval_dataset,
        shuffle=False,
        batch_size=batch_size*2,
        num_workers=4
    )
    return train_dataloader, eval_dataloader



from torch.optim.lr_scheduler import CosineAnnealingLR

# instantiate the model (we build the model here so that the seed also controls new weight initaliziations)
accelerator = None
mean = None
std = None
model = None
optimizer = None
lr_scheduler = None
eval_dataloader = None


def training_loop(mixed_precision="fp16", seed:int=42, batch_size:int=64, ep:int = 1):
    global model , accelerator, mean, std, optimizer, lr_scheduler, eval_dataloader
    # Set the seed
    set_seed(seed)
    # Initialize accelerator
    accelerator = Accelerator(mixed_precision=mixed_precision)
    # Build dataloaders
    train_dataloader, eval_dataloader = get_dataloaders(batch_size)
    model = create_model("resnet50d", pretrained=True, num_classes=len(label_to_id))
    # Freeze the base model
    for param in model.parameters():
        param.requires_grad=False
    for param in model.get_classifier().parameters():
        param.requires_grad=True

    # We normalize the batches of images to be a bit faster
    mean = torch.tensor(model.default_cfg["mean"])[None, :, None, None]
    std = torch.tensor(model.default_cfg["std"])[None, :, None, None]

    # To make this constant available on the active device, we set it to the accelerator device
    mean = mean.to(accelerator.device)
    std = std.to(accelerator.device)

    # Intantiate the optimizer
    optimizer = torch.optim.Adam(params=model.parameters(), lr = 3e-2/25)

    # Instantiate the learning rate scheduler
    lr_scheduler = OneCycleLR(
        optimizer=optimizer,
        max_lr=3e-2,
        epochs=ep,
        steps_per_epoch=len(train_dataloader)
    )

    # Prepare everything
    # There is no specific order to remember, we just need to unpack the objects in the same order we gave them to the
    # prepare method.
    model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare(
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # Now we train the model
    for epoch in range(ep):
        model.train()
        for step, batch in enumerate(train_dataloader):
            # We could avoid this line since we set the accelerator with `device_placement=True`.
            batch = {k: v.to(accelerator.device) for k, v in batch.items()}
            inputs = (batch["image"] - mean) / std
            outputs = model(inputs)
            loss = torch.nn.functional.cross_entropy(outputs, batch["label"])
            accelerator.backward(loss)
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

        model.eval()
        accurate = 0
        num_elems = 0
        for _, batch in enumerate(eval_dataloader):
            # We could avoid this line since we set the accelerator with `device_placement=True`.
            batch = {k: v.to(accelerator.device) for k, v in batch.items()}
            inputs = (batch["image"] - mean) / std
            with torch.no_grad():
                outputs = model(inputs)
            predictions = outputs.argmax(dim=-1)
            accurate_preds = accelerator.gather(predictions) == accelerator.gather(batch["label"])
            num_elems += accurate_preds.shape[0]
            accurate += accurate_preds.long().sum()

        eval_metric = accurate.item() / num_elems
        # Use accelerator.print to print only on the main process.
        accelerator.print(f"epoch {epoch}: {100 * eval_metric:.2f}")
    return model, mean, std , accelerator



def preprocess_image(image_path: str, mean: torch.Tensor, std: torch.Tensor, accelerator: Accelerator):
    transform = Compose([
        Resize((224, 224)),
        ToTensor(),
    ])

    # Load and transform the image
    image = PIL.Image.open(image_path).convert("RGB")
    image = transform(image)

    # Normalize the image
    image = image.to(accelerator.device)
    mean = mean.to(accelerator.device)
    std = std.to(accelerator.device)
    image = (image - mean) / std



    # Print the tensor shape for debugging
    print(f"Preprocessed image tensor shape: {image.shape}")

    return image




def evaluate_model(model, dataloader, mean, std, accelerator):
    model.to(accelerator.device)
    mean.to(accelerator.device)
    std.to(accelerator.device)
    model.eval()  # Set the model to evaluation mode
    accurate = 0
    num_elems = 0

    with torch.no_grad():  # No need to compute gradients during evaluation
        for _, batch in enumerate(dataloader):
            # Move batch to the accelerator device
            batch = {k: v.to(accelerator.device) for k, v in batch.items()}
            inputs = (batch["image"] - mean) / std

            # Forward pass
            outputs = model(inputs)
            predictions = outputs.argmax(dim=-1)

            # Calculate accuracy
            accurate_preds = accelerator.gather(predictions) == accelerator.gather(batch["label"])
            num_elems += accurate_preds.shape[0]
            accurate += accurate_preds.long().sum()

    # Compute evaluation metric
    eval_metric = accurate.item() / num_elems
    # Print the result using accelerator.print to ensure it's only printed from the main process
    accelerator.print(f"Evaluation accuracy: {100 * eval_metric:.2f}%")

    return eval_metric



def save_model(model, mean, std, optimizer, lr_scheduler, epoch, path="model_checkpoint.pth"):

    save_dict = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'lr_scheduler_state_dict': lr_scheduler.state_dict(),
        'mean': mean,
        'std': std,
        'epoch': epoch
    }

    torch.save(save_dict, path)
    print(f"Model and training state saved to {path}")



# Loading the model, optimizer, and lr_scheduler
def load_model(path="model_checkpoint.pth"):
    checkpoint = torch.load(path)

    # Create the model again (needs to match the original architecture)
    model = create_model("resnet50d", pretrained=False, num_classes=len(label_to_id))

    # Load the model state dictionary
    model.load_state_dict(checkpoint['model_state_dict'])

    # Load the optimizer state if resuming training
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-2/25)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # Load the lr_scheduler state
    lr_scheduler = OneCycleLR(optimizer, max_lr=3e-2, epochs=1, steps_per_epoch=100)
    lr_scheduler.load_state_dict(checkpoint['lr_scheduler_state_dict'])

    # Load mean and std normalization parameters
    mean = checkpoint['mean']
    std = checkpoint['std']

    epoch = checkpoint['epoch']

    print(f"Model loaded from {path}, trained for {epoch} epochs")

    return model, optimizer, lr_scheduler, mean, std, epoch



from accelerate import notebook_launcher



notebook_launcher(training_loop, args=("fp16", 42, 64), num_processes=2)






[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hcpu
--2024-09-10 12:13:18--  https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
Resolving www.robots.ox.ac.uk (www.robots.ox.ac.uk)... 129.67.94.2
Connecting to www.robots.ox.ac.uk (www.robots.ox.ac.uk)|129.67.94.2|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://thor.robots.ox.ac.uk/pets/images.tar.gz [following]
--2024-09-10 12:13:18--  https://thor.robots.ox.ac.uk/pets/images.tar.gz
Resolving thor.robots.ox.ac.uk (thor.robots.ox.ac.uk)... 129.67.95.98
Connecting to thor.robots.ox.ac.uk (thor.robots.ox.ac.uk)|129.67.95.98|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 791918971 (755M) [application/octet-stream]
Saving to: ‘images.tar.gz’


2024-09-10 12:13:38

FileNotFoundError: [Errno 2] No such file or directory: 'nvidia-smi'

In [None]:
### for fun predict a path


# Example image path
image_path = "./images/chihuahua_44.jpg"  # Replace with the actual image path

# Assuming mean, std, accelerator, and trained_model are already defined
image = preprocess_image(image_path, mean, std, accelerator)

# Ensure trained_model is on the right device
model.to(accelerator.device)

model.eval()
with torch.no_grad():
    # Print the input tensor shape for debugging
    print(f"Input tensor shape before model inference: {image.shape}")

    # Perform inference
    outputs = model(image)  # Ensure image is correctly shaped [1, 3, 224, 224]

    # Print the output shape for debugging
    print(f"Model output shape: {outputs.shape}")

    # Get the predicted class
    predicted_class = outputs.argmax(dim=-1).item()

# Convert predicted class to label
id_to_label = {v: k for k, v in label_to_id.items()}
predicted_label = id_to_label[predicted_class]
print(f"Predicted label for {image_path}: {predicted_label}")

In [None]:
evaluate_model(model, eval_dataloader, mean, std, accelerator)

In [None]:
model_state = model.float().state_dict()

model_stated = create_model("resnet50d", pretrained=False, num_classes=len(label_to_id))

# Load the model state dictionary
model_stated.load_state_dict(model_state)

model_stated.to(accelerator.device)
model_stated_state = model_stated.state_dict()



def compute_model_state_difference(model_state, model_stated_state):
    # Initialize variable to hold the total difference
    total_diff = 0.0

    # Iterate through all parameters
    for key in model_state:
        if key in model_stated_state:
            param1 = model_state[key]
            param2 = model_stated_state[key]

            # Compute the L2 norm of the difference
            diff = param1 - param2
            norm = torch.norm(diff).item()
            total_diff += norm

    return total_diff

# Calculate the norm between the two model states
state_difference = compute_model_state_difference(model_state, model_stated_state)
print(f"Total norm of the difference between model states: {state_difference:.4f}")


evaluate_model(model_stated, eval_dataloader, mean, std, accelerator)

In [None]:

# Call save_model at the end of training
save_model(model, mean, std, optimizer, lr_scheduler, epoch=1, path="resnet50d_pets.pth")
evaluate_model(model, eval_dataloader, mean, std, accelerator)


In [None]:

# Call load_model when needed
loaded_model, loaded_optimizer, loaded_lr_scheduler, loaded_mean, loaded_std, loaded_epoch = load_model("resnet50d_pets.pth")

evaluate_model(loaded_model, eval_dataloader, mean, std, accelerator)
