# Training

### Library import

In [None]:
from super_gradients import Trainer
from super_gradients.training import Trainer, training_hyperparams
from super_gradients.training import models
from super_gradients.common.object_names import Models

In [None]:
import os
import glob
import pprint
import torch
from matplotlib import pyplot as plt
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torchvision.transforms import Compose
from torch.utils.tensorboard import SummaryWriter

In [None]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

### Paths organization

In [None]:
# Dataset locations
# Directory the notebook is executed from
pwd_notebook = os.path.abspath('')
# The project root sits two levels above the notebook directory
root_path = os.path.dirname(os.path.dirname(pwd_notebook))

# All dataset splits are stored under <root>/data
data_dir = os.path.join(root_path, 'data')

# One sub-folder per split
train_dir, test_dir, valid_dir = (
    os.path.join(data_dir, split_name) for split_name in ('train', 'test', 'valid')
)

# Show the resolved paths, one per line: train, test, valid
print(train_dir, test_dir, valid_dir, sep="\n")

In [None]:
# Checkpoints
name_folder = 'clasificador'  # Change this to the name of your checkpoint folder
experiment_name_value = 'distorsion_arquitectura'  # Change this to the name of your experiment
CHECKPOINT_DIR = os.path.join(root_path, 'checkpoints', name_folder)  # <root>/checkpoints/<name_folder>

# Create the checkpoint directory (and any missing parents). exist_ok=True
# avoids the check-then-create race and makes re-running this cell harmless.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Name of the pretrained weights to load into the model.
# NOTE: the variable name contains a typo ("wegits"); it is kept as-is because
# the model-creation cell reads this exact name.
pretrained_wegits_name = 'imagenet'

# Trainer that stores this experiment's outputs under CHECKPOINT_DIR
trainer = Trainer(experiment_name=experiment_name_value, ckpt_root_dir=CHECKPOINT_DIR)
trainer

In [None]:
# User-adjustable parameters
# number of classes
n_classes = 4 # Set this to the number of classes in your dataset

# number of epochs
n_epochs = 30 # Set this to the number of training epochs

# learning rate
lr_value = 0.1 # Set this to the initial learning rate

# image size used for resizing
# NOTE: in the visible notebook this is only referenced by the commented-out Resize step of the transform
size_image = (64, 64) # Set this to the target image size

### Transformation

In [None]:
# Image transform pipeline, applied to every loaded image in the listed order
transform_steps = [
    # NOTE: resizing is currently disabled; uncomment the next line to resize to size_image
    # transforms.Resize(size=size_image),
    # Mirror the image horizontally with probability p (0.5 = 50% chance)
    transforms.RandomHorizontalFlip(p=0.5),
    # Convert the image into a torch.Tensor (pixel values go from 0-255 to 0.0-1.0)
    transforms.ToTensor(),
]
data_transform = Compose(transform_steps)

### Creating the train, valid and test datasets

In [None]:
# Evaluation transform: tensor conversion only. Random augmentation (the
# horizontal flip in data_transform) is kept out of validation/test so that
# their metrics are reproducible from run to run.
# If you add deterministic steps (e.g. Resize) to data_transform, mirror them here.
eval_transform = Compose([
    transforms.ToTensor(),
])

# Training set: images are read from one sub-folder per class and augmented
train_data = datasets.ImageFolder(root=train_dir,  # target folder of images
                                  transform=data_transform,  # transforms to perform on data (images)
                                  target_transform=None)  # transforms to perform on labels (if necessary)

# Validation set: no random augmentation
valid_data = datasets.ImageFolder(root=valid_dir,
                                  transform=eval_transform,
                                  target_transform=None)

# Test set: no random augmentation
test_data = datasets.ImageFolder(root=test_dir,
                                 transform=eval_transform,
                                 target_transform=None)

print(f"Train data:\n{train_data}\nValid data:\n{valid_data}\nTest data:\n{test_data}")

In [None]:
# Class names as a list, and the class-name -> index mapping as a dict,
# both taken from the training set
class_names = train_data.classes
class_dict = train_data.class_to_idx
print(class_names)
print(class_dict)
# Number of samples in the training and validation sets (shown as the cell output)
(len(train_data), len(valid_data))

In [None]:
# Fetch one sample with a single dataset lookup. Indexing the dataset twice
# (once for the image, once for the label) would load and transform the
# image two times.
# NOTE: index 5 is hard-coded, so the training set needs at least 6 samples.
img, label = train_data[5]
print(f"Image tensor:\n{img}")
print(f"Image shape: {img.shape}")
print(f"Image datatype: {img.dtype}")
print(f"Image label: {label}")
print(f"Label datatype: {type(label)}")

In [None]:
# Move the channel axis last: [C, H, W] -> [H, W, C]
img_permute = img.permute(1, 2, 0)

# Report the shape before and after the permute
print(f"Original shape: {img.shape} -> [color_channels, height, width]")
print(f"Image permute shape: {img_permute.shape} -> [height, width, color_channels]")

# Draw the permuted image without axes, titled with its class name
plt.figure(figsize=(10, 7))
plt.imshow(img_permute)
plt.axis("off")
plt.title(class_names[label], fontsize=14)

### Training settings

In [None]:
# Samples per batch, shared by all three loaders
batch_size = 16

# Training loader: reshuffled so batches differ between epochs
train_dataloader = DataLoader(dataset=train_data,
                              batch_size=batch_size,  # how many samples per batch?
                              num_workers=10,  # how many subprocesses to use for data loading? (higher = more)
                              shuffle=True)  # shuffle the data?

# Validation loader: evaluation data does not need shuffling
valid_dataloader = DataLoader(dataset=valid_data,
                              batch_size=batch_size,
                              num_workers=10,
                              shuffle=False)

# Test loader: evaluation data does not need shuffling
test_dataloader = DataLoader(dataset=test_data,
                             batch_size=batch_size,
                             num_workers=20,
                             shuffle=False)

train_dataloader, valid_dataloader, test_dataloader

In [None]:
# Pull a single batch from the training loader
img, label = next(iter(train_dataloader))

# The first dimension is the batch size set on the DataLoader; try changing the batch_size parameter above and see what happens
print(f"Image shape: {img.shape} -> [batch_size, color_channels, height, width]")
print(f"Label shape: {label.shape}")
print(f"Train data:\n{train_data}\nValid data:\n{valid_data}\nTest data:\n{test_data}")

### Model

In [None]:
# Instantiate the model through super_gradients' models.get, using the class
# count and pretrained-weights name configured earlier in the notebook
model = models.get(
    model_name=Models.RESNET50, # Change this to select a different model
    num_classes=n_classes, 
    pretrained_weights=pretrained_wegits_name)
# NOTE(review): model.train is referenced without being called, so this prints
# the bound method's repr — confirm whether print(model) was intended
print(model.train)

In [None]:
# Load a predefined set of training hyperparameters by recipe name;
# you can see more recipes in super_gradients/recipes
training_params =  training_hyperparams.get("training_hyperparams/imagenet_resnet50_train_params")

# Display the values exactly as loaded, before any override
pprint.pprint("Training parameters")
pprint.pprint(training_params)

In [None]:
# Override the loaded hyperparameters with the values chosen earlier in the notebook
training_params["max_epochs"] = n_epochs
# launch_tensorboard is set both at the top level and inside sg_logger_params
# NOTE(review): confirm which of the two keys the installed super_gradients version reads
training_params["launch_tensorboard"] = True
training_params["sg_logger_params"]["launch_tensorboard"] = True
# Track accuracy on both the training and the validation set
training_params["train_metrics_list"] = ["Accuracy"]
training_params["valid_metrics_list"] = ["Accuracy"]
training_params["initial_lr"] = lr_value

# Display the hyperparameters after the overrides
pprint.pprint("Training parameters Current")
pprint.pprint(training_params)

### Training visualization

In [None]:
# Helper that finds the most recently modified RUN_* directory

def find_last_run_dir(base_dir):
    """Return the most recently modified ``RUN_*`` directory inside *base_dir*.

    Args:
        base_dir: Directory that contains the ``RUN_*`` run folders.

    Returns:
        Path of the ``RUN_*`` directory with the latest modification time,
        or ``None`` when *base_dir* holds no such directory.
    """
    search_pattern = os.path.join(base_dir, "RUN_*")
    # glob matches plain files as well as folders, so keep directories only
    run_dirs = [path for path in glob.glob(search_pattern) if os.path.isdir(path)]
    if not run_dirs:
        return None

    # max() picks the newest entry directly; no need to sort the whole list
    return max(run_dirs, key=os.path.getmtime)

In [None]:
# Show logs of model
log_dir = os.path.join(CHECKPOINT_DIR, experiment_name_value)
latest_log_dir = find_last_run_dir(log_dir)

%load_ext tensorboard
%tensorboard --logdir='{latest_log_dir}'
%reload_ext tensorboard

print(f"Last log dir: {latest_log_dir}")

### Train

In [None]:
# Start training with the model, hyperparameters and train/validation loaders
# prepared in the previous cells
trainer.train(model=model,
              training_params=training_params,
              train_loader=train_dataloader,
              valid_loader=valid_dataloader)
