In [None]:
! pip install opencv-python tqdm numpy torch matplotlib

In [26]:
import os
import cv2
from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader, random_split

import torchvision
import torch.optim as optim
import torch.nn as nn

import numpy as np

# from PIL import Image

import matplotlib.pyplot as plt

# FDDB Parsing

In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm

# ===============================================
# CONFIG
# ===============================================

dataset_path = r"C:\PROJECTS\MILO\MILO\FaceRecognition\data\Dataset_FDDB\images"
labels_path = r"C:\PROJECTS\MILO\MILO\FaceRecognition\data\Dataset_FDDB\label.txt"
output_dir  = r"C:\PROJECTS\MILO\MILO\FaceRecognition\data\Augmented"

output_label_file = os.path.join(output_dir, "all_augmentations.txt")

TARGET_HEIGHT, TARGET_WIDTH = 1080, 1920

# Example: if you truly want rotations at 120° and 240°,
# you need a custom rotation. Otherwise, keep [0, 180].
ROTATIONS = [0, 120, 240]  

SCALES = [0.5, 1.0, 1.5]
ALIGNMENT_X = [0.0, 0.5, 1.0] #in theory we can have 2 instead of 3 if we really need to cut it down
ALIGNMENT_Y = [0.0, 0.5, 1.0]
NOISE_VARIANTS = [ "gaussian"] # "none", <- we can just have noisy images

UPSCALE_INTERPOLATION   = cv2.INTER_LANCZOS4
DOWNSCALE_INTERPOLATION = cv2.INTER_AREA

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# ===============================================
# FUNCTIONS
# ===============================================

def parse_labels(label_path):
    """
    Reads the label file and returns a dict:
       image_path -> list of bounding boxes [(x_min, y_min, x_max, y_max), ...]
    """
    labels = {}
    with open(label_path, 'r') as f:
        lines = f.readlines()
        current_image = None
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if line.startswith("#"):
                # e.g. "# 2003/07/19/img_123.jpg"
                current_image = line[2:].strip()
                labels[current_image] = []
            else:
                bbox = list(map(int, line.split()))
                labels[current_image].append(bbox)
    return labels

def adjust_brightness(image, factor):
    """
    Adjusts brightness by scaling pixel values by 'factor'.
    """
    # convert to float, scale, clip back to [0, 255], convert to uint8
    out = image.astype(np.float32) * factor
    out = np.clip(out, 0, 255).astype(np.uint8)
    return out

def add_gaussian_noise(image, std=25):
    """
    Adds Gaussian noise with standard deviation 'std'.
    Returns a uint8 image with saturation/clip.
    """
    # Convert to float for adding possibly negative values
    float_image = image.astype(np.float32)
    noise = np.random.normal(0, std, image.shape).astype(np.float32)
    noisy = float_image + noise
    noisy = np.clip(noisy, 0, 255).astype(np.uint8)
    return noisy

def rotate_image(image, angle):
    """
    Rotate 'image' by 'angle' degrees around its center, 
    returning a new image that fits the entire rotated result.
    For angles other than 0, 90, 180, 270, the output shape grows
    to avoid cropping corners.
    """
    if angle % 360 == 0:
        return image  # No rotation needed
    
    (h, w) = image.shape[:2]
    center = (w / 2.0, h / 2.0)

    # Rotation matrix
    M = cv2.getRotationMatrix2D(center, angle, 1.0)

    # Compute the new bounding dimensions
    cos_val = abs(M[0, 0])
    sin_val = abs(M[0, 1])
    new_w = int((h * sin_val) + (w * cos_val))
    new_h = int((h * cos_val) + (w * sin_val))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (new_w / 2.0) - center[0]
    M[1, 2] += (new_h / 2.0) - center[1]

    # Perform the rotation
    rotated = cv2.warpAffine(image, M, (new_w, new_h))
    return rotated

def place_image_on_canvas(image, target_height, target_width, align_x, align_y):
    """
    Places 'image' onto a black canvas of shape (target_height, target_width),
    using alignment factors [0.0, 1.0] in x and y.

    Returns:
      canvas: the final image of shape (target_height, target_width, 3)
      (offset_x, offset_y): where the top-left of 'image' was placed
    """
    canvas = np.zeros((target_height, target_width, 3), dtype=np.uint8)
    h, w = image.shape[:2]

    # Ensure we don't exceed the canvas if image is larger
    # (You could also choose to resize if bigger, or skip)
    if h > target_height or w > target_width:
        # A simple fallback: fit it exactly by resizing
        image = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_AREA)
        h, w = image.shape[:2]

    offset_x = int(align_x * (target_width - w))
    offset_y = int(align_y * (target_height - h))

    canvas[offset_y:offset_y+h, offset_x:offset_x+w] = image
    return canvas, (offset_x, offset_y)

def transform_bboxes(bboxes, scale, angle, offset_x, offset_y,
                     original_w, original_h, final_w, final_h):
    """
    Example of how you might transform bounding boxes for:
      1) scaling
      2) rotation (arbitrary angle)
      3) offset (due to placement on the canvas)
    """
    # 1) scale bboxes
    scaled_bboxes = []
    for (x_min, y_min, x_max, y_max) in bboxes:
        scaled_bboxes.append([
            x_min * scale, 
            y_min * scale, 
            x_max * scale, 
            y_max * scale
        ])

    if angle % 360 == 0:
        # Only offset
        final_bboxes = []
        for (sx_min, sy_min, sx_max, sy_max) in scaled_bboxes:
            final_bboxes.append((
                int(sx_min + offset_x), 
                int(sy_min + offset_y),
                int(sx_max + offset_x),
                int(sy_max + offset_y)
            ))
        return final_bboxes

    # Build rotation matrix for 'angle' about the center of the scaled image
    center_x = (scale * original_w) / 2.0
    center_y = (scale * original_h) / 2.0
    M = cv2.getRotationMatrix2D((center_x, center_y), angle, 1.0)

    # Figure out new bounding dimension after rotation
    cos_val = abs(M[0, 0])
    sin_val = abs(M[0, 1])
    new_w = int((scale * original_h) * sin_val + (scale * original_w) * cos_val)
    new_h = int((scale * original_h) * cos_val + (scale * original_w) * sin_val)

    # Adjust the rotation matrix for translation so the top-left corner is at (0,0)
    M[0, 2] += (new_w / 2.0) - center_x
    M[1, 2] += (new_h / 2.0) - center_y

    final_bboxes = []
    for (sx_min, sy_min, sx_max, sy_max) in scaled_bboxes:
        # Convert to corner points
        corners = np.array([
            [sx_min, sy_min],
            [sx_min, sy_max],
            [sx_max, sy_min],
            [sx_max, sy_max]
        ], dtype=np.float32)
        # Add 1 for the affine
        ones = np.ones((4, 1), dtype=np.float32)
        corners_ones = np.hstack([corners, ones])

        # Transform corners
        transformed = M @ corners_ones.T  # shape (2,4)

        tx = transformed[0, :]
        ty = transformed[1, :]
        min_x, max_x = np.min(tx), np.max(tx)
        min_y, max_y = np.min(ty), np.max(ty)

        # Now offset for final placement on the canvas
        final_xmin = int(min_x + offset_x)
        final_ymin = int(min_y + offset_y)
        final_xmax = int(max_x + offset_x)
        final_ymax = int(max_y + offset_y)

        # Clip to valid range if desired
        final_xmin = max(0, min(final_xmin, final_w - 1))
        final_ymin = max(0, min(final_ymin, final_h - 1))
        final_xmax = max(0, min(final_xmax, final_w - 1))
        final_ymax = max(0, min(final_ymax, final_h - 1))

        final_bboxes.append((final_xmin, final_ymin, final_xmax, final_ymax))
    
    return final_bboxes

def process_image(args):
    """
    Processes a single image with the desired augmentations
    and writes them to disk. Returns a list of (aug_filename, new_bboxes).
    """
    image_path, bboxes = args
    full_img_path = os.path.join(dataset_path, image_path)

    image = cv2.imread(full_img_path)
    if image is None:
        # Skip missing or unreadable images
        return []

    original_h, original_w = image.shape[:2]
    augmented_files = []

    for scale in SCALES:
        # Choose upscale vs. downscale interpolation
        interp = UPSCALE_INTERPOLATION if scale >= 1.0 else DOWNSCALE_INTERPOLATION
        scaled_img = cv2.resize(image, None, fx=scale, fy=scale, interpolation=interp)

        for ax in ALIGNMENT_X:
            for ay in ALIGNMENT_Y:
                # Place scaled_img on the 1920x1080 canvas at alignment offsets
                placed_img, (offset_x, offset_y) = place_image_on_canvas(
                    scaled_img, TARGET_HEIGHT, TARGET_WIDTH, ax, ay
                )

                for angle in ROTATIONS:
                    # Rotate the entire 1920x1080 canvas
                    rotated_img = rotate_image(placed_img, angle)

                    # For bounding-box transformations:
                    # We transform them from original to final coords
                    final_h, final_w = rotated_img.shape[:2]
                    transformed_bboxes = transform_bboxes(
                        bboxes,
                        scale=scale,
                        angle=angle,
                        offset_x=offset_x,
                        offset_y=offset_y,
                        original_w=original_w,
                        original_h=original_h,
                        final_w=final_w,
                        final_h=final_h
                    )

                    for noise_type in NOISE_VARIANTS:
                        if noise_type == "gaussian":
                            noisy_img = add_gaussian_noise(rotated_img)
                        else:
                            noisy_img = rotated_img

                        # Different brightness variations
                        for variation, factor in [ # ("original", 1.0), <- original is good, but the harder tasks will still teach it
                                                  ("shaded", 0.5),
                                                  ("bright", 1.5)]:
                            final_img = adjust_brightness(noisy_img, factor)

                            base_name = os.path.basename(os.path.splitext(image_path)[0])  # Get only the filename
                            out_name = (
                                f"{base_name}"
                                f"_s{scale}"
                                f"_ax{ax}"
                                f"_ay{ay}"
                                f"_r{angle}"
                                f"_n{noise_type}"
                                f"_{variation}.jpg"
                            )
                            out_path = os.path.join(output_dir, out_name)  # Ensures no subdirectories


                            cv2.imwrite(out_path, final_img)
                            # print("Finish Writing" + str(out_path))
                            augmented_files.append((out_name, transformed_bboxes))

    return augmented_files

# ===============================================
# SINGLE-THREADED EXECUTION
# ===============================================

if __name__ == "__main__":
    print("Start Label Parsing")
    labels_dict = parse_labels(labels_path)
    print("Finish Label Parsing")
    # Convert dict to a list of (image_path, list_of_bboxes)
    images_list = list(labels_dict.items())
    images_list = images_list[:len(images_list)//10]

    print("Start Image Augmentation")
    # Single-threaded loop with tqdm
    all_results = []
    for item in tqdm(images_list, desc="Processing"):
        aug_data = process_image(item)
        all_results.append(aug_data)
    print("Finish Image Augmentation")
    print("Start Box Writing")
    # Write bounding box annotations for all augmentations
    with open(output_label_file, "w") as label_out:
        for result in all_results:
            for filename, bboxes in result:
                label_out.write(f"# {filename}\n")
                for x_min, y_min, x_max, y_max in bboxes:
                    label_out.write(f"{x_min} {y_min} {x_max} {y_max}\n")


# Prep Dataset for Use

In [29]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class FaceDataset(Dataset):
    def __init__(self, images_folder, labels_file, transform = None):
        self.images_folder = images_folder
        self.transform = transform # technically should be a parameter, but due to situation, we are doing that seperately beforehand
        self.data = []

        with open(labels_file, "r") as f:
            lines = f.readlines()
            image_path = None
            boxes = []
            for line in lines:
                line = line.strip()
                if line.startswith("#"):
                    if image_path:  # Save previous image
                        self.data.append((image_path, boxes))
                    image_path = os.path.join(images_folder, line[2:])
                    boxes = []
                else:
                    boxes.append(list(map(int, line.split())))
            if image_path:  # Save last image
                self.data.append((image_path, boxes))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_path, boxes = self.data[idx]
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # image = Image.open(image).convert('RGB')

        # keep in for easy refactor in the future
        if self.transform:
            image = self.transform(image)

        # # Convert to PyTorch tensor
        image = torch.from_numpy(image).float().permute(2, 0, 1) / 255.0  # Normalize to [0,1]

        # Convert bounding boxes to tensor
        boxes = torch.tensor(boxes, dtype=torch.float32)

        # Labels (assuming all objects belong to class 1, since it's face detection)
        labels = torch.ones((boxes.shape[0],), dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels}
        return image, target


In [30]:
full_images_path = r"C:\PROJECTS\MILO\MILO\FaceRecognition\data\Augmented"
full_labels_path = r"C:\PROJECTS\MILO\MILO\FaceRecognition\data\Augmented\all_augmentations.txt"
dataset = FaceDataset(full_images_path, full_labels_path)

print(len(dataset))

train_size = int(0.8 * len(dataset))

indices = torch.randperm(len(dataset)).tolist()
train_indices, test_indices = indices[:train_size], indices[train_size:]

train_dataset = torch.utils.data.Subset(dataset, train_indices)
test_dataset = torch.utils.data.Subset(dataset, test_indices)

def collate_fn(batch):
    images, targets = zip(*batch)
    return list(images), list(targets)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

102060


# NEW MODEL TRAINING

In [31]:
from torchvision.models.detection import ssdlite320_mobilenet_v3_large, SSDLite320_MobileNet_V3_Large_Weights, ssdlite
from torchvision.models.detection.anchor_utils import DefaultBoxGenerator
import math

# # Check if CUDA is available
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load SSD300 with VGG16 backbone
model = ssdlite320_mobilenet_v3_large(num_classes = 2)  # Set to True if you want pretrained weights
# model.to(device)



In [32]:
# Initialize lists to store losses
train_losses = []
eval_losses = []

# Live plotting function
def plot_losses():
    plt.figure(figsize=(8, 5))
    plt.plot(train_losses, label="Train Loss", marker="o")
    plt.plot(eval_losses, label="Eval Loss", marker="o", linestyle="dashed")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training vs Evaluation Loss")
    plt.legend()
    plt.grid()
    plt.show(block=False)
    plt.pause(0.1)  # Pause to update the plot

In [33]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
prev_eval = math.inf
# Training Loop with tqdm & Live Graph
def train_model(model, train_loader, test_loader, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_train_loss = 0
        # tqdm progress bar for training
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
        for images, targets in progress_bar:
            optimizer.zero_grad()
            # Move data to GPU if available
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass
            outputs = model(images, targets)

            # Compute loss
            # loss = criterion(outputs, targets)
            loss = sum(loss for loss in outputs.values())
            loss.backward()
            optimizer.step()
            
            total_train_loss += loss.item()
            progress_bar.set_postfix(loss=f"{loss.item():.4f}")

        avg_train_loss = total_train_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # Evaluate Model After Each Epoch
        avg_eval_loss = evaluate_model(model, test_loader)
        eval_losses.append(avg_eval_loss)

        if avg_eval_loss < prev_eval:
            torch.save(model.state_dict(), r"C:\PROJECTS\MILO\MILO\FaceRecognition\model_multi.pth")


        # Print Epoch Summary
        print(f"\nEpoch {epoch+1}: Train Loss = {avg_train_loss:.4f}, Eval Loss = {avg_eval_loss:.4f}")


# Evaluation Function with tqdm Progress Bar
def evaluate_model(model, test_loader):
    model.train()
    total_loss = 0

    with torch.no_grad():
        progress_bar = tqdm(test_loader, desc="Evaluating", unit="batch")
        for images, targets in progress_bar:
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # When targets are provided, the model returns a dictionary of losses.
            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())
            total_loss += loss.item()

            progress_bar.set_postfix(loss=f"{loss.item():.4f}")

    avg_loss = total_loss / len(test_loader)
    return avg_loss

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

In [34]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TORCH_USE_CUDA_DSA"] = "1"
print(device)
model.to(device)
# Train Model with Evaluation Each Epoch
train_model(model, train_loader, test_loader, num_epochs=20)
plot_losses()

cuda


Epoch 1/20: 100%|██████████| 20412/20412 [1:19:18<00:00,  4.29batch/s, loss=3.3220] 
Evaluating: 100%|██████████| 5103/5103 [18:35<00:00,  4.58batch/s, loss=2.8462] 



Epoch 1: Train Loss = 4.4925, Eval Loss = 4.1635


Epoch 2/20: 100%|██████████| 20412/20412 [1:17:09<00:00,  4.41batch/s, loss=4.9906] 
Evaluating: 100%|██████████| 5103/5103 [22:48<00:00,  3.73batch/s, loss=2.6169] 



Epoch 2: Train Loss = 4.0726, Eval Loss = 3.9793


Epoch 3/20: 100%|██████████| 20412/20412 [1:41:12<00:00,  3.36batch/s, loss=3.9306] 
Evaluating: 100%|██████████| 5103/5103 [21:09<00:00,  4.02batch/s, loss=2.9721] 



Epoch 3: Train Loss = 3.9190, Eval Loss = 3.8657


Epoch 4/20:  73%|███████▎  | 14821/20412 [56:30<21:19,  4.37batch/s, loss=5.0928] 


KeyboardInterrupt: 

# Model Training

## SSDLite

In [None]:
from torchvision.models.detection import ssdlite320_mobilenet_v3_large, SSDLite320_MobileNet_V3_Large_Weights, ssdlite
from torchvision.models.detection.anchor_utils import DefaultBoxGenerator
import math

# # Check if CUDA is available
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load SSD300 with VGG16 backbone
model = ssdlite320_mobilenet_v3_large(weights=SSDLite320_MobileNet_V3_Large_Weights.DEFAULT)  # Set to True if you want pretrained weights
# model.to(device)

# Get the number of input features for the classification head
in_channels = [list(m.parameters())[0].shape[0] for m in model.head.classification_head.module_list]

# # Check input channels
# num_anchors = model.head.classification_head.module_list[0][1].out_channels // 91  # Default COCO classes is 91

# dummy_input = torch.randn(1, 3, 1080, 1920)
# features = model.backbone(dummy_input)
# feature_map_shapes = [f.shape[-2:] for f in features.values()]


# # For example, using the height (1080) as the reference:
# new_anchor_generator = get_new_anchor_generator(1080, feature_map_shapes)
# # Replace the model's anchor generator:
# model.anchor_generator = new_anchor_generator


# # Modify classification head to have 2 classes 
# model.head.classification_head.num_classes = 2  # Update class count
# model.head.classification_head = ssdlite.SSDLiteClassificationHead(
#     in_channels = in_channels,
#     num_anchors = [num_anchors], 
#     num_classes = 2,
#     norm_layer = nn.BatchNorm2d
# )

# Compute the number of feature maps from the backbone
dummy_input = torch.randn(1, 3, 1080, 1920)
features = model.backbone(dummy_input)
feature_map_shapes = [f.shape[-2:] for f in features.values()]
num_feature_maps = len(feature_map_shapes)

# Define aspect ratios (one per feature map)
aspect_ratios = [[1.0, 2.0, 0.5]] * num_feature_maps

# Compute the number of anchors per feature map:
# (Typically, it's len(aspect_ratios[i]) + 1 for the extra anchor)
num_anchors_list = [len(ratios) + 1 for ratios in aspect_ratios]  # e.g. [4, 4, ..., 4]

def get_new_anchor_generator(input_size, feature_map_shapes, aspect_ratios=None):
    num_feature_maps = len(feature_map_shapes)
    s_min, s_max = 0.2, 0.9
    scales = [s_min + (s_max - s_min) * k / num_feature_maps for k in range(num_feature_maps + 1)]
    
    if aspect_ratios is None:
        aspect_ratios = [[1.0, 2.0, 0.5]] * num_feature_maps
    
    anchor_generator = DefaultBoxGenerator(aspect_ratios, scales=scales)
    return anchor_generator

new_anchor_generator = get_new_anchor_generator(1080, feature_map_shapes, aspect_ratios=aspect_ratios)
model.anchor_generator = new_anchor_generator

# Compute the in_channels for each feature map head as before:
in_channels = [list(m.parameters())[0].shape[0] for m in model.head.classification_head.module_list]

model.head.classification_head = ssdlite.SSDLiteClassificationHead(
    in_channels=in_channels,
    num_anchors=num_anchors_list,  # now a list for each feature map
    num_classes=2,
    norm_layer=nn.BatchNorm2d
)

model.head.regression_head = ssdlite.SSDLiteRegressionHead(
    in_channels=in_channels,
    num_anchors=num_anchors_list,
    norm_layer=nn.BatchNorm2d
)


# Modify for 1080p Input
model.size = (1080, 1920)

# Freeze all layers by default
for param in model.parameters():
    param.requires_grad = False

# # Unfreeze the first few layers of the backbone
# for layer in list(model.backbone.features)[:1]:  # Modify the number as needed
#     for param in layer.parameters():
#         param.requires_grad = True

# Unfreeze the last layers of the detection head (classification + box regression)
for param in model.head.classification_head.parameters():
    param.requires_grad = True

for param in model.head.regression_head.parameters():
    param.requires_grad = True

# # Print which layers are trainable
# trainable_layers = [name for name, param in model.named_parameters() if param.requires_grad]
# print("Trainable layers:", trainable_layers)



In [None]:
# Initialize lists to store losses
train_losses = []
eval_losses = []

# Live plotting function
def plot_losses():
    plt.figure(figsize=(8, 5))
    plt.plot(train_losses, label="Train Loss", marker="o")
    plt.plot(eval_losses, label="Eval Loss", marker="o", linestyle="dashed")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training vs Evaluation Loss")
    plt.legend()
    plt.grid()
    plt.show(block=False)
    plt.pause(0.1)  # Pause to update the plot

In [None]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training Loop with tqdm & Live Graph
def train_model(model, train_loader, test_loader, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_train_loss = 0
        # tqdm progress bar for training
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
        for images, targets in progress_bar:
            optimizer.zero_grad()
            # Move data to GPU if available
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass
            outputs = model(images, targets)

            # Compute loss
            # loss = criterion(outputs, targets)
            loss = sum(loss for loss in outputs.values())
            loss.backward()
            optimizer.step()
            
            total_train_loss += loss.item()
            progress_bar.set_postfix(loss=f"{loss.item():.4f}")

        avg_train_loss = total_train_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # Evaluate Model After Each Epoch
        avg_eval_loss = evaluate_model(model, test_loader)
        eval_losses.append(avg_eval_loss)

        # Print Epoch Summary
        print(f"\nEpoch {epoch+1}: Train Loss = {avg_train_loss:.4f}, Eval Loss = {avg_eval_loss:.4f}")


# Evaluation Function with tqdm Progress Bar
def evaluate_model(model, test_loader):
    model.train()
    total_loss = 0

    with torch.no_grad():
        progress_bar = tqdm(test_loader, desc="Evaluating", unit="batch")
        for images, targets in progress_bar:
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # When targets are provided, the model returns a dictionary of losses.
            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())
            total_loss += loss.item()

            progress_bar.set_postfix(loss=f"{loss.item():.4f}")

    avg_loss = total_loss / len(test_loader)
    return avg_loss

# Check if CUDA is available
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

In [None]:
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TORCH_USE_CUDA_DSA"] = "1"
print(device)
model.to(device)
for _ in range(1):
    # Train Model with Evaluation Each Epoch
    train_model(model, train_loader, test_loader, num_epochs=5)
    plot_losses()
    torch.save(model.state_dict(), r"C:\PROJECTS\MILO\MILO\FaceRecognition\model.pth")
    

In [None]:
# Convert to ONNX with 1080p Input
dummy_input = torch.randn(1, 3, 1080, 1920).to(device)  # Adjusted for 1080p
torch.onnx.export(model, dummy_input, ["ssd_1080p.onnx"], dynamo=True)

In [None]:
torch.save(model.state_dict(), r"C:\PROJECTS\MILO\MILO\FaceRecognition\model.pth")

In [None]:
model = TheModelClass(*args, **kwargs)
model.load_state_dict(torch.load(PATH, weights_only=True))
model.eval()