In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, Subset
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import time
import datetime
import csv
import config
from torch.utils.data import random_split
from importnb import Notebook
with Notebook():
    from jetbot_dataset import *

In [2]:
# --- Diffusion Helpers ---
def linear_beta_schedule(timesteps, beta_start, beta_end):
    """Build a linearly spaced beta (noise variance) schedule.

    Args:
        timesteps: Number of values to produce.
        beta_start: First value of the schedule.
        beta_end: Last value of the schedule.

    Returns:
        A 1-D tensor of length `timesteps` running evenly from `beta_start`
        to `beta_end` (both endpoints included).
    """
    schedule = torch.linspace(start=beta_start, end=beta_end, steps=timesteps)
    return schedule

def cosine_beta_schedule(timesteps, s=0.008):
    """Build a cosine-shaped beta schedule.

    A squared-cosine curve is evaluated on `timesteps + 1` evenly spaced
    points, normalised by its first value, and converted to per-step betas
    as one minus the ratio of consecutive curve values.

    Args:
        timesteps: Number of betas to produce.
        s: Offset added to the normalised step before the cosine is applied.

    Returns:
        A float64 tensor of length `timesteps`, clipped to [0, 0.999].
    """
    grid = torch.linspace(0, timesteps, timesteps + 1, dtype=torch.float64)
    phase = ((grid / timesteps) + s) / (1 + s) * np.pi * 0.5
    cumulative = torch.cos(phase) ** 2
    # Normalise so the curve starts at exactly 1.
    cumulative = cumulative / cumulative[0]
    step_ratio = cumulative[1:] / cumulative[:-1]
    return torch.clip(1 - step_ratio, 0, 0.999)

def get_index_from_list(vals, t, x_shape):
    """Pick one entry of `vals` per batch item and shape it for broadcasting.

    Args:
        vals: 1-D tensor of per-timestep values.
        t: 1-D integer tensor of indices into `vals`, one per batch item.
        x_shape: Shape of the tensor the result will be broadcast against;
            only its length is used.

    Returns:
        A tensor of shape `(len(t), 1, ..., 1)` with `len(x_shape)` dimensions,
        moved to the device of `t`.
    """
    n_items = t.shape[0]
    singleton_dims = [1] * (len(x_shape) - 1)
    picked = vals.gather(-1, t)
    picked = picked.reshape(n_items, *singleton_dims)
    return picked.to(t.device)

def forward_diffusion_sample(x_0, t, betas, alphas_cumprod, device="cpu"):
    """Noise `x_0` to timestep `t` in a single step.

    Computes `sqrt(alphas_cumprod[t]) * x_0 + sqrt(1 - alphas_cumprod[t]) * noise`
    with freshly drawn standard-normal noise.

    Args:
        x_0: Tensor to be noised.
        t: 1-D integer tensor of timesteps, one per batch item.
        betas: Accepted for interface compatibility; not used here.
        alphas_cumprod: 1-D tensor of cumulative alpha products per timestep.
        device: Accepted for interface compatibility; not used here.

    Returns:
        `(x_t, noise)`: the noised tensor and the noise that was mixed in.
    """
    eps = torch.randn_like(x_0)
    signal_scale = get_index_from_list(alphas_cumprod.sqrt(), t, x_0.shape)
    noise_scale = get_index_from_list((1. - alphas_cumprod).sqrt(), t, x_0.shape)
    x_t = signal_scale * x_0 + noise_scale * eps
    return x_t, eps

# --- U-Net Model ---
class Block(nn.Module):
    """Two-convolution U-Net stage that injects a conditioning embedding.

    The stage applies conv -> ReLU -> BatchNorm twice, adds a projected
    embedding between the two, and finishes with a stride-2 resampling layer:
    a `Conv2d` when `up` is False, a `ConvTranspose2d` when `up` is True.
    With `up=True` the first convolution expects `2 * in_ch` input channels,
    matching the skip-connection concatenation done by the caller.
    """

    def __init__(self, in_ch, out_ch, time_emb_dim, up=False):
        super().__init__()
        # Submodules are registered in the same order as before so that
        # parameter ordering (and saved optimizer state) stays compatible.
        self.time_mlp = nn.Linear(time_emb_dim, out_ch)
        first_conv_in = 2 * in_ch if up else in_ch
        resample_layer = nn.ConvTranspose2d if up else nn.Conv2d
        self.conv1 = nn.Conv2d(first_conv_in, out_ch, 3, padding=1)
        self.transform = resample_layer(out_ch, out_ch, kernel_size=4, stride=2, padding=1)
        self.conv2 = nn.Conv2d(out_ch, out_ch, 3, padding=1)
        self.bnorm1 = nn.BatchNorm2d(out_ch)
        self.bnorm2 = nn.BatchNorm2d(out_ch)
        self.relu = nn.ReLU()

    def forward(self, x, t, ):
        """Run the stage on feature map `x` with conditioning embedding `t`."""
        features = self.conv1(x)
        features = self.bnorm1(self.relu(features))

        # Project the embedding to `out_ch` and append two singleton axes so
        # it broadcasts over the spatial dimensions.
        cond = self.relu(self.time_mlp(t))
        cond = cond.unsqueeze(-1).unsqueeze(-1)
        features = features + cond

        features = self.conv2(features)
        features = self.bnorm2(self.relu(features))
        return self.transform(features)

class SinusoidalPositionEmbeddings(nn.Module):
    """Parameter-free sinusoidal embedding of a 1-D tensor of timesteps.

    The output concatenates `dim // 2` sine features with `dim // 2` cosine
    features, using geometrically spaced frequencies from 1 down to 1/10000.
    """

    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, time):
        """Return a `(len(time), 2 * (dim // 2))` embedding for `time`."""
        n_freqs = self.dim // 2
        log_step = np.log(10000) / (n_freqs - 1)
        freqs = torch.exp(torch.arange(n_freqs, device=time.device) * -log_step)
        # Outer product: one row per timestep, one column per frequency.
        angles = time.unsqueeze(1) * freqs.unsqueeze(0)
        return torch.cat((angles.sin(), angles.cos()), dim=-1)

class SimpleUNet(nn.Module):
    """Small U-Net that predicts noise for a frame given history and an action.

    The network input is the noisy frame concatenated channel-wise with the
    previous frames, so the first convolution takes
    `image_channels * (num_prev_frames + 1)` channels. Every `Block` is built
    for a conditioning vector of width `time_emb_dim + 1`: the timestep
    embedding plus one action value.
    """

    def __init__(self, image_channels=3, down_channels=(64, 128, 256), up_channels=(256, 128, 64), time_emb_dim=32, num_prev_frames=4):
        super().__init__()
        cond_dim = time_emb_dim + 1  # timestep embedding + one action feature
        stacked_channels = image_channels * (num_prev_frames + 1)

        # Registration order is unchanged so parameter ordering stays
        # compatible with existing checkpoints and optimizer state.
        self.time_mlp = nn.Sequential(
            SinusoidalPositionEmbeddings(time_emb_dim),
            nn.Linear(time_emb_dim, time_emb_dim),
            nn.ReLU(),
        )
        self.conv0 = nn.Conv2d(stacked_channels, down_channels[0], 3, padding=1)

        self.downs = nn.ModuleList([
            Block(down_channels[k], down_channels[k + 1], cond_dim)
            for k in range(len(down_channels) - 1)
        ])
        self.ups = nn.ModuleList([
            Block(up_channels[k], up_channels[k + 1], cond_dim, up=True)
            for k in range(len(up_channels) - 1)
        ])

        self.output = nn.Conv2d(up_channels[-1], image_channels, 1)

    def forward(self, x, timestep, action, prev_frames):
        """Predict the noise contained in `x`.

        Args:
            x: Noisy current frame.
            timestep: 1-D tensor of diffusion timesteps, one per batch item.
            action: Action values; a 1-D tensor is expanded to a column before
                being appended to the timestep embedding. If None, nothing is
                appended and the embedding stays `time_emb_dim` wide, while
                the blocks were built for `time_emb_dim + 1`.
            prev_frames: Previous frames, concatenated with `x` along dim 1.

        Returns:
            Output of the final 1x1 convolution with `image_channels` channels.
        """
        stacked = torch.cat([x, prev_frames], dim=1)

        cond = self.time_mlp(timestep)
        if action is not None:
            action_col = action.float()
            if action_col.dim() == 1:
                action_col = action_col.unsqueeze(1)
            cond = torch.cat([cond, action_col], dim=1)

        h = self.conv0(stacked)

        # Encoder: remember each stage output for the decoder skip connections.
        skips = []
        for stage in self.downs:
            h = stage(h, cond)
            skips.append(h)

        # Decoder: consume the skips in reverse order.
        for stage in self.ups:
            skip = skips.pop()
            h = stage(torch.cat((h, skip), dim=1), cond)

        return self.output(h)

# --- Training Loop ---
def _frame_to_uint8(frame):
    """Map a tensor with values in [-1, 1] to uint8 pixel values in [0, 255]."""
    return ((frame.clamp(-1, 1) + 1) / 2 * 255).type(torch.uint8)


def _save_sample_strip(model, sample_source, betas, alphas_cumprod, device, use_fp16,
                       num_prev_frames, out_path):
    """Denoise one random item from `sample_source` and save a comparison image.

    The saved strip shows, left to right: the previous frames, the real
    current frame, and the frame produced by the reverse diffusion loop.
    The caller is expected to have put the model in eval mode and disabled
    gradient tracking.
    """
    # Derived from the `betas` argument instead of a notebook-level global.
    alphas = 1. - betas

    sample_idx = torch.randint(0, len(sample_source), (1,)).item()
    real_current_frame, action, real_prev_frames = sample_source[sample_idx]
    real_current_frame = real_current_frame.unsqueeze(0).to(device)
    real_prev_frames = real_prev_frames.unsqueeze(0).to(device)
    action = action.to(device)

    # Start from the real frame noised to the last timestep.
    t_start = torch.tensor([config.NUM_TIMESTEPS - 1], device=device, dtype=torch.long)
    x, _ = forward_diffusion_sample(real_current_frame, t_start, betas, alphas_cumprod, device)

    for i in reversed(range(1, config.NUM_TIMESTEPS)):
        t = torch.full((1,), i, device=device, dtype=torch.long)
        with torch.cuda.amp.autocast(enabled=use_fp16):
            predicted_noise = model(x, t, action, real_prev_frames)

        alpha = alphas[t][:, None, None, None]
        alpha_hat = alphas_cumprod[t][:, None, None, None]
        beta = betas[t][:, None, None, None]

        # No fresh noise is added on the final iteration of this loop.
        noise = torch.randn_like(x) if i > 1 else torch.zeros_like(x)
        x = 1 / torch.sqrt(alpha) * (x - ((1 - alpha) / (torch.sqrt(1 - alpha_hat))) * predicted_noise) + torch.sqrt(beta) * noise

    to_pil = transforms.ToPILImage()
    tiles = [
        to_pil(_frame_to_uint8(real_prev_frames[0, (i * 3):(i + 1) * 3, :, :]))
        for i in range(num_prev_frames)
    ]
    # Convert to uint8 *before* building the PIL image; the earlier code called
    # `.type(torch.uint8)` on the PIL image itself, which raises AttributeError.
    tiles.append(to_pil(_frame_to_uint8(real_current_frame[0])).convert("RGB"))
    tiles.append(to_pil(_frame_to_uint8(x[0])).convert("RGB"))

    strip = Image.new('RGB', ((num_prev_frames + 2) * config.IMAGE_SIZE, config.IMAGE_SIZE))
    for tile_idx, tile in enumerate(tiles):
        strip.paste(tile, (tile_idx * config.IMAGE_SIZE, 0))
    strip.save(out_path)


def _save_loss_plot(all_losses, start_epoch, last_plot_epoch, epoch, formatted_time, out_path):
    """Save a two-panel figure: full loss history and loss since the last plot."""
    fig, axes = plt.subplots(1, 2, figsize=(16, 5))

    axes[0].plot(all_losses)
    axes[0].set_xlabel("Epoch")
    axes[0].set_ylabel("Loss")
    axes[0].set_title(f"Training Loss from Start (Time: {formatted_time})")
    axes[0].grid(True)

    # `all_losses[0]` belongs to `start_epoch`, so absolute epoch numbers must
    # be shifted before slicing; otherwise x and y differ in length on resume.
    recent_losses = all_losses[last_plot_epoch - start_epoch:]
    axes[1].plot(range(last_plot_epoch + 1, epoch + 2), recent_losses)
    axes[1].set_xlabel("Epoch")
    axes[1].set_ylabel("Loss")
    axes[1].set_title(f"Loss Since Last Plot (Epoch {last_plot_epoch + 1})")
    axes[1].grid(True)

    fig.tight_layout()
    fig.savefig(out_path)
    plt.close(fig)


def train(model, dataloader, optimizer, betas, alphas_cumprod, start_epoch, num_epochs,
          device, save_every, sample_every, checkpoint_dir, sample_dir, plot_dir,
          plot_every, use_fp16, accumulation_steps, num_prev_frames,
          early_stopping_patience, early_stopping_percentage, min_epochs):
    """
    Trains the diffusion model with early stopping and best model saving/deletion.

    Args:
        model: Network called as `model(x_noisy, t, actions, prev_frames)`.
        dataloader: Yields `(images, actions, prev_frames)` batches. Its
            `.dataset` is also used to draw items for sample images.
        optimizer: Optimizer over the model parameters.
        betas: 1-D tensor of per-timestep betas.
        alphas_cumprod: 1-D tensor of cumulative alpha products.
        start_epoch: Zero-based epoch index to start from.
        num_epochs: Exclusive upper bound on the epoch index.
        device: Device the batches are moved to.
        save_every / sample_every / plot_every: Periods, in epochs, for writing
            checkpoints, sample images, and loss plots.
        checkpoint_dir / sample_dir / plot_dir: Output directories.
        use_fp16: Enables CUDA autocast and gradient scaling.
        accumulation_steps: Number of batches accumulated per optimizer step.
        num_prev_frames: Number of previous frames shown in sample images.
        early_stopping_patience: Epochs without improvement of the moving
            average before the stopping test is evaluated; None disables
            early stopping and best-model saving.
        early_stopping_percentage: Stop when the moving average improved by
            less than this many percent.
        min_epochs: Early stopping and best-model tracking start after this
            many epochs.

    Returns:
        List with the mean loss of every epoch that was run (empty when
        `start_epoch >= num_epochs`).
    """

    all_losses = []
    start_time = time.time()
    # Absolute (zero-based) epoch index at which the last plot was produced.
    last_plot_epoch = start_epoch
    best_loss = float('inf')
    best_epoch = 0
    epochs_without_improvement = 0
    moving_avg_window = 10
    moving_avg_losses = []
    previous_best_model_path = None  # Keep track of the previous best model's path

    scaler = torch.cuda.amp.GradScaler(enabled=use_fp16)

    for epoch in range(start_epoch, num_epochs):
        model.train()
        epoch_losses = []
        optimizer.zero_grad()
        pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}")

        # Batches whose gradients are accumulated but not yet applied.
        pending_micro_batches = 0

        for step, (images, actions, prev_frames) in enumerate(pbar):
            images = images.to(device)
            actions = actions.to(device)
            prev_frames = prev_frames.to(device)
            t = torch.randint(0, config.NUM_TIMESTEPS, (images.shape[0],), device=device).long()

            with torch.cuda.amp.autocast(enabled=use_fp16):
                x_noisy, noise = forward_diffusion_sample(images, t, betas, alphas_cumprod, device)
                predicted_noise = model(x_noisy, t, actions, prev_frames)
                loss = F.mse_loss(noise, predicted_noise)
                loss = loss / accumulation_steps

            scaler.scale(loss).backward()
            pending_micro_batches += 1

            if pending_micro_batches == accumulation_steps:
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
                pending_micro_batches = 0

            # Undo the accumulation scaling so the logged value is the batch loss.
            epoch_losses.append(loss.item() * accumulation_steps)
            pbar.set_postfix({"Loss": loss.item() * accumulation_steps})

        # Apply gradients left over from an incomplete accumulation group.
        # Counting batches is reliable regardless of whether `zero_grad`
        # sets gradients to None or to zero tensors.
        if pending_micro_batches > 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        avg_epoch_loss = sum(epoch_losses) / len(epoch_losses)
        all_losses.append(avg_epoch_loss)

        moving_avg_losses.append(avg_epoch_loss)
        if len(moving_avg_losses) > moving_avg_window:
            moving_avg_losses.pop(0)
        current_moving_avg = sum(moving_avg_losses) / len(moving_avg_losses)

        if (epoch + 1) % save_every == 0:
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': avg_epoch_loss,
            }, os.path.join(checkpoint_dir, f"model_epoch_{epoch+1}.pth"))
            print(f"Saved model checkpoint at epoch {epoch+1}")

        if (epoch + 1) % sample_every == 0:
            model.eval()
            with torch.no_grad():
                _save_sample_strip(
                    model, dataloader.dataset, betas, alphas_cumprod, device, use_fp16,
                    num_prev_frames, os.path.join(sample_dir, f"sample_epoch_{epoch+1}.png"),
                )
                print(f"Saved sample image at epoch {epoch+1}")

            model.train()

        if (epoch + 1) % plot_every == 0:
            elapsed_time = time.time() - start_time
            formatted_time = str(datetime.timedelta(seconds=elapsed_time))

            _save_loss_plot(
                all_losses, start_epoch, last_plot_epoch, epoch, formatted_time,
                os.path.join(plot_dir, f"loss_plot_epoch_{epoch+1}.png"),
            )
            print(f"Epoch {epoch+1}: Loss = {avg_epoch_loss:.4f}, Time = {formatted_time}")

            last_plot_epoch = epoch

        # --- Early Stopping (Dynamic Threshold) and Best Model Saving/Deletion---
        if early_stopping_patience is not None and epoch + 1 > min_epochs:
            if current_moving_avg < best_loss:
                best_loss = current_moving_avg
                best_epoch = epoch + 1
                epochs_without_improvement = 0

                # Save the *best* model
                new_best_model_path = os.path.join(checkpoint_dir, f"model_best_epoch_{best_epoch}.pth")
                torch.save({
                    'epoch': best_epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': best_loss,  # Save the best loss
                }, new_best_model_path)
                print(f"Saved best model at epoch {best_epoch} with loss {best_loss}")

                # Delete the *previous* best model (if it exists)
                if previous_best_model_path and os.path.exists(previous_best_model_path):
                    os.remove(previous_best_model_path)
                    print(f"Deleted previous best model: {previous_best_model_path}")
                previous_best_model_path = new_best_model_path # Update the path

            else:
                epochs_without_improvement += 1

            # The percentage test needs a full window; until then `improvement`
            # cannot be computed, so the stop decision is deferred.
            if (epochs_without_improvement >= early_stopping_patience
                    and len(moving_avg_losses) == moving_avg_window):
                prev_moving_avg = sum(moving_avg_losses[:-1]) / (moving_avg_window - 1)
                improvement = (prev_moving_avg - current_moving_avg) / prev_moving_avg * 100
                if improvement < early_stopping_percentage:
                    print(f"Early stopping triggered at epoch {epoch+1}.  Improvement: {improvement:.2f}%")
                    break

    end_time = time.time()
    total_time = end_time - start_time
    formatted_time = str(datetime.timedelta(seconds=total_time))
    print(f"Total training time: {formatted_time}")

    # Nothing to save when the loop body never ran (e.g. resuming from a
    # checkpoint whose epoch already reached `num_epochs`).
    if not all_losses:
        print(f"No epochs were run (start_epoch={start_epoch}, num_epochs={num_epochs}); skipping final checkpoint.")
        return all_losses

    torch.save({
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': avg_epoch_loss,
    }, os.path.join(checkpoint_dir, "model_last.pth"))
    print(f"Saved last model at epoch {epoch+1} with loss {avg_epoch_loss}")

    return all_losses
    

In [3]:
if __name__ == "__main__":
    # --- Data Transforms ---
    # Resize to a square image, convert to a tensor and normalise each channel
    # with mean 0.5 / std 0.5.
    transform = transforms.Compose([
        transforms.Resize((config.IMAGE_SIZE, config.IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # --- Create Dataset and DataLoader ---
    dataset = JetbotDataset(config.CSV_PATH, config.DATA_DIR, config.IMAGE_SIZE, config.NUM_PREV_FRAMES, transform=transform)

    # Try to load existing split
    train_dataset, test_dataset = load_train_test_split(dataset, config.SPLIT_DATASET_FILENAME)

    if train_dataset is None or test_dataset is None:
        print("Dataset split file not found, creating a new split...")
        train_size = int(0.8 * len(dataset))  # 80% for training
        test_size = len(dataset) - train_size
        train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

        # Persist the split so later runs reuse it.
        save_existing_split(train_dataset, test_dataset, config.SPLIT_DATASET_FILENAME)
    else:
        print("Loaded existing dataset split.")

    train_dataloader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False) # Batch size 1 for easier evaluation

    # --- Calculate Betas and Alphas ---
    betas = linear_beta_schedule(config.NUM_TIMESTEPS, config.BETA_START, config.BETA_END).to(config.DEVICE)
    #betas = cosine_beta_schedule(config.NUM_TIMESTEPS).to(config.DEVICE) # Alternative

    alphas = (1. - betas).to(config.DEVICE)
    alphas_cumprod = torch.cumprod(alphas, axis=0).to(config.DEVICE)
    alphas_cumprod_prev = F.pad(alphas_cumprod[:-1], (1, 0), value=1.0).to(config.DEVICE)
    sqrt_recip_alphas = torch.sqrt(1.0 / alphas).to(config.DEVICE)

    # --- Create Model and Optimizer ---
    model = SimpleUNet(image_channels=3, down_channels=(64, 128, 256), up_channels=(256, 128, 64), time_emb_dim=32, num_prev_frames=config.NUM_PREV_FRAMES).to(config.DEVICE)
    optimizer = torch.optim.AdamW(model.parameters(), lr=config.LEARNING_RATE)

    # --- Load Checkpoint (if available) ---
    if config.LOAD_CHECKPOINT:
        # map_location lets a checkpoint saved on one device (e.g. a GPU) be
        # loaded when that device is not available on the current machine.
        checkpoint = torch.load(config.LOAD_CHECKPOINT, map_location=config.DEVICE)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        START_EPOCH = checkpoint['epoch']
        print(f"Loaded checkpoint from epoch {START_EPOCH}")
    else:
        START_EPOCH = 0

    # --- Train the Model ---
    losses = train(model, train_dataloader, optimizer, betas, alphas_cumprod, START_EPOCH, config.NUM_EPOCHS, config.DEVICE,
                   config.SAVE_MODEL_EVERY, config.SAMPLE_EVERY, config.CHECKPOINT_DIR, config.SAMPLE_DIR, config.PLOT_DIR, config.PLOT_EVERY, config.USE_FP16,
                   config.ACCUMULATION_STEPS, config.NUM_PREV_FRAMES, early_stopping_patience=config.EARLY_STOPPING_PATIENCE, early_stopping_percentage=config.EARLY_STOPPING_PERCENTAGE, min_epochs=config.MIN_EPOCHS)
    print("Training complete!")

    # --- Final Loss Plot ---
    plt.figure(figsize=(10, 5))
    plt.plot(losses)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training Loss")
    plt.grid(True)
    plt.savefig(os.path.join(config.PLOT_DIR, "loss_plot_final.png"))  # Save to plot dir
    plt.close()

Loaded combined CSV with columns: ['session_id', 'image_path', 'timestamp', 'action']
Total rows in CSV: 23081, Valid sequence start indices: 23037
Dataset split file not found, creating a new split...


Epoch 1/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 2/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 3/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 4/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 5/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 6/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 6 with loss 0.026768123115845


Epoch 7/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 7 with loss 0.024061147295065487
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_6.pth


Epoch 8/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 8 with loss 0.0219071691936708
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_7.pth


Epoch 9/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 9 with loss 0.020271183152401673
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_8.pth


Epoch 10/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved sample image at epoch 10
Epoch 10: Loss = 0.0062, Time = 0:51:44.327066
Saved best model at epoch 10 with loss 0.018860962881414402
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_9.pth


Epoch 11/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 11 with loss 0.011134282957364879
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_10.pth


Epoch 12/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 12 with loss 0.009294241681853539
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_11.pth


Epoch 13/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 13 with loss 0.007994079356555831
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_12.pth


Epoch 14/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 14 with loss 0.007074766982779897
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_13.pth


Epoch 15/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 15 with loss 0.006281358286706033
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_14.pth


Epoch 16/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 16 with loss 0.005776196348363606
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_15.pth


Epoch 17/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 17 with loss 0.005381331810327892
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_16.pth


Epoch 18/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 18 with loss 0.005090616749788524
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_17.pth


Epoch 19/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 19 with loss 0.004766328634762986
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_18.pth


Epoch 20/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved sample image at epoch 20
Epoch 20: Loss = 0.0038, Time = 1:43:17.170330
Saved best model at epoch 20 with loss 0.004530658674778149
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_19.pth


Epoch 21/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 21 with loss 0.004332061396830353
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_20.pth


Epoch 22/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 22 with loss 0.004187792200967996
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_21.pth


Epoch 23/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 23 with loss 0.004030764782246227
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_22.pth


Epoch 24/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 24 with loss 0.003909941722037047
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_23.pth


Epoch 25/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 25 with loss 0.003793861846367008
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_24.pth


Epoch 26/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 26 with loss 0.0036796633452739667
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_25.pth


Epoch 27/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 27 with loss 0.003612675512360712
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_26.pth


Epoch 28/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 28 with loss 0.003533821115609549
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_27.pth


Epoch 29/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 29 with loss 0.003452066186759832
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_28.pth


Epoch 30/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved sample image at epoch 30
Epoch 30: Loss = 0.0033, Time = 2:34:33.592807
Saved best model at epoch 30 with loss 0.0034008632129743747
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_29.pth


Epoch 31/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 31 with loss 0.0033481739950433262
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_30.pth


Epoch 32/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 32 with loss 0.0032706526509697492
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_31.pth


Epoch 33/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 33 with loss 0.003230057489351642
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_32.pth


Epoch 34/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 34 with loss 0.0031801565777534006
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_33.pth


Epoch 35/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 35 with loss 0.0031485920657625377
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_34.pth


Epoch 36/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 36 with loss 0.00312129686709568
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_35.pth


Epoch 37/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 37 with loss 0.0031117836180240173
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_36.pth


Epoch 38/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 39/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 39 with loss 0.003076191596001049
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_37.pth


Epoch 40/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved sample image at epoch 40
Epoch 40: Loss = 0.0029, Time = 3:25:45.419073
Saved best model at epoch 40 with loss 0.003032930892841218
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_39.pth


Epoch 41/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 41 with loss 0.0030009219842988774
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_40.pth


Epoch 42/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 42 with loss 0.0029888733284269935
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_41.pth


Epoch 43/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 43 with loss 0.002955073633440798
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_42.pth


Epoch 44/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 44 with loss 0.002927071811990977
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_43.pth


Epoch 45/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 45 with loss 0.0029124995751800497
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_44.pth


Epoch 46/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 46 with loss 0.002876093067912519
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_45.pth


Epoch 47/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 47 with loss 0.00282307404510915
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_46.pth


Epoch 48/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 48 with loss 0.0028049784134506375
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_47.pth


Epoch 49/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 49 with loss 0.002788240651016673
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_48.pth


Epoch 50/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved sample image at epoch 50
Epoch 50: Loss = 0.0027, Time = 4:16:56.188074
Saved best model at epoch 50 with loss 0.002774257544374513
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_49.pth


Epoch 51/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 51 with loss 0.0027438207719815938
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_50.pth


Epoch 52/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 52 with loss 0.0027309883682875126
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_51.pth


Epoch 53/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 53 with loss 0.0026978013221861065
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_52.pth


Epoch 54/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 54 with loss 0.002677420634677851
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_53.pth


Epoch 55/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 55 with loss 0.0026531513919939207
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_54.pth


Epoch 56/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 57/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 58/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 58 with loss 0.00261720418506586
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_55.pth


Epoch 59/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 59 with loss 0.0026068512269126305
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_58.pth


Epoch 60/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved sample image at epoch 60
Epoch 60: Loss = 0.0026, Time = 5:08:27.138695
Saved best model at epoch 60 with loss 0.0025907044690536432
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_59.pth


Epoch 61/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 61 with loss 0.002581490996556113
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_60.pth


Epoch 62/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Saved best model at epoch 62 with loss 0.0025456127861970246
Deleted previous best model: output_two_action_sessions_dataset\checkpoints\model_best_epoch_61.pth


Epoch 63/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 64/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 65/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 66/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 67/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 68/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Epoch 69/1000:   0%|          | 0/4608 [00:00<?, ?it/s]

Early stopping triggered at epoch 69.  Improvement: -0.20%
Total training time: 5:54:56.924194
Saved last model at epoch 69 with loss 0.0026045583277386365
Training complete!


In [7]:
# Display the currently configured sampling period (in epochs) from `config`.
config.SAMPLE_EVERY

20