In [2]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from loadData import loadData
from model import network
import matplotlib.pyplot as plt
from loss import customLoss
from torch.optim.lr_scheduler import StepLR
import torch.nn.init as init
from torch.utils.data import random_split

def custom_collate_fn(batch):
    images = torch.stack([item[0] for item in batch])  # Stack images (all have the same size)
    labels = [item[1] for item in batch]  # Keep labels as a list of variable-length tensors
    return images, labels


if __name__ == '__main__':

    model = network()
    #model.init_weights()
    #model.load_state_dict(torch.load('weights.pth'))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    print("CUDA Available: ", torch.cuda.is_available())
    #print("Using GPU:", torch.cuda.get_device_name(0))

    data = loadData('WeedCrop.v1i.yolov5pytorch/train/')

    train_loader = DataLoader(data, batch_size=64, shuffle=True, collate_fn=custom_collate_fn)

    criterion = customLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    num_epochs = 100

    for epoch in range(num_epochs):
        running_loss = 0.0
        model.train()  # Set model to training mode

        for images, labels in train_loader:
            # Move images and labels to the GPU
            images, labels = images.float().to(device), [label.float().to(device) for label in labels] 
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(images)
            loss = 0
            for i in range(outputs.shape[0]):
                loss = loss + criterion(outputs[i, :], labels[i], device)
            
            if torch.isnan(loss).any():
                print(f"Loss became NaN at epoch {epoch}")
                break
            
            loss.backward()
            
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            optimizer.step()
            # Get loss weighed by to total number if images in the batch
            running_loss += loss.item() * images.size(0) 
            
        # Get the average loss across the entire epoc
        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

        torch.save(model.state_dict(), 'weights.pth')
    
    torch.save(model.state_dict(), 'weights.pth')

CUDA Available:  True
Epoch [1/100], Loss: 115.2690
Epoch [2/100], Loss: 87.0143
Epoch [3/100], Loss: 65.3567
Epoch [4/100], Loss: 54.2766
Epoch [5/100], Loss: 48.4334
Epoch [6/100], Loss: 45.0817
Epoch [7/100], Loss: 41.8850
Epoch [8/100], Loss: 39.0328
Epoch [9/100], Loss: 36.9449
Epoch [10/100], Loss: 34.5718
Epoch [11/100], Loss: 33.3085
Epoch [12/100], Loss: 32.1361
Epoch [13/100], Loss: 30.5714
Epoch [14/100], Loss: 29.9997
Epoch [15/100], Loss: 28.0100
Epoch [16/100], Loss: 27.3341
Epoch [17/100], Loss: 26.3047
Epoch [18/100], Loss: 25.3380
Epoch [19/100], Loss: 24.3901
Epoch [20/100], Loss: 23.9057
Epoch [21/100], Loss: 23.6338
Epoch [22/100], Loss: 22.8968
Epoch [23/100], Loss: 22.7198
Epoch [24/100], Loss: 22.0724
Epoch [25/100], Loss: 22.1456
Epoch [26/100], Loss: 21.9076
Epoch [27/100], Loss: 21.1634
Epoch [28/100], Loss: 20.6292
Epoch [29/100], Loss: 20.3993
Epoch [30/100], Loss: 19.7562
Epoch [31/100], Loss: 19.7323
Epoch [32/100], Loss: 19.7148
Epoch [33/100], Loss: 19.0