In [1]:
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.nn.functional as F

import pynvml
import psutil

import cv2
import numpy as np

import matplotlib.pyplot as plt
import multiprocessing as mp


import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
import sys

from tqdm import tqdm
from IPython.display import clear_output

sys.path.append('/home/meribejayson/Desktop/Projects/SharkCNN/training_models/YOLO3D/yolo_dataloaders')
sys.path.append("/home/meribejayson/Desktop/Projects/SharkCNN/training_models/YOLO3D/model_impl")

from train_dataset import SharkYOLODataset
from yolo3d import YOLO3D
import yolo_loss

In [2]:
torch.manual_seed(12)

if not torch.cuda.is_available():
    raise Exception("Couldn't find CUDA")

device = torch.device("cuda")

pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)

In [3]:
num_frames = 50
first_conv_layer_size = 16

In [4]:
class YOLO_Loss(nn.Module):

    def __init__(self):
        super().__init__()
        
        self.high = yolo_loss.YOLO_Loss()
        self.mid = yolo_loss.YOLO_Loss()
        self.low = yolo_loss.YOLO_Loss()

    def forward(self, high, high_hat, mid, mid_hat, low, low_hat):
        total_loss = 0
        xbox_hat, xclass_hat = high_hat
        xbox, xclass , num_pos_exps = high

        total_loss += self.high(xbox, xbox_hat, xclass, xclass_hat, num_pos_exps)

        xbox_hat, xclass_hat = mid_hat
        xbox, xclass , num_pos_exps = mid

        total_loss += self.mid(xbox, xbox_hat, xclass, xclass_hat, num_pos_exps)

        xbox_hat, xclass_hat = low_hat
        xbox, xclass , num_pos_exps = low

        total_loss += self.low(xbox, xbox_hat, xclass, xclass_hat, num_pos_exps)
        
        return total_loss

In [5]:
mp.set_start_method('spawn', force=True)
shark_dataset = SharkYOLODataset(num_frames, device=device)
data_loader = data.DataLoader(dataset=shark_dataset, batch_size=1, num_workers=1)

In [6]:
# state_dict = torch.load("./train-1/ann_weights_train_1.tar")

In [7]:
model = YOLO3D(num_frames, num_features=first_conv_layer_size)
# model.load_state_dict(state_dict)
model.to(device)

YOLO3D(
  (backbone): Backbone(
    (conv1): Conv(
      (conv3d): Conv3d(3, 16, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
      (batchnorm3d): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (silu): SiLU()
    )
    (conv2): Conv(
      (conv3d): Conv3d(16, 32, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
      (batchnorm3d): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (silu): SiLU()
    )
    (c2f1): C2F(
      (conv1): Conv(
        (conv3d): Conv3d(32, 32, kernel_size=(1, 1, 1), stride=(1, 1, 1))
        (batchnorm3d): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (silu): SiLU()
      )
      (bottlenecks): ModuleList(
        (0-1): 2 x Bottleneck(
          (conv1): Conv(
            (conv3d): Conv3d(10, 10, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
            (batchnorm3d): BatchNorm3d(10, eps=1e-05, momentu

In [8]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=0.0001)

In [9]:
loss_module = YOLO_Loss()

In [10]:
LARGE_NUM = 2e120
target_loss_change = 1e-6

def train_model(model, optimizer, data_loader, epochs=25):
    model.train()
    last_average_loss = LARGE_NUM
    curr_average_loss = 0

    for epoch in tqdm(range(epochs), desc='Epoch Progress'):
        total_iter_avg_loss = 0

        # Wrapping data_loader with tqdm for visibility on each example/batch
        for point in tqdm(data_loader, desc=f'Processing Data - Epoch {epoch+1}', leave=False):
            exp, (high, mid, low) = point

            low_pred, mid_pred, high_pred = model(exp)

            loss = loss_module(high, high_pred, mid, mid_pred, low, low_pred)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_iter_avg_loss += loss.item()

        if (epoch % 2) == 0:
            state_dict = model.state_dict()
            torch.save(state_dict, f"/yolo3d_{epoch}.tar")

        last_average_loss = curr_average_loss
        # Assuming 'data_loader' accurately represents the number of examples processed
        curr_average_loss = total_iter_avg_loss / len(data_loader)  

        print(f'Epoch: {epoch}, Last Average Loss: {last_average_loss:.6f}, Current Average Loss: {curr_average_loss:.6f}')
        # Assuming setup for pynvml and psutil is configured
        # print(f"CPU Usage: {psutil.cpu_percent()}% GPU memory usage: {int(info.used / info.total * 100)}% \n")

In [11]:
train_model(model, optimizer, data_loader)

Epoch Progress:   0%|          | 0/25 [00:00<?, ?it/s][W CudaIPCTypes.cpp:16] Producer process has been terminated before all shared CUDA tensors released. See Note [Sharing CUDA tensors]
Epoch Progress:   0%|          | 0/25 [00:03<?, ?it/s]


IndexError: too many indices for tensor of dimension 0

In [None]:
pynvml.nvmlShutdown()