In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

import albumentations as A
import cv2
import numpy as np
import skimage as ski

import matplotlib.pyplot as plt
import copy

import os
import sys
import psutil
import pynvml

from tqdm import tqdm
from IPython.display import clear_output


sys.path.append('/home/meribejayson/Desktop/Projects/SharkCNN/training_models/dataloaders/')

from train_dataset import SharkDatasetTrain as SharkDataset

In [2]:
# Seed PyTorch's RNG so parameter initialisation is repeatable between runs.
torch.manual_seed(12)

# Everything below is placed on the GPU, so stop right away if CUDA is missing.
if not torch.cuda.is_available():
    raise Exception("Couldn't find CUDA")

device = torch.device("cuda")

# NVML handle for GPU index 0; the training loop reads memory usage through it.
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)

# Dataset locations. The train sub-directories are spelled relative to the
# megaset root so the root only has to be edited in one place.
megaset_path = "/home/meribejayson/Desktop/Projects/SharkCNN/datasets-reduced/megaset/"
megaset_train_images_path = f"{megaset_path}train/images/"
megaset_train_labels_path = f"{megaset_path}train/labels/"

# Image dimensions (width, height) -- presumably in pixels; confirm against the dataset.
image_width, image_height = 1920, 1080

In [3]:
class ANN(nn.Module):
    """Feed-forward network with one hidden layer and an input skip connection.

    The hidden layer has ``int(input_size / 2)`` units. Its ReLU activations
    are concatenated with the raw input along dimension 1, projected to a
    single value by a second linear layer, and passed through a sigmoid.
    """

    def __init__(self, input_size):
        """Build the layers for inputs whose last dimension is ``input_size``."""
        super().__init__()

        hidden_width = int(input_size / 2)
        skip_width = input_size + hidden_width

        # linear1 must be created before linear2: creation order determines
        # both the seeded initial weights and the state_dict key order.
        self.linear1 = nn.Linear(input_size, hidden_width)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(skip_width, 1)
        self.sig = nn.Sigmoid()

    def forward(self, x):
        """Map ``x`` of shape ``(batch, input_size)`` to sigmoid outputs of shape ``(batch, 1)``."""
        hidden = self.relu1(self.linear1(x))
        # Skip connection: the output layer sees the raw input next to the hidden activations.
        skip = torch.cat([x, hidden], dim=1)
        return self.sig(self.linear2(skip))

In [4]:
# Loader settings, named so they are easy to find and tune.
BATCH_SIZE = 500_000
NUM_WORKERS = 5

shark_dataset = SharkDataset()
# NOTE(review): no shuffling is requested here -- confirm that SharkDataset
# controls sample order itself if order matters for training.
data_loader = data.DataLoader(
    shark_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)

In [5]:
# state_dict = torch.load("./train-final-2/lr_weights_train_2.tar")

In [6]:
# Hyper-parameters for the network and its optimizer.
INPUT_SIZE = 85        # input_size handed to ANN
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-4

model = ANN(INPUT_SIZE)
# To resume from saved weights, load them before moving the model to the device:
# model.load_state_dict(state_dict)
model.to(device)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

In [7]:
# Sentinel used as the "previous loss" before any epoch has run.
LARGE_NUM = 2e120
# Training stops once the epoch-average loss changes by no more than this.
target_loss_change = 1e-6
# NOTE(review): presumably the number of examples in two full frames; it is
# not referenced by the code visible in this notebook -- confirm before removing.
exps_in_iter = 2 * image_width * image_height

# Class-weighting constants: label-1 samples are up-weighted by kappa_inv,
# and every sample weight is then scaled by coef.
kappa_inv = 323
kappa = 1 / kappa_inv
coef = (kappa + 1) / 2

def train_model(model, optimizer, data_loader, max_iters=None):
    """Train ``model`` until the epoch-average loss stops changing.

    Each pass over ``data_loader`` minimises a class-weighted binary
    cross-entropy: samples labelled 1 get weight ``kappa_inv * coef`` and all
    other samples get weight ``coef``. Passes repeat until two consecutive
    sample-weighted average losses differ by at most ``target_loss_change``.
    After every pass the loss, CPU/GPU usage and all trainable parameters are
    printed, replacing the previous cell output.

    Args:
        model: Module mapping a ``(batch, features)`` float tensor to
            ``(batch, 1)`` values in [0, 1].
        optimizer: Optimizer already bound to ``model``'s parameters.
        data_loader: Iterable of 2-D batches whose last column is the label
            and whose remaining columns are the features.
        max_iters: Optional cap on the number of passes. ``None`` (the
            default) keeps the original behaviour of running until the loss
            converges.

    Raises:
        ValueError: If a pass over ``data_loader`` yields no samples.
    """
    model.train()

    # Sentinels chosen so the first convergence check always lets the loop start.
    last_average_loss = LARGE_NUM
    curr_average_loss = 0
    curr_iter = 1

    while np.abs(curr_average_loss - last_average_loss) > target_loss_change:
        # Optional upper bound so a slowly converging run still terminates.
        if max_iters is not None and curr_iter > max_iters:
            break

        total_iter_avg_loss = 0
        total_points = 0

        for point in data_loader:
            data_inputs = point[:, :-1].to(device).float()
            data_labels = point[:, -1].to(device).float()

            preds = model(data_inputs).squeeze(dim=1)

            # Per-sample weights: kappa_inv * coef for label 1, coef otherwise.
            weights = torch.ones_like(data_labels)
            weights[data_labels == 1.0] = kappa_inv
            weights *= coef

            # Functional form avoids building a new loss module for every batch.
            loss = F.binary_cross_entropy(preds, data_labels, weight=weights)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by batch size so a short final batch
            # does not skew the epoch average.
            batch_size = data_inputs.size(0)
            total_iter_avg_loss += loss.item() * batch_size
            total_points += batch_size

        if total_points == 0:
            raise ValueError("data_loader yielded no samples; cannot compute an average loss")

        last_average_loss = curr_average_loss
        curr_average_loss = total_iter_avg_loss / total_points

        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        # used / total is a 0..1 ratio; scale it before truncating so the
        # value printed next to "%" really is a percentage.
        gpu_memory_percent = int(100 * info.used / info.total)

        clear_output(wait=True)
        print(f'Current iteration: {curr_iter - 1}, Average Loss: {last_average_loss}')
        print(f'Current iteration: {curr_iter}, Average Loss: {curr_average_loss}')
        print(f"CPU Usage: {psutil.cpu_percent()}% GPU memory usage: {gpu_memory_percent}% \n")

        print("Current Parameters:")
        for name, param in model.named_parameters():
            if param.requires_grad:
                print(name, param.data)

        curr_iter += 1

In [8]:
# Run training on the objects built in the cells above.
train_model(model=model, optimizer=optimizer, data_loader=data_loader)

KeyboardInterrupt: 

In [None]:
# Release NVML; GPU memory telemetry is not read after this point.
pynvml.nvmlShutdown()

# Snapshot the trained parameters, show them, and write them to disk.
WEIGHTS_PATH = "ann_weights_train_2.tar"
state_dict = model.state_dict()
print(state_dict)
torch.save(state_dict, WEIGHTS_PATH)

OrderedDict([('linear1.weight', tensor([[-0.0295, -0.0084, -0.0275,  ..., -0.0002, -0.0007, -0.0008],
        [-0.0022,  0.0188, -0.0003,  ..., -0.0003, -0.0006, -0.0007],
        [ 0.0197,  0.0840,  0.0184,  ..., -0.0152, -0.0154, -0.0177],
        ...,
        [-0.0496, -0.0077, -0.0382,  ..., -0.0153, -0.0165, -0.0190],
        [-0.0030,  0.0174, -0.0007,  ..., -0.0004, -0.0005, -0.0008],
        [ 0.0251,  0.0683,  0.0280,  ..., -0.0267, -0.0178, -0.0266]],
       device='cuda:0')), ('linear1.bias', tensor([ 0.4571,  0.4926,  0.3845,  0.8409,  0.4540,  0.3664,  0.3033,  0.4902,
         0.4560,  0.2078,  0.3590,  0.3504,  0.3298,  0.4441,  1.0028, -0.0083,
        -0.0158,  0.2924,  0.3626, -0.0121,  0.7857,  0.0215,  0.3392,  0.7735,
         0.3349,  0.3200,  0.9406,  0.3883,  0.3078,  0.0150, -0.0304,  0.6940,
         0.3596,  0.7300,  0.4204,  0.6676,  0.3773,  0.3340,  0.5896,  0.4075,
         0.5844,  0.3671], device='cuda:0')), ('linear2.weight', tensor([[ 1.1929e-01, -1.5