In [1]:
import os
from copy import deepcopy

from torch.utils.data import Dataset, DataLoader

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms

import numpy as np

import custom_model

from PIL import Image

# autoreload
%load_ext autoreload
%autoreload 2

In [2]:
# Create the dataset
import custom_data

# folder_paths = [r"D:\Unity\AITX_PanLoc\Assets\data\-30.19358_-31.25824_folder"]
# folder_paths=[r"D:\Unity\AITX_PanLoc\Assets\25.53818_-38.33201_folder"]
folder_paths=[r"D:\Unity\AITX_PanLoc\Assets\data\-22.05379_5.70192_folder"]
dataset = custom_data.OptimizedImagePairDataset(folder_paths, transform=custom_data.transform)

# Split into training and validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# Create data loaders
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=4, shuffle=True)

In [3]:
# Redo model weights
model = custom_model.ImagePositionPredictor()
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params}")
# optimizer = optim.SGD(model.parameters(), lr=0.01)
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)

def custom_loss(pred, target):
    # convert everything to float
    pred = pred.float()
    target = target.float()
    # print(pred[:, :2])
    # print(target[:, :2])
    # print(F.mse_loss(pred[:, :2], target[:, :2]))

    # return mse_loss#+bce_loss

    mse_loss = F.mse_loss(pred[:, :2], target[:, :2])
    bce_loss = F.l1_loss(pred[:, :2], target[:, :2])
    return mse_loss+bce_loss*0.5

Total parameters: 551683


In [5]:
len(train_dataloader), len(val_dataloader)

(100, 25)

In [8]:
num_epochs = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

best=10

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for base_img, current_img, position in train_dataloader:
        base_img, current_img, position = base_img.to(device), current_img.to(device), position.to(device)

        optimizer.zero_grad()
        output = model(base_img, current_img)
        loss = custom_loss(output, position)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

    # validation
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for base_img, current_img, position in val_dataloader:
            base_img, current_img, position = base_img.to(device), current_img.to(device), position.to(device)
            # 

            output = model(base_img, current_img)
            loss = custom_loss(output, position)

            total_loss += loss.item()
    print(f"Validation Loss: {total_loss / (len(val_dataloader)*4):.4f}")

    if (total_loss / len(val_dataloader))<best:
        best=total_loss / len(val_dataloader)
        torch.save(model.state_dict(), "best_model.pth")
    else:
        torch.save(model.state_dict(), "model.pth")

    scheduler.step(avg_loss)

Epoch 1/5, Loss: 0.1736
Validation Loss: 0.0464
Epoch 2/5, Loss: 0.1822
Validation Loss: 0.0187
Epoch 3/5, Loss: 0.1477
Validation Loss: 0.0312
Epoch 4/5, Loss: 0.1674
Validation Loss: 0.0450
Epoch 5/5, Loss: 0.1301
Validation Loss: 0.0297


In [7]:
# validation
model.eval()
total_loss = 0
with torch.no_grad():
    for base_img, current_img, position in val_dataloader:
        base_img, current_img, position = base_img.to(device), current_img.to(device), position.to(device)

        print("BATCH: ")
        for i in range(len(output)):
            numbers = [tensor.item() for tensor in output[i]]
            print(numbers[0:2])
            numbers = [tensor.item() for tensor in position[i]]
            print(numbers)
            print()

        output = model(base_img, current_img)
        loss = custom_loss(output, position)

        total_loss += loss.item()
    

BATCH: 
[0.14978259801864624, 0.07338636368513107]
[0.8324000000000001, -0.32439999999999997]

[0.21963109076023102, -0.9385496973991394]
[0.43320000000000003, 0.7609999999999999]

[0.811793327331543, -0.3725028336048126]
[0.7332, 0.16079999999999997]

[-0.3975684344768524, 0.6964311599731445]
[1.0, -0.8154]

BATCH: 
[0.9226142764091492, -0.698104739189148]
[0.7118000000000001, 0.5291999999999999]

[0.33172231912612915, 0.4837346374988556]
[0.5572, -0.21259999999999998]

[0.8350929617881775, -0.4745353162288666]
[-0.17659999999999998, -0.3]

[0.9980621337890625, -0.8591662049293518]
[-0.2868, -0.8822]

BATCH: 
[0.6960169076919556, -0.24417708814144135]
[-0.38919999999999993, 0.7797999999999999]

[0.7119072675704956, -0.6839057207107544]
[0.4868, -0.4664]

[-0.17688916623592377, -0.5479764342308044]
[0.5640000000000001, 0.32620000000000005]

[-0.2648482024669647, -0.925972044467926]
[0.8152000000000001, -0.6275999999999999]

BATCH: 
[-0.3975684344768524, 0.6964311599731445]
[1.0, -0.547

In [None]:
# load model from best model
model.load_state_dict(torch.load("best_model.pth"))

# validation
model.eval()
total_loss = 0
with torch.no_grad():
    for base_img, current_img, position in val_dataloader:
        base_img, current_img, position = base_img.to(device), current_img.to(device), position.to(device)

        output = model(base_img, current_img)
        loss = custom_loss(output, position)

        total_loss += loss.item()
print(f"Validation Loss: {total_loss / len(val_dataloader):.4f}")

In [None]:
# run prediciton on a data point
model.eval()

base_img, current_img, position = dataset[15]

base_img = base_img.to(device).unsqueeze(0)

current_img = current_img.to(device).unsqueeze(0)

output = model(base_img, current_img)

print(f"Predicted: {output}, Actual: {position}")