In [1]:
import os
from copy import deepcopy

from torch.utils.data import Dataset, DataLoader

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms

import numpy as np

import custom_model

from PIL import Image

# autoreload
%load_ext autoreload
%autoreload 2

In [2]:
# transform for images
transform = transforms.Compose([
    transforms.Resize((2048, 1024)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Get the data
all_data = []
# folder_paths=[r"D:\Unity\AITX_PanLoc\Assets\data\21.34938_-12.25565_folder"]
# folder_paths=[r"D:\Unity\AITX_PanLoc\Assets\data\-30.19358_-31.25824_folder"]
folder_paths=[r"D:\Unity\AITX_PanLoc\Assets\data\25.53818_-38.33201_folder"]
for folder_path in folder_paths:
    file_paths = os.listdir(folder_path)

    base_image = next((os.path.join(folder_path, f) for f in file_paths if "_1." in f and f.endswith(".jpg")), None)
    base_text = next((os.path.join(folder_path, f) for f in file_paths if "_1." in f and f.endswith(".txt")), None)

    if not (base_image and base_text):
        print(f"Base image or text not found in {folder_path}")
        continue

    with open(base_text, 'r') as f:
        base_position = [float(x) for x in f.read().split("(")[1].split(")")[0].split(",")]

    for file_path in file_paths:
        if "_1." in file_path or not file_path.endswith(".jpg"):
            continue

        image_path = os.path.join(folder_path, file_path)
        txt_path = os.path.join(folder_path, f"{os.path.splitext(file_path)[0]}.txt")

        if os.path.exists(txt_path):
            with open(txt_path, 'r') as f:
                location = [float(x) for x in f.read().split("(")[1].split(")")[0].split(",")]
            relative_x = location[0] - base_position[0]
            relative_y = location[2] - base_position[2]
            
            max_distance = 50
            normalized_x = np.clip(relative_x / max_distance, -1, 1)
            normalized_y = np.clip(relative_y / max_distance, -1, 1)
            
            all_data.append((base_image, image_path, normalized_x, normalized_y, 1.0))

In [3]:
# Turn the paths into tensors of the image
for i, data in enumerate(all_data):
    base_image=Image.open(data[0])
    base_image_tensor=transform(base_image).unsqueeze(0)
    current_image=Image.open(data[1])
    current_image_tensor=transform(current_image).unsqueeze(0)
    position=(data[2], data[3])
    all_data[i]=(base_image_tensor, current_image_tensor, position)
    base_image.close()
    current_image.close()

In [None]:
for data in all_data[:15]:
    print(data[2])

    # dataset = ImagePairDataset(all_data)
# dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

In [4]:
class ImagePairDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        base_img, current_img, position = self.data[idx]
        return base_img.squeeze(0), current_img.squeeze(0), torch.tensor(position)

# split into training and validation by 80/20
split = int(0.8 * len(all_data))
train_data = all_data[:split]
val_data = all_data[split:]

train_dataset = ImagePairDataset(train_data)
val_dataset = ImagePairDataset(val_data)

train_dataloader = DataLoader(train_dataset, batch_size=5, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=5, shuffle=True)

In [8]:
# Redo model weights
model = custom_model.ImagePositionPredictor()
# optimizer = optim.SGD(model.parameters(), lr=0.01)
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)

def custom_loss(pred, target):
    # convert everything to float
    pred = pred.float()
    target = target.float()
    # print(pred[:, :2])
    # print(target[:, :2])
    # print(F.mse_loss(pred[:, :2], target[:, :2]))

    # return mse_loss#+bce_loss

    #mse_loss = F.mse_loss(pred[:, :2], target[:, :2])
    bce_loss = F.l1_loss(pred[:, :2], target[:, :2])
    return bce_loss

In [9]:
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

best=10

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for base_img, current_img, position in train_dataloader:
        base_img, current_img, position = base_img.to(device), current_img.to(device), position.to(device)

        optimizer.zero_grad()
        output = model(base_img, current_img)
        loss = custom_loss(output, position)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

    # validation
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for base_img, current_img, position in val_dataloader:
            base_img, current_img, position = base_img.to(device), current_img.to(device), position.to(device)

            output = model(base_img, current_img)
            loss = custom_loss(output, position)

            total_loss += loss.item()
    print(f"Validation Loss: {total_loss / len(val_dataloader):.4f}")

    if (total_loss / len(val_dataloader))<best:
        best=total_loss / len(val_dataloader)
        torch.save(model.state_dict(), "best_model.pth")
    else:
        torch.save(model.state_dict(), "model.pth")

    scheduler.step(avg_loss)

Epoch 1/10, Loss: 0.3471
Validation Loss: 0.3364
Epoch 2/10, Loss: 0.3332
Validation Loss: 0.3327
Epoch 3/10, Loss: 0.3205
Validation Loss: 0.3494
Epoch 4/10, Loss: 0.3353
Validation Loss: 0.3369
Epoch 5/10, Loss: 0.3359
Validation Loss: 0.3229
Epoch 6/10, Loss: 0.3218
Validation Loss: 0.3450
Epoch 7/10, Loss: 0.3356
Validation Loss: 0.3474
Epoch 8/10, Loss: 0.3351
Validation Loss: 0.3276
Epoch 9/10, Loss: 0.3188
Validation Loss: 0.3249
Epoch 10/10, Loss: 0.3162
Validation Loss: 0.3239


In [None]:
# validation
model.eval()
total_loss = 0
with torch.no_grad():
    for base_img, current_img, position in val_dataloader:
        base_img, current_img, position = base_img.to(device), current_img.to(device), position.to(device)

        output = model(base_img, current_img)
        loss = custom_loss(output, position)

        total_loss += loss.item()
print(f"Validation Loss: {total_loss / len(val_dataloader):.4f}")

In [None]:
# validation
model.eval()
total_loss = 0
with torch.no_grad():
    for base_img, current_img, position in val_dataloader:
        base_img, current_img, position = base_img.to(device), current_img.to(device), position.to(device)

        numbers = [tensor.item() for tensor in output[0]]
        print(numbers[0:2])
        numbers = [tensor.item() for tensor in position[0]]
        print(numbers)
        print()

        output = model(base_img, current_img)
        loss = custom_loss(output, position)

        total_loss += loss.item()
    

In [None]:
# run prediciton on a data point
model.eval()

base_img, current_img, position = dataset[15]

base_img = base_img.to(device).unsqueeze(0)

current_img = current_img.to(device).unsqueeze(0)

output = model(base_img, current_img)

print(f"Predicted: {output}, Actual: {position}")