In [1]:
import torch
from transformers import BeitFeatureExtractor, BeitForImageClassification
from torch.utils.data import random_split, DataLoader

import numpy as np
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

from dataset import SingleImageDataset, FullLocationDataset

In [2]:
dataset = SingleImageDataset()
train_set, val_set, test_set = random_split(dataset, [0.6, 0.2, 0.2])

In [3]:
train_loader = DataLoader(train_set, batch_size=96, num_workers=4, shuffle=True, pin_memory=True)
val_loader = DataLoader(val_set, batch_size=96, num_workers=4, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=96, num_workers=4, shuffle=False)

In [4]:
feature_extractor = BeitFeatureExtractor.from_pretrained('microsoft/beit-base-patch16-384')
model = BeitForImageClassification.from_pretrained('microsoft/beit-base-patch16-384').to("cuda")

model.classifier = torch.nn.Linear(768, 2).to("cuda")

for param in model.parameters():
    param.requires_grad = False
    
for param in model.classifier.parameters():
    param.requires_grad = True

model.classifier



Linear(in_features=768, out_features=2, bias=True)

In [6]:
def haversine_distance(lat1, lon1, lat2, lon2):
    # Radius of the Earth in kilometers
    R = 6371.0

    # Convert latitude and longitude from degrees to radians
    lat1_rad = torch.deg2rad(lat1)
    lon1_rad = torch.deg2rad(lon1)
    lat2_rad = torch.deg2rad(lat2)
    lon2_rad = torch.deg2rad(lon2)

    # Differences in coordinates
    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad    

    a = torch.sin(dlat / 2)**2 + torch.cos(lat1_rad) * torch.cos(lat2_rad) * torch.sin(dlon / 2)**2
    c = 2 * torch.atan2(torch.sqrt(a), torch.sqrt(1 - a))

    distance = R * c
    return distance

class HaversineLoss(torch.nn.Module):
    def __init__(self):
        super(HaversineLoss, self).__init__()

    def forward(self, outputs, targets):
        # Assuming outputs and targets are both of shape (batch_size, 2)
        lat1, lon1 = outputs[:, 0], outputs[:, 1]
        lat2, lon2 = targets[:, 0], targets[:, 1]
        
        distances = haversine_distance(lat1, lon1, lat2, lon2)
        return distances.mean()

In [7]:
criterion = HaversineLoss()
optimizer = torch.optim.Adam(model.classifier.parameters(), lr=1e-4)

In [9]:
epochs = 100
train_losses = []
val_losses = []

for epoch in tqdm(range(1, epochs+1)):
    running_train_loss = []
    running_val_loss = []

    for images, labels in train_loader:
        labels = labels.cuda(non_blocking=True)
        
        optimizer.zero_grad()

        features = feature_extractor(images, return_tensors="pt")
        features = features['pixel_values'].cuda(non_blocking=True)

        y_pred = model(features)

        loss = criterion(y_pred.logits, labels)
        loss.backward()
        running_train_loss.append(loss.item())

    train_losses.append(np.mean(running_train_loss))

    for images, labels in val_loader:
        with torch.no_grad():
            labels = labels.cuda(non_blocking=True)

            features = feature_extractor(images, return_tensors="pt")
            features = features['pixel_values'].cuda(non_blocking=True)

            y_pred = model(features)

            loss = criterion(y_pred.logits, labels)
            running_val_loss.append(loss.item())

    val_losses.append(np.mean(running_val_loss))

    print(f"Epoch: {epoch} \t Train Loss: {running_train_loss[-1]:.2f} \t Val Loss: {running_val_loss[-1]:.2f}")

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 1 	 Train Loss: 5235.01 	 Val Loss: 5263.53
Epoch: 2 	 Train Loss: 5250.18 	 Val Loss: 5239.48
Epoch: 3 	 Train Loss: 5135.81 	 Val Loss: 5282.84
Epoch: 4 	 Train Loss: 5220.97 	 Val Loss: 5204.14
Epoch: 5 	 Train Loss: 5240.89 	 Val Loss: 5249.39
Epoch: 6 	 Train Loss: 5224.96 	 Val Loss: 5348.62
Epoch: 7 	 Train Loss: 5182.38 	 Val Loss: 5388.10
Epoch: 8 	 Train Loss: 5315.13 	 Val Loss: 5253.56


KeyboardInterrupt: 