In [1]:
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import json
import os
from sklearn.model_selection import train_test_split



# OK let's first see what we're working with

In [2]:
def read_and_show_img(filepath):
    plt.figure(figsize = (30,30))
    image = cv2.imread(filepath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.imshow(image)
    plt.show()

In [3]:
uav = "dataset/RGB/uav_images/"
sat = "dataset/RGB/sat_images/"
fsat = "dataset/RGB/false_sat_images/"

In [None]:
read_and_show_img(uav + "DJI_0669.JPG")

In [None]:
read_and_show_img(sat+"DJI_0669.JPG")

In [None]:
read_and_show_img(fsat+"(38.5727858867262,-90.1775736641245), (38.57184403801162, -90.17577406541481).jpg")

In [None]:
with open("dataset/RGB/metadata.json", "r") as f:
    metadata = json.load(f)

In [None]:
metadata

## Ok, so here I assume that images from sat and uav dirs with the same name are images of the same location that should be matched, while false sat images are random locations to check for false positives when matching.
Also, for now I'll mostly ignore metadata, though not yet sure if there's something useful for the task, like the information on season etc

# Initial plan
<ol>
  <li>Build simple dataset with train/validation/test split, with pairs of images, and values 0 if it's different location and 1 if the same location </li>
    <li>Create simple baseline solution</li>
    <li/> Evaluate
    <li/>See how good it is, what's bad and what to do next
</ol>

For reproducability let's set random seed constant for everything

In [None]:
seed = 13
np.random.seed(seed)

In [None]:
correct_names_sat = np.array(os.listdir(sat))
correct_names_uav = np.array(os.listdir(uav))
false_names = np.array(os.listdir(fsat))

In [None]:
set(correct_names_sat) == set(correct_names_uav)

In [None]:
len(correct_names_sat)

In [None]:
len(false_names)

Ok so balance is around 1/16

In [None]:
ctrain, ctest = train_test_split(correct_names_sat, test_size = 0.2, shuffle=True, random_state=seed)

In [None]:
ftrain, ftest = train_test_split(false_names, test_size = 0.2, shuffle=True, random_state=seed)

In [None]:
ctrain.size

Now, let's define what is correct and what is not correct
So far, if img has same name it represents same location, even if in different folders
And if the img has different name, even if in the same folder, it's different location
Then types of pairs can be:
<ol>
    <li>identical images from any folder - this part I'll drop as I asumme it'll only pollute the dataset</li>
    <li>pair of sat image and uav image with same name - correct pair with value of 1 </li>
    <li>pair of sat image and uav image with a different name - incorrect pair with value of 0 </li>
    <li>pair of sat image and false image - incorrect pair with value of 0 </li>
    <li>pair of uav image and false image - incorrect pair with value of 0 </li>
    <li>pair of two non-identical false images - incorrect pair with value of 0 </li>
</ol>
For initial version I'll aim to a split to approximately have 1 / 1 / 3 / 9, which would mean 1 to 13 balance of classes, which is somewhat similar to 1/16 image split

In [None]:
def create_correct_set(arr):
    return np.array([[uav + x, sat + x, 1] for x in arr])

In [None]:
def make_two_arrays(arr):
    r = np.array(arr, copy=True)  
    l = np.array(arr, copy=True)
    np.random.shuffle(l)
    return r, l

def create_incorrect_same_category(arr, pref1, pref2, limit = 10000):
    r, l = make_two_arrays(arr)
    same = []
    res = []
    #maybe it's a little inefficient, but it doesn't impact performance of final solution so should be fine
    while len(r) > 1 or len(res) == 0 or len(res) >= limit:
        for i in range(len(r)):
            if r[i] != l[i]:
                res.append([pref1 + r[i], pref2 + l[i], 0])
                if len(res) == limit:
                    return np.array(res)
            else:
                same.append(r[i])
        r, l = make_two_arrays(same)
        same = []
    return np.array(res)

In [None]:
correct = create_correct_set(ctrain)

In [None]:
incorrect1 = create_incorrect_same_category(ctrain, uav, sat)

In [None]:
incorrect1

In [None]:
correct.shape

In [None]:
def create_correct_false_pairs(cvals, fvals, scale_factor, pref1, pref2):
    from_correct = np.array(list(cvals) * 3)
    fvalues = np.array(list(fvals) * (int(len(from_correct) / len(fvals)) + 1))
    #To make selection more smooth, same item can't be selected more than 1 time on the current dataset, so it forces to select different ones
    random_choices = np.random.choice(fvalues, len(from_correct), replace = False)
    return np.array([[pref1 + from_correct[i], pref2 + random_choices[i], 0] for i in range(len(random_choices))])

In [None]:
incorrect2 = create_correct_false_pairs(ctrain, ftrain, 3, uav, fsat)

In [None]:
incorrect3 = create_incorrect_same_category(ftrain, fsat, fsat, len(correct) * 9)

In [None]:
resulting_array = np.concatenate((correct, incorrect1, incorrect2, incorrect3))

In [None]:
df = pd.DataFrame(resulting_array, columns = ["path1", "path2", "label"])

In [None]:
print(df[df.label == "1"].values)

now I'll assemble it into a function, as I'll need 2 of those

In [None]:
def getDfSplit(carr, farr, scale_one_correct, scale_no_correct):
    print("creating correct")
    correct = create_correct_set(carr)
    print("correct_number " + str(len(correct)))
    print("creating incorrect1")
    incorrect1 = create_incorrect_same_category(carr, uav, sat)
    print("icorrect1_number " + str(len(incorrect1)))
    print("creating incorrect2")
    incorrect2 = create_correct_false_pairs(carr, farr, scale_one_correct, uav, fsat)
    print("icorrect2_number " + str(len(incorrect2)))
    print("creating incorrect3")
    incorrect3 = create_incorrect_same_category(farr, fsat, fsat, len(correct) * scale_no_correct)
    print("icorrect3_number " + str(len(incorrect3)))
    print("Concatenating")
    resulting_array = np.concatenate((correct, incorrect1, incorrect2, incorrect3))
    print("concatenated_number " + str(len(resulting_array)))
    return pd.DataFrame(resulting_array, columns = ["path1", "path2", "label"])

In [None]:
df_train = getDfSplit(ctrain, ftrain, 3, 9)
df_test = getDfSplit(ctest, ftest, 3, 9)

In [None]:
df_train

In [None]:
df_train

In [None]:
df_test

In [None]:
df_test.to_csv("df_test.csv")

### Now, that dataset is at least defined, we can proceed to actual algorithm 
For this, because images can be different in brightness, color, rotation, etc, we need some kind of feature extraction algorithm
Basically, there are 2 ways, somewhat oldschool with known feature extractor like SIFT on both images and with some ML algorith compare them; and second way is to create some CNN architecture, like siamese networks, or to do both or multitude of one and ensemble. I'll rule out ensembling, as it tends to increase computing times significantly.
Let's start with some CNN and see what we have from there

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms.v2 as transforms
import torch.optim as optim
from torchvision.io import read_image
from torch import tensor
import time


In [None]:
if torch.cuda.is_available(): 
    dev = "cuda:0" 
else: 
    dev = "cpu" 
device = torch.device(dev)

In [None]:
import pickle

In [None]:
def preprocess_and_save_dataset(path, dataloader):
    for i, data in enumerate(dataloader):
        with open(path + str(i) + ".pkl", 'wb') as outp:
            pickle.dump(data, outp, pickle.HIGHEST_PROTOCOL)

In [None]:
for i, data in enumerate(dataloader):
    with open("processed_dataset/" + str(i) + ".pkl", 'wb') as outp:
        pickle.dump(data, outp, pickle.HIGHEST_PROTOCOL)

In [None]:
for i, data in enumerate(dataloader_eval):
    with open("processed_eval/" + str(i) + ".pkl", 'wb') as outp:
        pickle.dump(data, outp, pickle.HIGHEST_PROTOCOL)

In [None]:
with open("processed_dataset/0.pkl", 'rb') as inp:
    company1 = pickle.load(inp)
    print(company1)


In [None]:
class SiameseResNet(nn.Module):
    def __init__(self):
        super(SiameseResNet, self).__init__()
        if torch.cuda.is_available(): 
            dev = "cuda:0" 
        else: 
            dev = "cpu" 
        self.dev = torch.device(dev) 
        # Load pre-trained ResNet-18 models
        self.cnn1 = self.load_pretrained_model()
        self.cnn2 = self.load_pretrained_model()
        
        for param in self.cnn1.parameters():
            param.requires_grad = False
        for param in self.cnn2.parameters():
            param.requires_grad = False
        
        # Add a new layer on top
        self.classifier = nn.Sequential(
            nn.LayerNorm(25088 * 2),
            nn.Linear(25088 * 2, 4096),
            nn.LayerNorm(4096),
            nn.LeakyReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.LayerNorm(4096),
            nn.LeakyReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 1),
        )# Input size is 512 for ResNet-18
        self.to(self.dev)
        
    def load_pretrained_model(self):
        # Download and load a pre-trained ResNet-18 model
        model = models.vgg16(weights=True)
        return model.features
        
    def forward_once(self, x1, x2):
        # Forward pass through both of the CNN branches
        out1 = self.cnn1(x1)
        out1 = out1.view(out1.size()[0], -1)
        out2 = self.cnn2(x2)
        out2 = out2.view(out2.size()[0], -1)
        return out1, out2
    
    def forward(self, input1, input2):
        # Forward pass through both CNN branches
        output1, output2 = self.forward_once(input1.to(self.dev), input2.to(self.dev))
        
        # Concatenate the outputs of both branches
        combined = torch.cat((output1, output2), dim=1)
        #print("COMBINED")
        #print(combined)
        # Forward pass through the new layer
        output = self.classifier(combined)
        #print("OUTPUT : " + str(output))
        #print("SIGMOID")
        #print(torch.sigmoid(output))
        return torch.sigmoid(output)  # Apply sigmoid activation for binary classification

# Instantiate the SiameseResNet model
model = SiameseResNet()

# Print the model architecture
print(model)

In [None]:
model.parameters

In [None]:
# Custom dataset class
class SiameseDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        img1_path, img2_path, label = self.data[idx]
        
        # Load images using OpenCV
        img1 = read_image(img1_path)
        img2 = read_image(img2_path)
        
        # Apply transformations if specified
        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)
        tens = torch.tensor(float(label))
        return img1, img2, tens

# Transformations to apply to the images
transform = transforms.Compose([
    transforms.ToPILImage(),  # Convert numpy arrays to PIL images
    transforms.Resize((224, 224)),# Resize images to fit ResNet input size
    transforms.ToTensor(),# Convert images to tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize image pixels
])

# Create a SiameseDataset instance
dataset = SiameseDataset(df_train.values, transform=transform)

# Define data loader
batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

eval_set = SiameseDataset(df_test.values, transform=transform)
dataloader_eval = DataLoader(eval_set, batch_size=batch_size, shuffle=False)

Here I've noticed that 90% of the time learning is actually spent in preprocessing stage, so we might as well run it once, store results (which is approx 5gb, which might even fit into RAM or for 16 gb VRAM even into it, but I'll continue with this setup), and then just read already processed images

In [None]:
def load_batch(path, num):
    with open(path + str(num) + ".pkl", 'rb') as inp:
        data = pickle.load(inp)
        return data

In [None]:
def evaluate_model(model, dataloader1, dataloader2):
    loss1, acc1 = model_eval(model, dataloader1)
    loss2, acc2 = model_eval(model, dataloader2)
    return loss1, loss2, acc1, acc2

def model_eval(model, dataloader):
    total_correct = 0.
    total_samples = 0.
    total_loss = 0.
    with torch.no_grad():
        for i, data in enumerate(dataloader):
            img1, img2, labels = data
            reshaped_labels = torch.reshape(labels, (labels.shape[0], 1)).to(device)
            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(img1, img2)
            # Calculate loss
            loss = criterion(outputs, reshaped_labels)
            
            predictions = (outputs > 0.5).float()
            correct = (predictions == reshaped_labels).sum().item()
            total_correct += correct
            total_samples += labels.size(0)
            total_loss += loss
    return total_loss, float(total_correct)/total_samples

In [None]:
def evaluate_model_prep(model, criterion = nn.BCELoss(weight = tensor([13]).to(device))):
    path_train = "processed_dataset/"
    train_max_index = 148
    path_test = "processed_eval/"
    test_max_index = 37
    loss1, acc1 = model_eval_prep(model, path_train, train_max_index, criterion)
    loss2, acc2 = model_eval_prep(model, path_test, test_max_index, criterion)
    return loss1, loss2, acc1, acc2

def model_eval_prep(model, path, max_index, criterion):
    total_correct = 0.
    total_samples = 0.
    total_loss = 0.
    with torch.no_grad():
        for i in range (max_index + 1):
            with open(path + str(i) + ".pkl", 'rb') as inp:
                data = pickle.load(inp)
            img1, img2, labels = data
            reshaped_labels = torch.reshape(labels, (labels.shape[0], 1)).to(device)
            # Forward pass
            outputs = model(img1, img2)
            # Calculate loss
            loss = criterion(outputs, reshaped_labels)
            
            predictions = (outputs > 0.5).float()
            correct = (predictions == reshaped_labels).sum().item()
            total_correct += correct
            total_samples += labels.size(0)
            total_loss += loss
    return total_loss/total_samples, float(total_correct)/total_samples

In [None]:
weight = tensor([ 13.]).to(device) #
criterion = nn.BCELoss(weight = weight) #Initially I used BCELoss but it appears to be bugged https://discuss.pytorch.org/t/model-weights-not-being-updated/1842/6
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the network
num_epochs = 3
path_train = "processed_dataset/"
train_max_index = 148

for epoch in range(num_epochs):
    running_loss = 0.0
    epoch_start = time.time()
    loading_time_start = time.time()
    
    for i in range(train_max_index + 1):
        with open(path_train + str(i) + ".pkl", 'rb') as inp:
            data = pickle.load(inp)
        if i % 10 == 9:
            print("loading time : " + str(time.time() - loading_time_start))
        mini_epoch_start = time.time()
        img1, img2, labels = data
        reshaped_labels = torch.reshape(labels, (labels.shape[0], 1)).to(device)
        # Zero the parameter gradients
        optimizer.zero_grad()
        value_before = float(list(model.parameters())[-1][0])
        # Forward pass
        outputs = model(img1, img2)
        # Calculate loss
        loss = criterion(outputs, reshaped_labels)
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        value_after = float(list(model.parameters())[-1][0])
        if abs(value_before - value_after) < 0.000000000001:
            print("VALUES ARE THE SAME")
        
        # Print statistics
        running_loss += loss.item()
        if i % 10 == 9:  # Print every 10 mini-batches
            print('%f per batch : [%d, %5d] loss: %.3f' %
                  (time.time() - mini_epoch_start, epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0
        loading_time_start = time.time()
    model.eval()
    loss_train, loss_test, acc_train, acc_test = evaluate_model_prep(model)
    print('\n loss train: %.3f; acc train %.3f \n loss test %.3f acc test %.3f' %
                  (loss_train, acc_train * 100, loss_test, acc_test * 100))
    model.train()
    epoch_end = time.time()
    print("EPOCH TIME : " + str(epoch_end - epoch_start))
print('Finished Training')

In [None]:
for k in enumerate(dataloader):
    print(k)
    

In [None]:
train_features, train_labels = next(iter(dataloader))

In [None]:
dataset.__getitem__(0)

In [None]:

torch.cuda.is_available()

In [None]:
img = read_image(uav + "DJI_0669.JPG")

In [None]:
time.time()

In [None]:
plt.imshow(img.permute(1, 2, 0))

In [None]:
model = models.vgg16(weights=True)

In [None]:
old_class = model.classifier

In [None]:
old_class

In [None]:
old_class[6] = nn.Linear(4096, 1)

In [None]:
model

In [None]:
mini_epoch_start = time.time()
k = 0
for i, data in enumerate(dataloader):
    print("time to get next " + str(time.time() - mini_epoch_start))
    k+=1
    if (k > 4):
        break

In [None]:
data

In [None]:
for k in model.parameters():
    print(k)

In [None]:
float(list(model.parameters())[-1][0])

In [None]:
with open(path_train + str(1) + ".pkl", 'rb') as inp:
        data = pickle.load(inp)

In [None]:
data[0].std()

OK so time to evaluate it to some extent

In [None]:
correct_labels = []
predict_labels = []
model.eval()
path = "processed_eval/"
max_index = 37
with torch.no_grad():
    for i in range (max_index + 1):
        with open(path + str(i) + ".pkl", 'rb') as inp:
            data = pickle.load(inp)
        img1, img2, labels = data
        correct_labels = correct_labels + labels.tolist()
        reshaped_labels = torch.reshape(labels, (labels.shape[0], 1)).to(device)
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(img1, img2)
        # Calculate loss
        loss = criterion(outputs, reshaped_labels)
        predictions = (outputs > 0.5).float()
        predict_labels = predict_labels + predictions.tolist()


In [None]:
correct_labels

In [None]:
predict_labels

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [None]:
confusion_matrix(correct_labels, predict_labels)

In [None]:
precision_score(correct_labels, predict_labels)

In [None]:
recall_score(correct_labels, predict_labels)

In [None]:
f1_score(correct_labels, predict_labels)

So after initial experiment I have
precision 0.5897435897435898
recall 0.27058823529411763
f1 0.3709677419354838
So, I decided to repeat experiment, but now to upscale number of positive samples in the dataset and repeat learning. I'll keep same number of negative samples

In [None]:
def getDfSplitUpscaled(carr, farr, scale_one_correct, scale_no_correct):
    print("creating correct")
    correct = create_correct_set(carr)
    print("correct_number " + str(len(correct)))
    print("creating incorrect1")
    incorrect1 = create_incorrect_same_category(carr, uav, sat)
    print("icorrect1_number " + str(len(incorrect1)))
    print("creating incorrect2")
    incorrect2 = create_correct_false_pairs(carr, farr, scale_one_correct, uav, fsat)
    print("icorrect2_number " + str(len(incorrect2)))
    print("creating incorrect3")
    incorrect3 = create_incorrect_same_category(farr, fsat, fsat, len(correct) * scale_no_correct)
    print("icorrect3_number " + str(len(incorrect3)))
    print("Concatenating")
    resulting_array = np.concatenate((correct, correct, correct, correct, incorrect1, incorrect2, incorrect3))
    print("concatenated_number " + str(len(resulting_array)))
    return pd.DataFrame(resulting_array, columns = ["path1", "path2", "label"])

In [None]:
df_train_upscaled = getDfSplitUpscaled(ctrain, ftrain, 3, 9)

In [None]:
df_train_upscaled[df_train_upscaled.label == "1"]

In [None]:
# Create a SiameseDataset instance
dataset_upscaled = SiameseDataset(df_train_upscaled.values, transform=transform)

# Define data loader
batch_size = 32
dataloader_upscaled = DataLoader(dataset_upscaled, batch_size=batch_size, shuffle=True)


In [None]:
preprocess_and_save_dataset("processed_dataset/", dataloader_upscaled)

In [None]:
def train_model(model, num_epochs, train_max_index, path_train, weight_n, lr):
    weight = tensor([weight_n]).to(device) #
    criterion = nn.BCELoss(weight = weight) #Initially I used BCELoss but it appears to be bugged https://discuss.pytorch.org/t/model-weights-not-being-updated/1842/6
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(num_epochs):
        running_loss = 0.0
        epoch_start = time.time()
        loading_time_start = time.time()

        for i in range(train_max_index + 1):
            with open(path_train + str(i) + ".pkl", 'rb') as inp:
                data = pickle.load(inp)
            if i % 10 == 9:
                print("loading time : " + str(time.time() - loading_time_start))
            mini_epoch_start = time.time()
            img1, img2, labels = data
            reshaped_labels = torch.reshape(labels, (labels.shape[0], 1)).to(device)
            # Zero the parameter gradients
            optimizer.zero_grad()
            value_before = float(list(model.parameters())[-1][0])
            # Forward pass
            outputs = model(img1, img2)
            # Calculate loss
            loss = criterion(outputs, reshaped_labels)
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
            value_after = float(list(model.parameters())[-1][0])
            if abs(value_before - value_after) < 0.000000000001:
                print("VALUES ARE THE SAME")

            # Print statistics
            running_loss += loss.item()
            if i % 10 == 9:  # Print every 10 mini-batches
                print('%f per batch : [%d, %5d] loss: %.3f' %
                      (time.time() - mini_epoch_start, epoch + 1, i + 1, running_loss / 10))
                running_loss = 0.0
            loading_time_start = time.time()
        model.eval()
        loss_train, loss_test, acc_train, acc_test = evaluate_model_prep(model, criterion)
        print('\n loss train: %.3f; acc train %.3f \n loss test %.3f acc test %.3f' %
                      (loss_train, acc_train * 100, loss_test, acc_test * 100))
        model.train()
        epoch_end = time.time()
        print("EPOCH TIME : " + str(epoch_end - epoch_start))
    print('Finished Training')
    return model

Here I restart it and not rerun producing datasets, just using them to train new model

In [None]:
model = SiameseResNet()

In [None]:
model = train_model(model, 2, 180, "processed_dataset/", 13, 0.001)

In [None]:
def get_predictions_and_metrics(model):
    correct_labels = []
    predict_labels = []
    model.eval()
    path = "processed_eval/"
    max_index = 37
    with torch.no_grad():
        for i in range (max_index + 1):
            with open(path + str(i) + ".pkl", 'rb') as inp:
                data = pickle.load(inp)
            img1, img2, labels = data
            correct_labels = correct_labels + labels.tolist()
            reshaped_labels = torch.reshape(labels, (labels.shape[0], 1)).to(device)

            # Forward pass
            outputs = model(img1, img2)
            # Calculate loss
            predictions = (outputs > 0.5).float()
            predict_labels = predict_labels + predictions.tolist()
    print(confusion_matrix(correct_labels, predict_labels))
    print('precision %.3f, recall %.3f, f1 %.3f' %
              (precision_score(correct_labels, predict_labels), recall_score(correct_labels, predict_labels), f1_score(correct_labels, predict_labels)))

In [None]:
get_predictions_and_metrics(model)

Ok so I guess it's a bit too prone to predicting positive now, so I'll do the same but 
1. In different notebook
2. a bit more sophisticated NN (+1 layer prob)
3. With smaller class weight