In [1]:
from os.path import join

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split

import config_gcp as config
from model import SiameseNetwork, training
from utils import imshow, show_plot, load_dataset


# Pull the dataset locations out of the config module so later cells can use short names.
training_dir, training_csv = config.training_dir, config.training_csv
testing_dir, testing_csv = config.testing_dir, config.testing_csv

In [2]:
# Load the dataset and draw a random subset of pairs to run inference on.
# NOTE(review): the pairs come from the *training* directory/CSV — confirm this is intended.
siamese_dataset = load_dataset(training_dir, training_csv)

num_test = 100  # number of pairs to run inference on
num_other = len(siamese_dataset) - num_test
siamese_test, _ = random_split(siamese_dataset, [num_test, num_other])

test_dataloader = DataLoader(
    siamese_test, shuffle=True, num_workers=8, batch_size=1
)

# Choose the device once and use it for the model, the checkpoint and the inputs,
# so the cell also runs on a CPU-only machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the trained weights into the network.
checkpoint_path = join("state_dict", "AdamW  Parameter Group 0 amsgrad-False betas-0.9, 0.999 eps-1e-08 lr-1e-05 weight_decay-0.0005  batch_size-32validation_error0.4443007972419737.pth")
net = SiameseNetwork().to(device)
# map_location remaps tensors saved from a GPU onto whichever device is available here.
net.load_state_dict(torch.load(checkpoint_path, map_location=device))
net.eval()

# Evaluate the Euclidean distance between each pair.
# Column 0 holds the distance, column 1 holds the label rescaled as label*2-1.
test_distance = np.zeros((len(test_dataloader), 2))
with torch.no_grad():  # inference only: skip building autograd graphs
    for i, data in enumerate(test_dataloader):
        x0, x1, label = data
        output1, output2 = net(x0.to(device), x1.to(device))
        eucledian_distance = F.pairwise_distance(output1, output2)
        # batch_size=1, so each tensor is expected to hold exactly one element;
        # .item() extracts it as a Python scalar instead of assigning a 1-element array.
        test_distance[i, 0] = eucledian_distance.item()
        test_distance[i, 1] = label.item() * 2 - 1

In [25]:
import math

def sortKeyGenerator(i):
    """Return a key function that extracts element ``i`` from each row.

    The returned callable is meant to be passed as ``key=`` to ``sort``/``sorted``
    so that rows are ordered by their ``i``-th entry.
    """
    return lambda row: row[i]

def _scan_stump_polarity(samples, polarity, start_errors, best):
    """Sweep every candidate threshold for one polarity and return the best stump so far.

    Parameters
    ----------
    samples : list of rows; the last entry of each row is the label (+1 / -1),
        the preceding entries are feature values. Sorted in place per feature.
    polarity : +1 or -1; stored as the stump's polarity and used as the sign with
        which each label adjusts the running error count.
    start_errors : error count for a threshold placed below the smallest feature value.
    best : tuple ``(errors, theta, feature, polarity)`` holding the best stump found
        so far; it is only replaced by a strictly smaller error count.

    Returns
    -------
    The (possibly updated) ``best`` tuple.
    """
    last = len(samples) - 1
    label_col = len(samples[0]) - 1
    for feature in range(label_col):
        samples.sort(key=sortKeyGenerator(feature))
        errors = start_errors

        # Candidate 1: threshold below every sample.
        if errors < best[0]:
            best = (errors, samples[0][feature] - 1, feature, polarity)

        # Candidates between consecutive samples: moving the threshold past a
        # sample changes the error count by that sample's label.
        for idx in range(last):
            errors -= polarity * samples[idx][label_col]
            # A threshold can only be placed between two distinct feature values.
            if errors < best[0] and samples[idx][feature] != samples[idx + 1][feature]:
                midpoint = (samples[idx][feature] + samples[idx + 1][feature]) / 2
                best = (errors, midpoint, feature, polarity)

        # Final candidate: threshold above every sample.
        errors -= polarity * samples[last][label_col]
        if errors < best[0]:
            best = (errors, samples[last][feature] + 0.5, feature, polarity)
    return best


# Fit a one-feature threshold classifier (decision stump) on the measured distances.
# NOTE(review): presumably the stump predicts b * sign(theta - x_j) — confirm against its use.
train_data = test_distance.tolist()

m = len(train_data)           # number of samples
d = len(train_data[0]) - 1    # number of feature columns (the last column is the label)

# Error counts when the threshold sits below all samples, for each polarity.
F0_p = sum(1 for data in train_data if data[-1] == 1)
F0_n = sum(1 for data in train_data if data[-1] == -1)

# Positive polarity is scanned first; negative polarity wins only on a strictly lower error.
best_stump = (math.inf, None, None, 0)
best_stump = _scan_stump_polarity(train_data, 1, F0_p, best_stump)
best_stump = _scan_stump_polarity(train_data, -1, F0_n, best_stump)
F_star, theta_star, j_star, b_star = best_stump

print("j_star = %d\ntheta_star = %f\npolorization = %d\nEmpirical Error = %f\n" % (j_star, theta_star, b_star, F_star / m))

j_star = 0
theta_star = 0.004577
polorization = 1
Empirical Error = 0.191000

