In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, Dataset


# Make every figure in this notebook wide and short by default (16 x 4 inches).
plt.rcParams['figure.figsize'] = [16, 4]

In [None]:
num_rows = 131072
num_features = 8
random_seed = 42

# Seed NumPy's global generator so the draws below (and any later
# np.random.* calls executed in order) are reproducible after a kernel restart.
np.random.seed(random_seed)

# Only the feature matrix is used; the class labels returned by
# make_classification are discarded.
X, _ = make_classification(
	n_samples=num_rows,
	n_features=num_features,
	random_state=random_seed,
)

# Standardize each feature column.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Continuous target: a random linear combination of the standardized features
# squashed through the logistic sigmoid.
coefficients = np.random.randn(num_features)
linear_combination = np.dot(X, coefficients)

target = 1 / (1 + np.exp(-linear_combination))
# The sigmoid already lies in [0, 1]; the clip is a defensive no-op guard.
target = np.clip(target, 0, 1)

In [None]:
# Display the standardized feature matrix.
X

In [None]:
# Display the sigmoid-transformed target values.
target

In [None]:
# Kernel density estimate of the target distribution.
sns.kdeplot(target)

In [None]:
def sample_index_pairs(mask, num_pairs):
	"""Sample random pairs of row indices from the rows selected by ``mask``.

	Args:
		mask: Boolean array; rows where it is True are the candidates.
		num_pairs: Number of index pairs to draw.

	Returns:
		Integer array of shape ``(num_pairs, 2)``. Indices are drawn with
		replacement, so an index may repeat, including within a single pair.
	"""
	candidates = np.where(mask)[0]
	return np.random.choice(candidates, size=(num_pairs, 2), replace=True)


# Split the rows at the median target value. The comparisons are strict, so
# rows whose target equals the median exactly belong to neither group.
threshold = np.median(target)

# One pair per two rows (65536 for the 131072-row dataset), derived from the
# data instead of being hard-coded.
num_pairs = len(target) // 2

zero_pairs = sample_index_pairs(target < threshold, num_pairs)
one_pairs = sample_index_pairs(target > threshold, num_pairs)

In [None]:
class TabularSiameseDataset(Dataset):
	"""Dataset that serves pairs of rows together with a pair label.

	Each entry of ``labels`` is an ``(index1, index2, label)`` triple; the two
	indices select rows of ``data``.
	"""

	def __init__(self, data, labels):
		"""Keep references to the row container and the list of pair triples."""
		self.data, self.labels = data, labels

	def __len__(self):
		"""Return the number of pair triples."""
		return len(self.labels)

	def __getitem__(self, idx):
		"""Return ``(data[index1], data[index2], label)`` for the ``idx``-th triple."""
		first_index, second_index, pair_label = self.labels[idx]
		first_row = self.data[first_index]
		second_row = self.data[second_index]
		return first_row, second_row, pair_label


# Example data (randomly generated). Seeding torch's global RNG makes the
# sampled tensor and later draws from that RNG (e.g. weight initialisation,
# DataLoader shuffling) repeat across fresh runs.
torch.manual_seed(42)
data = torch.randn(100, 10)  # 100 samples, 10 features each
# (index1, index2, label) triples: neighbours starting at an even index get
# label 1, neighbours starting at an odd index get label 0.
pairs = [(i, i + 1, 1) for i in range(0, 50, 2)] + [(i, i + 1, 0) for i in range(1, 50, 2)]
dataset = TabularSiameseDataset(data, pairs)
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)

In [None]:
class SiameseNetwork(nn.Module):
	"""Twin network that embeds two inputs with one shared MLP.

	Args:
		input_dim: Number of features in each input row.
		hidden_dims: Sizes of the hidden linear layers, each followed by ReLU.
		embedding_dim: Size of the output embedding.

	The defaults build ``Linear(10, 20) -> ReLU -> Linear(20, 10) -> ReLU ->
	Linear(10, 5)``, the architecture that used to be hard-coded.
	"""

	def __init__(self, input_dim=10, hidden_dims=(20, 10), embedding_dim=5):
		super(SiameseNetwork, self).__init__()
		layers = []
		previous_dim = input_dim
		for hidden_dim in hidden_dims:
			layers.append(nn.Linear(previous_dim, hidden_dim))
			layers.append(nn.ReLU())
			previous_dim = hidden_dim
		# Final projection into the embedding space (no activation).
		layers.append(nn.Linear(previous_dim, embedding_dim))
		self.net = nn.Sequential(*layers)

	def forward(self, input1, input2):
		"""Embed both inputs with the same weights and return the two embeddings."""
		output1 = self.net(input1)
		output2 = self.net(input2)
		return output1, output2


# Instantiate the Siamese network that is trained below.
model = SiameseNetwork()

In [None]:
class ContrastiveLoss(nn.Module):
	"""Contrastive loss over pairs of embeddings.

	Label convention implemented by ``forward``: pairs with label 0 are
	penalised by their squared distance, while pairs with label 1 are
	penalised only when their distance is below ``margin``.
	"""

	def __init__(self, margin=1.0):
		super().__init__()
		self.margin = margin

	def forward(self, output1, output2, label):
		"""Return the mean contrastive loss over the batch."""
		distance = nn.functional.pairwise_distance(output1, output2)
		# Label-0 term: squared distance between the two embeddings.
		pull_term = (1 - label) * distance.pow(2)
		# Label-1 term: squared shortfall of the distance below the margin.
		shortfall = torch.clamp(self.margin - distance, min=0.0)
		push_term = label * shortfall.pow(2)
		per_pair_loss = pull_term + push_term
		return per_pair_loss.mean()

In [None]:
# Contrastive objective with its default margin, optimised by Adam at a fixed
# learning rate.
learning_rate = 0.01
loss_fn = ContrastiveLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
def train(dataloader, model, loss_fn, optimizer, epochs):
	"""Train ``model`` on pair batches and print the mean batch loss per epoch.

	Args:
		dataloader: Sized iterable yielding ``(data1, data2, labels)`` batches.
		model: Module called as ``model(data1, data2)`` that returns two outputs.
		loss_fn: Callable taking ``(outputs1, outputs2, labels)`` and returning
			a scalar tensor that supports ``backward()`` and ``item()``.
		optimizer: Optimizer stepped once per batch.
		epochs: Number of passes over ``dataloader``.
	"""
	model.train()  # put the module in training mode before any batch is seen
	for epoch in range(epochs):
		running_loss = 0.0
		for batch in dataloader:
			first_inputs, second_inputs, pair_labels = batch
			# Clear gradients left over from the previous step.
			optimizer.zero_grad()
			first_outputs, second_outputs = model(first_inputs, second_inputs)
			batch_loss = loss_fn(first_outputs, second_outputs, pair_labels)
			batch_loss.backward()
			optimizer.step()
			running_loss += batch_loss.item()
		# Mean of the per-batch losses for this epoch.
		print(f'Epoch {epoch + 1}, Loss: {running_loss / len(dataloader)}')


# Train the Siamese model on the example pairs for 10 epochs; the mean batch
# loss is printed after each epoch.
train(dataloader, model, loss_fn, optimizer, epochs=10)