In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler


In [2]:
# Fix the seed of both random generators used in this notebook: PyTorch's
# and NumPy's global generator.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Prefer the GPU when CUDA is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


In [3]:
class TripletDataset(Dataset):
    """Dataset yielding (anchor, positive, negative) triplets for binary labels.

    Item ``idx`` pairs the sample at ``idx`` (the anchor) with a randomly
    drawn sample of the same class (the positive) and a randomly drawn sample
    of the opposite class (the negative). Draws use NumPy's global random
    generator.

    Args:
        X: Indexable collection of feature vectors; ``X[i]`` must be
            convertible with ``torch.tensor``.
        y: Labels aligned with ``X``. Anchors labelled ``0`` draw positives
            from the ``0`` samples and negatives from the ``1`` samples;
            anchors with any other label do the reverse.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or if there is no
            sample labelled ``0`` or no sample labelled ``1``.
    """

    def __init__(self, X, y):
        self.X = X
        self.y = y

        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )

        # Positional label array: indexing it with idx is always by position,
        # whatever container type y is.
        self._labels = np.asarray(y)

        self.class0_idx = np.where(self._labels == 0)[0]
        self.class1_idx = np.where(self._labels == 1)[0]

        # Fail here rather than on the first __getitem__, where sampling from
        # an empty index array would raise a much less helpful error.
        if len(self.class0_idx) == 0 or len(self.class1_idx) == 0:
            raise ValueError(
                "TripletDataset needs at least one sample labelled 0 and one "
                "labelled 1 to form triplets"
            )

    def __len__(self):
        return len(self.X)

    @staticmethod
    def _draw_positive(same_idx, anchor_idx):
        """Pick a random index from ``same_idx``, avoiding ``anchor_idx`` if possible."""
        # np.where returns indices in ascending order, so a binary search
        # finds the anchor's slot without scanning the whole class.
        slot = np.searchsorted(same_idx, anchor_idx)
        holds_anchor = slot < len(same_idx) and same_idx[slot] == anchor_idx

        # Nothing to exclude, or the anchor is the only member of its class:
        # fall back to a plain draw (which may return the anchor itself).
        if not holds_anchor or len(same_idx) == 1:
            return np.random.choice(same_idx)

        # Draw uniformly from the class with the anchor's slot skipped.
        pick = np.random.randint(len(same_idx) - 1)
        if pick >= slot:
            pick += 1
        return same_idx[pick]

    def __getitem__(self, idx):
        """Return ``(anchor, positive, negative)`` as float32 tensors."""
        if self._labels[idx] == 0:
            same_idx, other_idx = self.class0_idx, self.class1_idx
        else:
            same_idx, other_idx = self.class1_idx, self.class0_idx

        # The positive must differ from the anchor whenever the class allows
        # it; an identical pair has zero distance and adds no pull signal.
        positive_idx = self._draw_positive(same_idx, idx)
        negative_idx = np.random.choice(other_idx)

        return (
            torch.tensor(self.X[idx], dtype=torch.float32),
            torch.tensor(self.X[positive_idx], dtype=torch.float32),
            torch.tensor(self.X[negative_idx], dtype=torch.float32),
        )


In [4]:
class Encoder(nn.Module):
    """Feed-forward network mapping feature vectors to embeddings.

    Two hidden stages of widths 256 and 128, each ``Linear -> ReLU ->
    BatchNorm1d``, followed by a linear projection to ``embedding_dim``.

    Args:
        input_dim: Number of input features per sample.
        embedding_dim: Size of the output embedding (default 64).
    """

    def __init__(self, input_dim, embedding_dim=64):
        super().__init__()

        # Widths of successive layers up to (not including) the projection.
        widths = (input_dim, 256, 128)

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.extend(
                [
                    nn.Linear(fan_in, fan_out),
                    nn.ReLU(),
                    nn.BatchNorm1d(fan_out),
                ]
            )
        # Final projection: no activation or normalisation on the embedding.
        stack.append(nn.Linear(widths[-1], embedding_dim))

        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return the embedding of ``x`` produced by the layer stack."""
        embedding = self.net(x)
        return embedding


In [5]:
# NOTE(review): X_train_np and y_train_np are not defined in any cell visible
# above, so this cell raises NameError on a fresh kernel. Presumably a data
# loading / preprocessing cell is missing -- confirm and restore it.
train_dataset = TripletDataset(X_train_np, y_train_np)

# Reshuffle every epoch and discard the final incomplete batch.
loader_options = {
    "batch_size": 256,
    "shuffle": True,
    "drop_last": True,
}
train_loader = DataLoader(train_dataset, **loader_options)


NameError: name 'X_train_np' is not defined

In [None]:
# Triplet margin loss over p-norm distances: here the 2-norm, with the
# negative required to be at least TRIPLET_MARGIN farther than the positive.
TRIPLET_MARGIN = 1.0
DISTANCE_NORM = 2
criterion = nn.TripletMarginLoss(margin=TRIPLET_MARGIN, p=DISTANCE_NORM)


In [None]:
# The number of input features is the second dimension of the training array.
input_dim = X_train_np.shape[1]

# Build the encoder, then move its parameters to the selected device.
encoder = Encoder(input_dim, embedding_dim=64)
encoder = encoder.to(device)

# Adam with weight decay over all encoder parameters.
adam_settings = {"lr": 1e-3, "weight_decay": 1e-4}
optimizer = optim.Adam(encoder.parameters(), **adam_settings)


In [None]:
EPOCHS = 20

# Fail fast when the loader has nothing to iterate. Otherwise every epoch
# would silently do no work and the per-epoch average below would divide by
# zero. The loader is built with drop_last=True, so a training set smaller
# than one batch produces zero batches.
num_batches = len(train_loader)
if num_batches == 0:
    raise ValueError(
        "train_loader yields no batches; is the training set smaller than "
        "batch_size while drop_last=True?"
    )

for epoch in range(EPOCHS):
    encoder.train()  # BatchNorm layers normalise with per-batch statistics
    total_loss = 0.0

    for anchor, positive, negative in train_loader:
        anchor = anchor.to(device)
        positive = positive.to(device)
        negative = negative.to(device)

        optimizer.zero_grad()

        # The same encoder embeds all three members of the triplet.
        emb_anchor = encoder(anchor)
        emb_positive = encoder(positive)
        emb_negative = encoder(negative)

        loss = criterion(emb_anchor, emb_positive, emb_negative)
        loss.backward()
        optimizer.step()

        # .item() extracts a Python float so the graph is not kept alive.
        total_loss += loss.item()

    # Mean of the per-batch losses over this epoch.
    avg_loss = total_loss / num_batches
    print(f"Epoch {epoch+1}/{EPOCHS} - Triplet Loss: {avg_loss:.4f}")


In [None]:
def _embed_in_batches(model, features, batch_size=4096):
    """Embed ``features`` with ``model`` in mini-batches and return a NumPy array.

    Only one mini-batch is resident on the device at a time, so the peak
    device memory does not grow with the number of rows. The caller must put
    ``model`` in eval mode first: BatchNorm then uses its running statistics,
    so each row's embedding does not depend on how rows are batched.

    Args:
        model: Module mapping a float32 batch to embeddings.
        features: Array-like of feature rows, convertible by ``torch.tensor``.
        batch_size: Maximum number of rows per forward pass.

    Returns:
        NumPy array of embeddings, one row per input row, in input order.
    """
    inputs = torch.tensor(features, dtype=torch.float32)
    chunks = []
    with torch.no_grad():
        # max(..., 1) keeps a single (empty) forward pass for empty input so
        # the result still has the embedding width.
        for start in range(0, max(len(inputs), 1), batch_size):
            batch = inputs[start:start + batch_size].to(device)
            chunks.append(model(batch).cpu())
    return torch.cat(chunks).numpy()


# Switch to inference mode before embedding.
encoder.eval()

train_embeddings = _embed_in_batches(encoder, X_train_np)
test_embeddings = _embed_in_batches(encoder, X_test_np)
