In [None]:
# Useful Links
# Introduction to Neural Re-Ranking - https://www.youtube.com/watch?v=GSixIsI1eZE

In [None]:
import glob
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [None]:
class PairWiseDataset(Dataset):
    """Per-query dataset yielding fixed-size (features, labels) tensors.

    Every path matched by ``root_dir`` is treated as one query. The first
    ``*.parquet`` file inside it is read, and its ``label`` and
    ``features.values`` columns are converted to tensors whose row count is
    padded to ``max_docs`` so that queries of different sizes can be batched.
    """

    def __init__(
        self,
        root_dir="./datasets/istella-letor/train_parquet/*",
        max_docs=433,
        pad_value=-1,
    ):
        """
        Arguments:
            root_dir (string): Glob pattern matching one path per query.
            max_docs (int): Row count every returned tensor is padded to.
            pad_value (int): Fill value written into the padded rows.
        """
        # Sorted so the index -> query mapping is stable across runs and
        # filesystems (glob makes no ordering guarantee).
        self.queries = sorted(glob.glob(root_dir))
        self.max_docs = max_docs
        self.pad_value = pad_value

    def __len__(self):
        """Return the number of queries matched by ``root_dir``."""
        return len(self.queries)

    def __getitem__(self, idx):
        """Load query ``idx`` and return its padded ``(features, labels)``.

        Raises:
            IndexError: if ``idx`` is outside the list of queries.
            FileNotFoundError: if the query path contains no parquet file.
        """
        query_path = self.queries[idx]
        parquet_files = sorted(glob.glob(query_path + "/*.parquet"))
        if not parquet_files:
            # Deliberately not IndexError: Python's legacy sequence iteration
            # treats IndexError from __getitem__ as "end of data", which would
            # hide the broken query directory.
            raise FileNotFoundError(f"No parquet file found in {query_path!r}")
        df = pd.read_parquet(parquet_files[0])

        labels = torch.tensor(df["label"].to_numpy().reshape(-1))
        # NOTE(review): a query with more than max_docs rows gives a negative
        # pad amount here; confirm that F.pad's handling of it is acceptable.
        pad_rows = self.max_docs - labels.shape[0]
        labels = F.pad(labels, (0, pad_rows), "constant", self.pad_value)

        features = torch.tensor(np.array(df["features.values"].values.tolist()))
        # (0, 0, 0, pad_rows): leave the last (feature) axis untouched and
        # append pad_rows rows at the end of the axis before it.
        features = F.pad(features, (0, 0, 0, pad_rows), "constant", self.pad_value)

        return features, labels

In [None]:
# Training set built from the dataset's default root_dir pattern.
train_dataset = PairWiseDataset()

# Shuffled loader that groups two queries into each batch.
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=2)

In [None]:
# Pull a single batch from the loader to sanity-check its contents and shapes.
for features, labels in train_dataloader:
    # Each batch is a (features, labels) pair, so tag the printouts accordingly.
    print(f"features: {features}\n{features.shape}\n\n")
    print(f"labels: {labels}\n{labels.shape}")
    break

In [None]:
# Collapse every leading axis so each row is a single feature vector; the row
# width is taken from the tensor itself instead of a hard-coded feature count.
print(features.reshape(-1, features.shape[-1]))

In [None]:
# Display whether PyTorch reports CUDA as available in this environment.
torch.cuda.is_available()

In [None]:
class SingleLayerNet(nn.Module):
    """Network consisting of exactly one fully connected (linear) layer."""

    def __init__(self, input_size, output_size):
        """Create the linear layer.

        Arguments:
            input_size (int): Number of input features per sample.
            output_size (int): Number of values produced per sample.
        """
        super().__init__()
        self.fc = nn.Linear(in_features=input_size, out_features=output_size)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        projected = self.fc(x)
        return projected

In [None]:
# Model dimensions
output_size = 1  # a single ranking score per sample
input_size = 10  # example feature-vector width

# Optimisation settings
num_epochs = 100
learning_rate = 0.01

In [None]:
# Pairwise ranking objective with a margin of 1.0 between the two scores.
criterion = nn.MarginRankingLoss(margin=1.0)

# The scorer, and a plain SGD optimizer over all of its parameters.
model = SingleLayerNet(input_size=input_size, output_size=output_size)
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)

In [None]:
# Synthetic stand-in data: two batches of random feature vectors to be ranked
# against each other row by row.
num_pairs = 32

# A dedicated, seeded generator makes the toy data identical on every run
# without altering PyTorch's global random state.
data_rng = torch.Generator().manual_seed(0)
x1 = torch.randn(num_pairs, input_size, generator=data_rng)
x2 = torch.randn(num_pairs, input_size, generator=data_rng)

# All-ones target, one entry per (x1, x2) row pair.
target = torch.ones(num_pairs, 1)

In [None]:
# Compile the model before the training loop runs.
# NOTE(review): presumably this is nn.Module.compile(), which only exists in
# recent PyTorch 2.x releases — confirm the installed version supports it.
model.compile()

In [None]:
# Toy training loop: the same x1/x2 tensors are reused on every epoch.
for epoch in range(num_epochs):
    # Score both sides of every pair with the same model
    output1 = model(x1)
    output2 = model(x2)

    # Ranking loss between the two sets of scores
    loss = criterion(output1, output2, target)

    # Clear stale gradients, backpropagate, then update the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report progress on every tenth epoch (10, 20, ...)
    if epoch % 10 == 9:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

print("Training complete.")