In [None]:
# Useful Links
# Introduction to Neural Re-Ranking - https://www.youtube.com/watch?v=GSixIsI1eZE

In [226]:
!pip install tqdm

Collecting tqdm
  Downloading tqdm-4.66.4-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tqdm-4.66.4-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.3/78.3 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tqdm
Successfully installed tqdm-4.66.4

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [231]:
import glob
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from tqdm import tqdm

# CUDA?

In [236]:
from torch.nn import BCEWithLogitsLoss
def rankNet(y_pred, y_true, padded_value_indicator=-1, weight_by_diff=False, weight_by_diff_powed=False):
    """
    RankNet loss introduced in "Learning to Rank using Gradient Descent".
    :param y_pred: predictions from the model, shape [batch_size, slate_length]
    :param y_true: ground truth labels, shape [batch_size, slate_length]
    :param weight_by_diff: flag indicating whether to weight the score differences by ground truth differences.
    :param weight_by_diff_powed: flag indicating whether to weight the score differences by the squared ground truth differences.
    :return: loss value, a torch.Tensor
    """
    y_pred = y_pred.clone()
    y_true = y_true.clone()

    mask = y_true == padded_value_indicator
    y_pred[mask] = float('-inf')
    y_true[mask] = float('-inf')

    # here we generate every pair of indices from the range of document length in the batch
    document_pairs_candidates = list(product(range(y_true.shape[1]), repeat=2))

    pairs_true = y_true[:, document_pairs_candidates]
    selected_pred = y_pred[:, document_pairs_candidates]

    # here we calculate the relative true relevance of every candidate pair
    true_diffs = pairs_true[:, :, 0] - pairs_true[:, :, 1]
    pred_diffs = selected_pred[:, :, 0] - selected_pred[:, :, 1]

    # here we filter just the pairs that are 'positive' and did not involve a padded instance
    # we can do that since in the candidate pairs we had symetric pairs so we can stick with
    # positive ones for a simpler loss function formulation
    the_mask = (true_diffs > 0) & (~torch.isinf(true_diffs))

    pred_diffs = pred_diffs[the_mask]

    weight = None
    if weight_by_diff:
        abs_diff = torch.abs(true_diffs)
        weight = abs_diff[the_mask]
    elif weight_by_diff_powed:
        true_pow_diffs = torch.pow(pairs_true[:, :, 0], 2) - torch.pow(pairs_true[:, :, 1], 2)
        abs_diff = torch.abs(true_pow_diffs)
        weight = abs_diff[the_mask]

    # here we 'binarize' true relevancy diffs since for a pairwise loss we just need to know
    # whether one document is better than the other and not about the actual difference in
    # their relevancy levels
    true_diffs = (true_diffs > 0).type(torch.float32)
    true_diffs = true_diffs[the_mask]

    return BCEWithLogitsLoss(weight=weight)(pred_diffs, true_diffs)

# Data Loader

In [249]:
class PairWiseDataset(Dataset):
    def __init__(self, root_dir="./datasets/istella-letor/train_parquet/*"):
        """
        Arguments:
            root_dir (string): Directory with all the queries.
        """
        self.queries = list()
        for query in glob.glob(root_dir):
            self.queries.append(query)

    def __len__(self):
        return len(self.queries) // 2

    def __getitem__(self, idx):
        df = pd.read_parquet(glob.glob(self.queries[idx] + "/*.parquet")[0])

        labels = torch.tensor(df["label"].to_numpy().reshape(-1))
        length_labels = labels.shape[0]
        # TODO: Make 433 a variable
        labels = F.pad(labels, (0, 433 - length_labels), "constant", -1)

        features = torch.tensor(np.array(df["features.values"].values.tolist(), dtype=np.float32))
        features = F.pad(features, (0, 0, 0, 433 - length_labels), "constant", -1)

        return features, labels

In [250]:
train_dataset = PairWiseDataset()
train_dataloader = DataLoader(train_dataset, num_workers=4, batch_size=32, shuffle=True)

In [251]:
for features, labels in train_dataloader:
    # Here, anchor, positive, and negative are batches of samples
    print(f"positive: {features}\n{features.shape}\n\n")
    print(f"negative: {labels}\n{labels.shape}")
    break

positive: tensor([[[ 0.0000e+00,  1.0000e+03,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  1.0130e+03,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  1.0160e+03,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         ...,
         [-1.0000e+00, -1.0000e+00, -1.0000e+00,  ..., -1.0000e+00,
          -1.0000e+00, -1.0000e+00],
         [-1.0000e+00, -1.0000e+00, -1.0000e+00,  ..., -1.0000e+00,
          -1.0000e+00, -1.0000e+00],
         [-1.0000e+00, -1.0000e+00, -1.0000e+00,  ..., -1.0000e+00,
          -1.0000e+00, -1.0000e+00]],

        [[ 0.0000e+00,  1.0230e+03,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  1.0400e+03,  2.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  1.2600e+02,  2.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         ...,
         [-1.00

In [252]:
class DNNLTR(nn.Module):
    def __init__(self, input_size, output_size, hidden_size= 100):
        super(DNNLTR, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [253]:
features.reshape(220, -1).shape

torch.Size([220, 13856])

In [254]:
model(features.reshape(-1, 220))

tensor([[7.7830e+03],
        [7.6529e+03],
        [7.7664e+03],
        ...,
        [3.1298e-01],
        [3.1298e-01],
        [3.1298e-01]], grad_fn=<AddmmBackward0>)

In [255]:
input_size = 220  # Example input size
output_size = 1  # Output size is 1 for ranking scores
learning_rate = 0.01
num_epochs = 100

model = DNNLTR(input_size, output_size)
# criterion = nn.MarginRankingLoss(margin=1.0)
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [256]:
for epoch in range(1):
    print(epoch)
    for i, data in enumerate(tqdm(train_dataloader)):
        features = data[0]
        label = data[1].float()

        optimizer.zero_grad()

        # # Forward pass
        output = model(features).squeeze(-1)
        # print(output)
        # print(output.shape)
        
        # output2 = model(x2)
    
        # # Compute the loss
        loss = rankNet(output, label)
        # print(loss)
        # break
    
        # # Backward pass and optimize
        loss.backward()
        optimizer.step()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

0


100%|█████████████████████████████████████████████████████████████████████████████████| 363/363 [01:27<00:00,  4.16it/s]

Epoch [1/100], Loss: nan





In [238]:
output.squeeze(-1)

tensor([[-8.1524e+02, -8.3098e+02, -9.2494e+02, -8.4188e+02, -8.0577e+02,
         -1.1002e+03, -8.3113e+02, -8.1476e+02, -9.4506e+02, -7.7314e+02,
         -2.4954e+03, -9.3460e+02, -8.1591e+02, -8.0336e+02, -2.9345e+03,
         -1.5114e+03, -9.4529e+02, -1.1154e+03, -8.9665e+02, -8.8047e+02,
         -8.3442e+02, -8.9205e+02, -1.8085e+03, -8.1146e+02, -9.6781e+02,
         -8.8013e+02, -8.3312e+02, -8.9791e+02, -8.3417e+02, -9.0948e+02,
         -2.3312e+03, -8.9629e+02, -1.3082e+03, -8.4684e+02, -8.2488e+02,
         -8.3881e+02, -1.5201e+03, -7.9262e+02, -7.9045e+02, -8.5237e+02,
         -8.3793e+02, -7.8648e+02, -1.2979e+03, -7.8789e+02, -8.0407e+02,
         -8.1880e+02, -8.0146e+02, -7.8685e+02, -8.3741e+02, -8.3371e+02,
         -9.3610e+02, -8.0021e+02, -1.1307e+03, -8.1219e+02, -8.0421e+02,
         -8.0168e+02, -8.8506e+02, -8.5868e+02, -8.2921e+02, -1.6071e+03,
         -1.1881e+03, -7.8677e+02, -7.9797e+02, -7.8644e+02, -1.4684e+03,
         -1.0041e+03, -8.3388e+02, -1.

In [216]:
# x1 = torch.randn(32, input_size)
# x2 = torch.randn(32, input_size)

# target = torch.ones(32, 1)

In [217]:
# x1.shape

torch.Size([32, 10])

In [None]:
# model.compile()

In [None]:
# for epoch in range(num_epochs):
#     # Zero the parameter gradients
    optimizer.zero_grad()

    # Forward pass
    output1 = model(x1)
    output2 = model(x2)

    # Compute the loss
    loss = criterion(output1, output2, target)

    # Backward pass and optimize
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# print("Training complete.")