In [None]:
import glob
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from tqdm import tqdm

from losses import rankNet

# TODO: decide on device placement — nothing below moves the model or the batches to CUDA.

# Data Loader

In [None]:
class PairWiseDataset(Dataset):
    """Per-query ranking dataset read from one parquet file per query directory.

    Each item is a ``(features, labels)`` pair for a single query, padded along
    the document axis to ``max_docs`` rows so that items can be stacked into a
    batch by a ``DataLoader``.
    """

    def __init__(self, root_dir="./datasets/istella-letor/train_parquet/*", max_docs=433):
        """
        Arguments:
            root_dir (string): Glob pattern matching one directory per query.
            max_docs (int): Number of document rows every item is padded to.
                Defaults to 433, the value previously hard-coded here.
        """
        # glob yields entries in arbitrary filesystem order; sorting keeps the
        # index -> query mapping (and the subset exposed by __len__) stable
        # across runs and machines.
        self.queries = sorted(glob.glob(root_dir))
        self.max_docs = max_docs

    def __len__(self):
        # NOTE(review): only the first half of the matched queries is exposed;
        # __getitem__ consumes one query per index, so the second half is never
        # read. Kept as-is in case it is a deliberate subsample — confirm.
        return len(self.queries) // 2

    def __getitem__(self, idx):
        """Return ``(features, labels)`` for query ``idx``.

        Returns:
            features: float32 tensor built from the ``features.values`` column,
                padded with -1 rows up to ``max_docs``.
            labels: tensor built from the ``label`` column, padded with -1 up
                to ``max_docs``.

        Raises:
            FileNotFoundError: if the query directory contains no ``*.parquet``
                file.
        """
        parquet_files = sorted(glob.glob(self.queries[idx] + "/*.parquet"))
        if not parquet_files:
            # An IndexError here would be read as "end of dataset" by the
            # legacy __getitem__ iteration protocol and hide the broken input.
            raise FileNotFoundError(f"No parquet file found in {self.queries[idx]}")
        df = pd.read_parquet(parquet_files[0])

        labels = torch.tensor(df["label"].to_numpy().reshape(-1))
        # NOTE(review): a query with more than max_docs documents produces a
        # negative pad amount — confirm that this is the desired handling.
        pad_rows = self.max_docs - labels.shape[0]

        # -1 marks padded slots; presumably this matches the padding indicator
        # expected by the loss — TODO confirm.
        labels = F.pad(labels, (0, pad_rows), "constant", -1)

        features = torch.tensor(np.array(df["features.values"].values.tolist(), dtype=np.float32))
        # Pad only the row (document) axis; the feature axis is left untouched.
        features = F.pad(features, (0, 0, 0, pad_rows), "constant", -1)

        return features, labels

In [None]:
# Build the training set and a shuffling, multi-worker loader over it.
train_dataset = PairWiseDataset()
train_dataloader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
    prefetch_factor=4,
    # Pinned host memory only speeds up host-to-GPU copies, so request it only
    # when CUDA is actually available.
    pin_memory=torch.cuda.is_available(),
)

In [None]:
# Sanity check: pull a single batch and report what the loader yields.
for features, labels in train_dataloader:
    # Print shapes/dtypes and a short slice instead of dumping whole tensors.
    print(f"features: shape={tuple(features.shape)}, dtype={features.dtype}")
    print(f"labels: shape={tuple(labels.shape)}, dtype={labels.dtype}")
    print(f"labels of first query (first 10): {labels[0, :10].tolist()}")
    break

In [None]:
class DNNLTR(nn.Module):
    """Feed-forward scoring network: three Linear+BatchNorm+ReLU stages
    (220, 64 and 32 units) followed by a linear output layer.
    """

    def __init__(self, input_size, output_size, hidden_size= 100):
        """
        Arguments:
            input_size (int): Width of each input row.
            output_size (int): Width of the output produced for each input row.
            hidden_size (int): Currently unused; the hidden widths are fixed at
                220/64/32. Kept so existing callers keep working.
        """
        super().__init__()
        self.layers = nn.Sequential(
          nn.Linear(input_size, 220),
          nn.BatchNorm1d(220),
          nn.ReLU(),
          nn.Linear(220, 64),
          nn.BatchNorm1d(64),
          nn.ReLU(),
          nn.Linear(64, 32),
          nn.BatchNorm1d(32),
          nn.ReLU(),
          # Honour the requested output width instead of a hard-coded 1.
          nn.Linear(32, output_size)
        )

    def forward(self, x):
        """Apply the layer stack to ``x`` (rows of width ``input_size``) and
        return one row of width ``output_size`` per input row."""
        return self.layers(x)

In [None]:
# Training configuration
num_epochs = 100
learning_rate = 0.01
input_size = 220  # feature width per row; the training loop reshapes batches to this width
output_size = 1   # one ranking score per row

# Network and its Adam optimizer
model = DNNLTR(input_size=input_size, output_size=output_size)
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Train for the configured number of epochs and report the mean batch loss of
# each one. The epoch count comes from `num_epochs` so the loop and the log
# line agree; lower `num_epochs` in the config cell for a quick smoke run.
model.train()  # make sure BatchNorm is in training mode even if this cell is re-run
for epoch in range(num_epochs):
    running_loss = 0.0
    for features, labels in tqdm(train_dataloader):
        labels = labels.float()
        # Read the sizes from the batch rather than hard-coding 220 / 433.
        batch_size, docs_per_query, num_features = features.shape

        optimizer.zero_grad()

        # Forward pass: score every document row independently, then regroup
        # the scores into one row per query.
        scores = model(features.reshape(-1, num_features)).reshape(batch_size, docs_per_query)

        # Compute the loss.
        # NOTE(review): presumably rankNet takes per-query scores and labels of
        # the same shape — confirm against losses.rankNet.
        loss = rankNet(scores, labels)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {(running_loss / len(train_dataloader)):.4f}")