In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm

In [2]:
class Dataset(torch.utils.data.Dataset):
    """In-memory dataset pairing feature rows with their targets.

    Args:
        X: Array-like of feature rows; converted to a ``float32`` tensor.
        y: Array-like of targets, one per row of ``X``; converted to a
            ``float32`` tensor.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        self.x_train = torch.tensor(X, dtype=torch.float32)
        self.y_train = torch.tensor(y, dtype=torch.float32)

        # __len__ follows y while __getitem__ indexes both tensors, so a
        # mismatch would otherwise only surface later as a wrong length or an
        # IndexError in the middle of training.
        if self.x_train.shape[:1] != self.y_train.shape[:1]:
            raise ValueError(
                "X and y must contain the same number of samples, got "
                f"{tuple(self.x_train.shape)} and {tuple(self.y_train.shape)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.y_train)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        return self.x_train[idx], self.y_train[idx]

In [3]:
class MLP(nn.Module):
    """Feed-forward network with two hidden layers and a single output.

    Each hidden layer is ``Linear -> Dropout -> ReLU``; the final layer is a
    plain ``Linear`` that produces one value per input row.

    Args:
        num_in_features: Width of each input row.
        dropout: Drop probability used by both dropout layers.
        hidden_features: Width of both hidden layers. The default of 1000
            keeps the architecture (and state_dict shapes) of earlier
            checkpoints.
    """

    def __init__(self, num_in_features=11, dropout=0.3, hidden_features=1000):
        super().__init__()
        # Layer positions inside the Sequential are kept stable so that
        # state_dict keys ("layers.0.*", "layers.3.*", "layers.6.*") still
        # match previously saved weights.
        self.layers = nn.Sequential(
            nn.Linear(num_in_features, hidden_features),
            nn.Dropout(p=dropout),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.Dropout(p=dropout),
            nn.ReLU(),
            nn.Linear(hidden_features, 1),
        )

    def forward(self, x):
        """Run the network on ``x`` after casting it to ``float32``."""
        return self.layers(x.float())

In [4]:
# Seed PyTorch's global RNG before anything random happens (model weight
# initialisation below, batch shuffling during training).
torch.manual_seed(42)

# Columns of the training CSV that are fed to the model as inputs
features = ['priorProb', 'entityPrior', 'maxPriorProb', 'numCands',
            'editDist', 'mentionIsCand', 'mentionInCand', 'isStartorEnd',
            'contextSim', 'coherence', 'rank']

# Load dataset: the `features` columns are the inputs, column 'y' is the target
X = pd.read_csv("./data/GBRT/wiki2vec_w10_100d.pkl_train.csv")
dataset = Dataset(X[features].values, X['y'].values)
trainloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the MLP. The input width is taken from the feature list so the
# model cannot silently go out of sync if a feature is added or removed.
mlp = MLP(num_in_features=len(features)).to(device)

# Define the loss function and optimizer
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=3e-4)

In [5]:
def train(epochs):
    """Train the global ``mlp`` on ``trainloader`` for ``epochs`` epochs.

    Uses the module-level ``optimizer``, ``loss_function`` and ``device``.
    After every epoch the mean per-batch loss is printed.

    The model is always switched back to evaluation mode on exit, including
    when training is cut short by an exception such as ``KeyboardInterrupt``;
    otherwise dropout would stay active for any inference that follows.

    Args:
        epochs: Number of passes over ``trainloader``.
    """
    mlp.train()
    try:
        for epoch in range(epochs):
            total_loss = 0.0
            num_batches = 0

            for inputs, targets in trainloader:
                inputs = inputs.float().to(device)
                targets = targets.float().to(device)
                # Turn the flat target vector into a column so it lines up
                # with the (batch, 1) model output instead of broadcasting.
                targets = targets.reshape((targets.shape[0], 1))

                optimizer.zero_grad()
                outputs = mlp(inputs)
                loss = loss_function(outputs, targets)
                loss.backward()
                optimizer.step()

                num_batches += 1
                total_loss += loss.item()

            # An empty loader yields no batches; report NaN rather than
            # failing with ZeroDivisionError.
            average_loss = total_loss / num_batches if num_batches else float("nan")
            print(f"[{epoch+1}/{epochs}] Average Loss: {average_loss:.5f}")
    finally:
        mlp.eval()

In [6]:
# Train for up to 500 epochs. Interrupting the kernel is the intended way to
# stop early, so the interrupt is reported instead of surfacing as an error
# (and instead of being swallowed without a trace).
try:
    train(500)
except KeyboardInterrupt:
    print("Training interrupted; keeping the weights learned so far.")

[1/500] Average Loss: 0.10137
[2/500] Average Loss: 0.02501
[3/500] Average Loss: 0.02249
[4/500] Average Loss: 0.02143
[5/500] Average Loss: 0.02022
[6/500] Average Loss: 0.01915
[7/500] Average Loss: 0.01850
[8/500] Average Loss: 0.01763
[9/500] Average Loss: 0.01712
[10/500] Average Loss: 0.01677
[11/500] Average Loss: 0.01626
[12/500] Average Loss: 0.01596
[13/500] Average Loss: 0.01571
[14/500] Average Loss: 0.01550
[15/500] Average Loss: 0.01508
[16/500] Average Loss: 0.01498
[17/500] Average Loss: 0.01475
[18/500] Average Loss: 0.01457
[19/500] Average Loss: 0.01433
[20/500] Average Loss: 0.01422
[21/500] Average Loss: 0.01421
[22/500] Average Loss: 0.01398
[23/500] Average Loss: 0.01395
[24/500] Average Loss: 0.01377
[25/500] Average Loss: 0.01373
[26/500] Average Loss: 0.01364
[27/500] Average Loss: 0.01359
[28/500] Average Loss: 0.01356
[29/500] Average Loss: 0.01341
[30/500] Average Loss: 0.01337
[31/500] Average Loss: 0.01330
[32/500] Average Loss: 0.01331
[33/500] Average 

In [7]:
# Persist the trained weights, then load them back to confirm the checkpoint
# round-trips into the model.
torch.save(mlp.state_dict(), './data/NN_ranker.pt')
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU can still be loaded on a CPU-only machine.
mlp.load_state_dict(torch.load('./data/NN_ranker.pt', map_location=device))

<All keys matched successfully>

In [8]:
# Score the last row of the training data as a quick sanity check.
x = X[features].values[-1].tolist()
# Make sure dropout is disabled: train() may have been interrupted before it
# could switch the model back to evaluation mode.
mlp.eval()
with torch.no_grad():
    print(mlp(torch.tensor(x, dtype=torch.float32, device=device)).item())

0.966212809085846
