# 1. Neural Network Training

In [1]:
import sys
if ".." not in sys.path:
    sys.path.insert(0, "..")

import torch
import torch.nn as nn
import numpy as np
from joblib import dump
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import ndcg_score

from RankingAlgorithms.neuralnetwork import NeuralNetwork
from RankingAlgorithms.pwsvm import RankSVM
from DataHandling.train_data import load_data


### 1.1. Load training and test data

In [2]:
feature_indices = [108, 18, 103, 3, 8, 13, 125, 126, 107, 17, 102, 2, 7, 12, 105, 15, 100, 0, 5, 10]

In [3]:
# load training data
X_train, y_train = load_data(path="../../../data/MSLR-WEB10K/Fold1/train.txt", nrows=10000, feature_indices=feature_indices)
#X_test, y_test = load_data(path="../../../data/MSLR-WEB10K/Fold1/test.txt", nrows=10000)


In [4]:
print('label counts', np.unique(y_train, return_counts=True))
n_samples_per_class = np.unique(y_train, return_counts=True)[1][-1]

label counts (array([0., 1., 2., 3., 4.]), array([5481, 3000, 1326,  142,   51]))


In [5]:
# balance dataset
indices = []
for label in range(5):
    indices.append(list(np.random.choice(np.where(y_train == label)[0], n_samples_per_class, replace=False)))
    
indices = np.array(indices).flatten()

X_cut = X_train[indices, :]
y_cut = y_train[indices]

print(X_cut.shape, y_cut.shape)
print('label counts: ', np.unique(y_cut, return_counts=True))

(255, 20) (255,)
label counts:  (array([0., 1., 2., 3., 4.]), array([51, 51, 51, 51, 51]))


In [6]:
y_trans = torch.zeros(len(y_cut), 5)
for i, label in enumerate(y_cut):
    y_trans[i, 0:int(label)+1] = 1 


### 1.2. Create model

In [7]:
model = NeuralNetwork(n_features=len(feature_indices), n_hidden=10, load=False)

In [8]:
### define optimizer and loss fct
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
loss = nn.CrossEntropyLoss()

### 1.3. Train Model

In [9]:
for epoch in range(100):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train_loss = model.train_loop(torch.Tensor(X_cut), y_trans, loss, optimizer)



Epoch 1
-------------------------------
loss: 5.646904  [   32/  255]
Epoch 2
-------------------------------
loss: 5.230911  [   32/  255]
Epoch 3
-------------------------------
loss: 5.024089  [   32/  255]
Epoch 4
-------------------------------
loss: 4.659718  [   32/  255]
Epoch 5
-------------------------------
loss: 4.626517  [   32/  255]
Epoch 6
-------------------------------
loss: 4.831213  [   32/  255]
Epoch 7
-------------------------------
loss: 5.546222  [   32/  255]
Epoch 8
-------------------------------
loss: 4.679454  [   32/  255]
Epoch 9
-------------------------------
loss: 4.542897  [   32/  255]
Epoch 10
-------------------------------
loss: 4.620372  [   32/  255]
Epoch 11
-------------------------------
loss: 4.130543  [   32/  255]
Epoch 12
-------------------------------
loss: 4.545737  [   32/  255]
Epoch 13
-------------------------------
loss: 4.987671  [   32/  255]
Epoch 14
-------------------------------
loss: 5.100263  [   32/  255]
Epoch 15
------

In [10]:
y_pred = model.evaluate(torch.Tensor(X_cut))
print('accuracy: ', np.sum(np.array(y_pred) == np.array(y_cut)) / len(y_cut))

accuracy:  0.00784313725490196


In [11]:
np.unique(np.array(y_pred) == np.array(y_cut), return_counts=True)

(array([False,  True]), array([253,   2]))

In [12]:

np.unique(y_pred, return_counts=True)

(array([-1,  1]), array([239,  16]))

In [13]:
ndcg = ndcg_score(y_cut.reshape(1, -1), y_pred.reshape(1, -1), k=20)
ndcg

0.5670634643155298

### 1.4. Save Model

In [14]:
# save mdoel
model.save("../models/nn.pth")

# Pairwise SVM Training

In [15]:
svm = RankSVM()
svm.fit(X_cut, y_cut)
dump(svm, "../models/svm.joblib")



['../models/svm.joblib']

In [16]:
preds = svm.predict(X_cut)
preds

array([1.90958881, 1.72799399, 1.86379818, 2.02755245, 2.99523792,
       2.06847515, 1.7993963 , 1.50653503, 2.67641329, 1.82912804,
       1.51366606, 1.38462541, 1.27792644, 1.83515976, 2.01484859,
       1.76154178, 1.48537337, 1.47457715, 0.2519111 , 1.57014001,
       1.39986249, 0.6585438 , 1.5304209 , 1.32594041, 1.727898  ,
       1.026968  , 1.45968672, 1.88032662, 1.74742562, 2.15590273,
       1.97467458, 1.51736082, 1.54048126, 1.56391523, 1.70900246,
       1.57238727, 1.65967897, 2.3362331 , 1.34308825, 2.20015668,
       1.64013029, 2.56013976, 2.53493189, 1.47565326, 2.04163444,
       2.4037059 , 2.96781842, 2.29064057, 1.2449567 , 2.0794536 ,
       1.27414051, 2.44241348, 2.27696806, 2.34336325, 1.5187813 ,
       1.40743793, 2.6317349 , 2.12419614, 2.05946536, 1.40772719,
       2.35468932, 1.83335929, 1.67964576, 1.58403716, 2.01716951,
       2.3968    , 2.07763192, 1.52378332, 2.04193126, 1.90389818,
       2.31289752, 1.38477485, 2.43500294, 1.90084897, 1.87890

In [17]:
ndcg = ndcg_score(y_cut.reshape(1, -1), preds.reshape(1, -1), k=10)
ndcg

0.8755458364887027