# 1. Neural Network Training

In [1]:
import sys
if ".." not in sys.path:
    sys.path.insert(0, "..")

import torch
import torch.nn as nn
import numpy as np
from joblib import dump
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import ndcg_score

from RankingAlgorithms.neuralnetwork import NeuralNetwork
from RankingAlgorithms.pwsvm import RankSVM
from DataHandling.train_data import load_data


### 1.1. Load training and test data

In [2]:
feature_indices = [108, 18, 103, 3, 8, 13, 125, 126, 107, 17, 102, 2, 7, 12, 105, 15, 100, 0, 5, 10]

In [3]:
# load training data
X_train, y_train = load_data(path="../../../data/MSLR-WEB10K/Fold1/train.txt", nrows=10000, feature_indices=feature_indices)
#X_test, y_test = load_data(path="../../../data/MSLR-WEB10K/Fold1/test.txt", nrows=10000)


In [4]:
print('label counts', np.unique(y_train, return_counts=True))
n_samples_per_class = np.unique(y_train, return_counts=True)[1][-1]

label counts (array([0., 1., 2., 3., 4.]), array([5481, 3000, 1326,  142,   51]))


In [5]:
# balance dataset
indices = []
for label in range(5):
    indices.append(list(np.random.choice(np.where(y_train == label)[0], n_samples_per_class, replace=False)))
    
indices = np.array(indices).flatten()

X_cut = X_train[indices, :]
y_cut = y_train[indices]

print(X_cut.shape, y_cut.shape)
print('label counts: ', np.unique(y_cut, return_counts=True))

(255, 20) (255,)
label counts:  (array([0., 1., 2., 3., 4.]), array([51, 51, 51, 51, 51]))


In [6]:
y_trans = torch.zeros(len(y_cut), 5)
for i, label in enumerate(y_cut):
    y_trans[i, 0:int(label)+1] = 1 


### 1.2. Create model

In [7]:
model = NeuralNetwork(n_features=len(feature_indices), n_hidden=10, load=False)

In [8]:
### define optimizer and loss fct
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
loss = nn.CrossEntropyLoss()

### 1.3. Train Model

In [9]:
for epoch in range(100):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train_loss = model.train_loop(torch.Tensor(X_cut), y_trans, loss, optimizer)



Epoch 1
-------------------------------
loss: 5.235372  [   32/  255]
Epoch 2
-------------------------------
loss: 4.332633  [   32/  255]
Epoch 3
-------------------------------
loss: 5.008522  [   32/  255]
Epoch 4
-------------------------------
loss: 4.502904  [   32/  255]
Epoch 5
-------------------------------
loss: 4.550197  [   32/  255]
Epoch 6
-------------------------------
loss: 4.224727  [   32/  255]
Epoch 7
-------------------------------
loss: 4.303533  [   32/  255]
Epoch 8
-------------------------------
loss: 5.106229  [   32/  255]
Epoch 9
-------------------------------
loss: 4.909264  [   32/  255]
Epoch 10
-------------------------------
loss: 4.294786  [   32/  255]
Epoch 11
-------------------------------
loss: 4.931311  [   32/  255]
Epoch 12
-------------------------------
loss: 4.567747  [   32/  255]
Epoch 13
-------------------------------
loss: 4.777458  [   32/  255]
Epoch 14
-------------------------------
loss: 4.546021  [   32/  255]
Epoch 15
------

In [10]:
y_pred = model.evaluate(torch.Tensor(X_cut))
print('accuracy: ', np.sum(np.array(y_pred) == np.array(y_cut)) / len(y_cut))

accuracy:  0.2


In [11]:
np.unique(np.array(y_pred) == np.array(y_cut), return_counts=True)

(array([False,  True]), array([204,  51]))

In [12]:

np.unique(y_pred, return_counts=True)

(array([1]), array([255]))

In [13]:
ndcg = ndcg_score(y_cut.reshape(1, -1), y_pred.reshape(1, -1), k=20)
ndcg

0.49999999999999994

### 1.4. Save Model

In [14]:
# save mdoel
model.save("../models/nn.pth")

# Pairwise SVM Training

In [15]:
svm = RankSVM(load=False)
svm.fit(X_cut, y_cut)



In [20]:
preds = svm.predict(X_cut)
svm.save_coef("../models/ranksvm_coef.pkl")
preds

array([-6.76793399e+00, -4.86979760e+00, -1.16243722e+01, -7.87352797e+00,
       -5.46389431e+00, -1.28447799e+01, -8.65985065e-01,  1.56246046e-01,
       -1.88554148e+00, -6.41238396e+00, -9.48179586e-01, -1.83691934e+00,
       -9.41967821e-01, -6.17026194e-01, -6.73894076e+00, -9.21920136e-01,
       -6.55795737e+00, -1.30042793e+00, -3.77519488e+00, -3.24899065e+00,
       -1.81251054e+00, -6.19496705e-01,  1.83292444e-01, -6.05044307e+00,
       -6.67430998e+00, -2.04984044e+00, -7.02932425e-01, -4.66791753e+00,
       -2.05567515e-01, -7.17213089e+00, -4.50693459e-01, -2.04947909e+00,
       -1.27292429e+00, -4.33680659e+00, -2.19481581e+00, -1.18554889e+00,
       -7.18551069e+00, -5.06002081e+00, -4.50050433e+00, -6.93675671e-01,
       -4.19411620e+00, -5.56314055e-01, -1.93361801e+01, -5.35237221e+00,
       -1.25420656e+00, -3.71445085e+00, -5.46829891e+00, -2.16658268e+00,
       -3.25423227e+00,  6.33638358e-01, -4.12400614e+00, -1.37828927e+00,
       -2.44093662e+00, -

In [17]:
ndcg = ndcg_score(y_cut.reshape(1, -1), preds.reshape(1, -1), k=10)
ndcg

0.887853830146255