# 1. Neural Network Training

In [1]:
import sys
if ".." not in sys.path:
    sys.path.insert(0, "..")

import torch
import torch.nn as nn
import numpy as np
from joblib import dump
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import ndcg_score

from RankingAlgorithms.neuralnetwork import NeuralNetwork
from RankingAlgorithms.pwsvm import RankSVM
from DataHandling.train_data import load_data


### 1.1. Load training and test data

In [2]:
# Feature columns used for training: handed to load_data when reading the data
# and used (via its length) to size the network input.
feature_indices = [
    108, 18, 103, 3, 8,
    13, 125, 126, 107, 17,
    102, 2, 7, 12, 105,
    15, 100, 0, 5, 10,
]

In [3]:
# Read 10,000 rows of the Fold1 training split, restricted to the columns
# listed in feature_indices.
X_train, y_train = load_data(
    path="../../../data/MSLR-WEB10K/Fold1/train.txt",
    nrows=10000,
    feature_indices=feature_indices,
)
# The test split is currently not loaded:
#X_test, y_test = load_data(path="../../../data/MSLR-WEB10K/Fold1/test.txt", nrows=10000)


In [4]:
# Count how many training samples carry each relevance label.
class_labels, class_counts = np.unique(y_train, return_counts=True)
print('label counts', (class_labels, class_counts))

# Per-class sample size used for balancing: the size of the rarest class.
# Taking the minimum (rather than the count of the last label) stays correct
# even when the highest label is not the rarest one; otherwise sampling
# without replacement would ask for more samples than a class contains.
n_samples_per_class = class_counts.min()

label counts (array([0., 1., 2., 3., 4.]), array([5481, 3000, 1326,  142,   51]))


In [5]:
# balance dataset: draw the same number of samples (without replacement) from
# every label so that each class is equally represented in the training subset
rng = np.random.default_rng(0)  # fixed seed so the subset is reproducible on re-run

# Labels are taken from the data instead of being hard-coded, and the per-class
# index arrays are concatenated into one flat index vector.
indices = np.concatenate([
    rng.choice(np.flatnonzero(y_train == label), n_samples_per_class, replace=False)
    for label in np.unique(y_train)
])

X_cut = X_train[indices, :]
y_cut = y_train[indices]

print(X_cut.shape, y_cut.shape)
print('label counts: ', np.unique(y_cut, return_counts=True))

(255, 20) (255,)
label counts:  (array([0., 1., 2., 3., 4.]), array([51, 51, 51, 51, 51]))


In [6]:
# Cumulative multi-hot targets: a sample with label k gets ones in columns
# 0..k and zeros afterwards (5 columns in total).
label_column = torch.as_tensor(y_cut).long().unsqueeze(1)  # one label per row
class_row = torch.arange(5).unsqueeze(0)                   # column positions 0..4
y_trans = (class_row <= label_column).float()


### 1.2. Create model

In [7]:
# Network with one input per selected feature and 10 hidden units.
# load=False presumably skips loading previously saved weights — confirm in
# NeuralNetwork.
model = NeuralNetwork(
    n_features=len(feature_indices),
    n_hidden=10,
    load=False,
)

In [8]:
# Loss function and optimizer used by the training loop.
# NOTE(review): the training targets are cumulative multi-hot rows rather than
# class indices or probability distributions; if train_loop passes them
# straight to this loss, nn.BCEWithLogitsLoss may be the intended criterion —
# confirm against NeuralNetwork.train_loop.
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
)

### 1.3. Train Model

In [9]:
# Convert the features to a tensor once; the input is identical in every
# epoch, so rebuilding it inside the loop is wasted work.
X_cut_tensor = torch.Tensor(X_cut)

# Run 100 epochs over the balanced subset.
for epoch in range(100):
    print(f"Epoch {epoch+1}\n-------------------------------")
    # train_loop presumably performs one pass over the data and returns the
    # loss for that pass — confirm in NeuralNetwork.train_loop.
    train_loss = model.train_loop(X_cut_tensor, y_trans, loss, optimizer)



Epoch 1
-------------------------------
loss: 5.243661  [   32/  255]
Epoch 2
-------------------------------
loss: 4.241916  [   32/  255]
Epoch 3
-------------------------------
loss: 4.698521  [   32/  255]
Epoch 4
-------------------------------
loss: 4.940804  [   32/  255]
Epoch 5
-------------------------------
loss: 5.576783  [   32/  255]
Epoch 6
-------------------------------
loss: 4.185291  [   32/  255]
Epoch 7
-------------------------------
loss: 4.594983  [   32/  255]
Epoch 8
-------------------------------
loss: 5.218893  [   32/  255]
Epoch 9
-------------------------------
loss: 4.765697  [   32/  255]
Epoch 10
-------------------------------
loss: 4.549974  [   32/  255]
Epoch 11
-------------------------------
loss: 4.565063  [   32/  255]
Epoch 12
-------------------------------
loss: 4.735998  [   32/  255]
Epoch 13
-------------------------------
loss: 5.642962  [   32/  255]
Epoch 14
-------------------------------
loss: 4.451110  [   32/  255]
Epoch 15
------

In [10]:
# Predict on the same balanced subset the model was trained on and report the
# fraction of predictions that equal the true label.
y_pred = model.evaluate(torch.Tensor(X_cut))
is_correct = np.array(y_pred) == np.array(y_cut)
n_correct = np.sum(is_correct)
print('accuracy: ', n_correct / len(y_cut))

accuracy:  0.2


In [11]:
# Tally incorrect (False) vs. correct (True) predictions on the balanced subset.
np.unique(np.array(y_pred) == np.array(y_cut), return_counts=True)

(array([False,  True]), array([204,  51]))

In [12]:

# Distribution of the predicted labels: which classes the model outputs and how often.
np.unique(y_pred, return_counts=True)

(array([2, 3]), array([249,   6]))

In [13]:
# NDCG@20 of the network's predictions. Both arrays are reshaped to a single
# row, so the whole balanced subset is scored as one query.
ndcg = ndcg_score(
    y_cut.reshape(1, -1),
    y_pred.reshape(1, -1),
    k=20,
)
ndcg

0.5951269310031864

### 1.4. Save Model

In [14]:
# Save the trained model via its save method to a path relative to the notebook.
model.save("../models/nn.pth")

# 2. Pairwise SVM Training

In [19]:
# Create a RankSVM and fit it on the balanced training subset.
# NOTE(review): the output recorded for this cell is a FileNotFoundError for
# 'src/python/models/ranksvm_coef.pkl' — a relative path, so it only resolves
# from one particular working directory. The execution count (19) is also out
# of sequence with the cells above. Re-run from a fresh kernel and confirm
# where RankSVM expects that file.
svm = RankSVM()
svm.fit(X_cut, y_cut)

FileNotFoundError: [Errno 2] No such file or directory: 'src/python/models/ranksvm_coef.pkl'

In [None]:
# Score every sample of the balanced subset with the SVM; the result is an
# array of real-valued scores (displayed below).
preds = svm.predict(X_cut)
preds

array([ 2.15770825,  0.50992615,  1.78252907,  5.5612664 , 14.27851976,
        5.21922592,  2.59735256,  0.63082727,  2.38209859,  3.6481356 ,
        1.24197684,  2.20891319,  1.04438219,  3.11336391,  6.63151552,
        1.93513109,  3.39426069,  1.97573538, -0.02410986,  1.56432121,
        0.67095663,  0.26377838,  1.68359401,  0.05318055,  1.34890445,
        1.02213252,  1.67102159,  4.96835832,  4.10873211, 17.05088478,
        3.12368885,  0.81831372,  5.05378742,  2.14668215,  4.9659123 ,
        1.76079487,  4.06223585,  0.96345589,  3.47819071, 18.63590791,
        1.05129043,  2.0381725 ,  5.86312733,  1.31376383,  3.92189547,
        6.84027136,  4.41560397,  2.41779838,  0.3908853 ,  1.31975431,
        1.89458781,  0.89293467,  1.37638772,  3.19030837,  1.843453  ,
        0.939346  ,  1.79368408,  2.46226496,  3.07197132,  1.3375709 ,
        1.22004651,  0.79768477,  1.50403245,  0.84332623, 10.92482543,
        7.29285028,  2.28888023, -0.09932107,  3.06827776, -0.44

In [None]:
# NDCG@10 of the SVM scores. Both arrays are reshaped to a single row, so the
# whole balanced subset is scored as one query.
# NOTE(review): the neural-network NDCG earlier in this notebook uses k=20, so
# the two values are not directly comparable; this assignment also overwrites
# that earlier `ndcg` value — confirm which cutoff is intended.
ndcg = ndcg_score(y_cut.reshape(1, -1), preds.reshape(1, -1), k=10)
ndcg

0.23533060690920224