<a href="https://colab.research.google.com/github/blancavazquez/Analisis-datos-masivos-2024/blob/main/notebooks/4_learningToRank.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Algoritmos learning to rank

El objetivo de esta libreta es estudiar dos algoritmos de *learning to rank*, RankNet y LambdaRank, aplicados a sistemas de recomendación.

[Créditos](https://github.com/liyinxiao/rankerNN2pmml)


## RankNet

In [None]:
import keras
from keras import backend as K
from keras.layers import Activation, Dense, Input, Subtract
from keras.models import Model
import random
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import warnings
warnings.filterwarnings('ignore')

In [None]:
"""Generación de datos dummies"""
# Configuration for the synthetic pairwise-ranking data set.
INPUT_DIM = 3    # number of features per item
N_SAMPLES = 50   # number of (item 1, item 2) pairs
SEED = 42        # fixed seed so Restart & Run All reproduces the same data

# Seed both generators used below. This covers only the data generated in this
# cell; Keras weight initialisation is not seeded here.
random.seed(SEED)
np.random.seed(SEED)

# Items to compare: row i of X1 is paired with row i of X2.
X1 = 2 * np.random.uniform(size=(N_SAMPLES, INPUT_DIM))  # Item 1, values in [0, 2)
X2 = np.random.uniform(size=(N_SAMPLES, INPUT_DIM))      # Item 2, values in [0, 1)
# One random binary label (0 or 1) per pair.
Y = [random.randint(0, 1) for _ in range(N_SAMPLES)]

print("Tamaño de X1",X1.shape)
print("Tamaño de X2",X2.shape)
print("Tamaño de Y",len(Y))

Tamaño de X1 (50, 3)
Tamaño de X2 (50, 3)
Tamaño de Y 50


In [None]:
# Data transformation.
# A single min-max scaler (default range (0, 1)) is fitted on the rows of X1
# and X2 stacked together, so both inputs are rescaled with the same
# per-feature bounds.
mms = MinMaxScaler().fit(np.vstack((X1, X2)))
X1, X2 = mms.transform(X1), mms.transform(X2)
Y = np.asarray(Y)  # labels as a NumPy array

In [None]:
# Preview the first input (X1) as a table.
# Column labels are derived from the array width, so this cell keeps working
# when INPUT_DIM is changed (for 3 features: Feature1, Feature2, Feature3).
feature_names = [f'Feature{i + 1}' for i in range(X1.shape[1])]
df_input = pd.DataFrame(X1, columns=feature_names)
df_input.head(10)  # show a sample instead of dumping every row

Unnamed: 0,Feature1,Feature2,Feature3
0,0.549301,0.185645,0.51443
1,1.0,0.577891,0.055167
2,0.840011,0.716998,0.943179
3,0.266198,0.319997,0.354972
4,0.118326,0.870957,0.374374
5,0.303822,0.540104,0.934014
6,0.959117,0.124363,0.561758
7,0.059425,0.805878,0.795463
8,0.051263,0.508419,0.020087
9,0.348503,0.863335,0.314616


In [None]:
def RankNet_model(input_shape):
    """Build the pairwise RankNet network.

    Two inputs of width ``input_shape`` are passed through the same three
    Dense layers (4 -> 2 -> 1 units), so both items are scored with shared
    weights. The two scores are subtracted (item 1 minus item 2) and the
    difference goes through a sigmoid.

    Args:
        input_shape: number of features of each item.

    Returns:
        An uncompiled Keras ``Model`` taking ``[input1, input2]`` and
        producing the sigmoid of the score difference.
    """
    # Shared fully connected scoring tower, applied to both inputs.
    shared_tower = (
        Dense(4, activation="relu", name='Relu_layer1'),
        Dense(2, activation='relu', name='Relu_layer2'),
        Dense(1, activation='linear', name='Identity_layer'),
    )

    def score(tensor):
        """Run a tensor through the shared layers in order."""
        for layer in shared_tower:
            tensor = layer(tensor)
        return tensor

    # First item to compare.
    first_item = Input(shape=(input_shape,), name='Input_layer1')
    first_score = score(first_item)

    # Second item to compare.
    second_item = Input(shape=(input_shape,), name='Input_layer2')
    second_score = score(second_item)

    # Comparison: score difference followed by a sigmoid activation.
    score_diff = Subtract(name='Subtract_layer')([first_score, second_score])
    preference = Activation('sigmoid', name='Activation_layer')(score_diff)

    return Model(inputs=[first_item, second_item], outputs=preference)

In [None]:
# Build the pairwise model and show its architecture.
# Model.summary() prints the table itself and returns None, so wrapping it in
# print() would only append a stray "None" line to the output.
model = RankNet_model(INPUT_DIM)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Input_layer1 (InputLayer)      [(None, 3)]          0           []                               
                                                                                                  
 Input_layer2 (InputLayer)      [(None, 3)]          0           []                               
                                                                                                  
 Relu_layer1 (Dense)            (None, 4)            16          ['Input_layer1[0][0]',           
                                                                  'Input_layer2[0][0]']           
                                                                                                  
 Relu_layer2 (Dense)            (None, 2)            10          ['Relu_layer1[0][0]',        

In [None]:
# Model training: compile with the Adam optimizer and binary cross-entropy,
# then fit on the (X1, X2) pairs against the labels Y.
model.compile(loss="binary_crossentropy", optimizer="adam")
model.fit(
    x=[X1, X2],
    y=Y,
    batch_size=10,
    epochs=5,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f0d4d1be5d0>

In [None]:
# Model output: extract the per-item scorer from the trained pairwise model.
# Layers are looked up by the names assigned in RankNet_model instead of by
# their position in model.layers, so the lookup cannot silently pick a
# different layer if the architecture changes.
# Node 0 of the shared 'Identity_layer' is its first call, i.e. the one on the
# 'Input_layer1' branch, so the resulting function needs only that input.
score_input = model.get_layer('Input_layer1').input
score_output = model.get_layer('Identity_layer').get_output_at(0)
get_ranker_output = K.function([score_input], [score_output])
Ranker_output = get_ranker_output([X1])[0]

In [None]:
# Show the scores produced by the ranker as a table.
df_output = pd.DataFrame(Ranker_output, columns=['score'])
df_output.head(10)  # show a sample instead of dumping every row

Unnamed: 0,score
0,0.059439
1,0.080631
2,0.090892
3,0.041242
4,0.045296
5,0.133986
6,0.090748
7,0.135655
8,0.019991
9,0.024268


## LambdaRank

In [None]:
!pip install LambdaRankNN
from LambdaRankNN import LambdaRankNN

#Generación de datos dummies
X = np.array([[0.2, 0.3, 0.4],
              [0.1, 0.7, 0.4],
              [0.3, 0.4, 0.1],
              [0.8, 0.4, 0.3],
              [0.9, 0.35, 0.25]])
y = np.array([0, 1, 0, 0, 2])
qid = np.array([1, 1, 1, 2, 2])

#Entrenamiento del modelo
ranker = LambdaRankNN(input_size=X.shape[1], hidden_layer_sizes=(16,8,),
                      activation=('relu', 'relu',),
                      solver='adam')
ranker.fit(X, y, qid, epochs=5)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
ndcg: 0.8154648767857287


In [None]:
# Predict with the fitted ranker on the same X used for training; y_pred is
# not used again within this cell.
y_pred = ranker.predict(X)
# Evaluate on the training data with eval_at=2; the recorded output of this
# cell reports the metric as "ndcg@2".
ranker.evaluate(X, y, qid, eval_at=2)

ndcg@2: 0.8154648767857287
