# AutoInt+ 구현 (with TensorFlow)

In [27]:
import re 
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
import random
plt.rc('font', family='NanumMyeongjo')

import joblib

import time
import random
import pandas as pd
import numpy as np

from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, MaxPooling2D, Conv2D, Dropout, Lambda, Dense, Flatten, Activation, Input, Embedding, BatchNormalization
from tensorflow.keras.initializers import glorot_normal, Zeros, TruncatedNormal
from tensorflow.keras.regularizers import l2


from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy


from tensorflow.keras.optimizers import Adam
from collections import defaultdict
import math

In [3]:
class FeaturesEmbedding(Layer):
    def __init__(self, field_dims, embed_dim):
        super().__init__()
        self.offsets = np.array((0, *np.cumsum(field_dims)[:-1]), dtype=np.int64)
        self.embedding = tf.keras.layers.Embedding(
                sum(field_dims), 
                embed_dim, 
                embeddings_initializer='glorot_uniform'  # TensorFlow's equivalent of xavier_uniform
        )
    
    def call(self, x):
        x = x + tf.constant(self.offsets, dtype=x.dtype)
        return self.embedding(x)

In [4]:
class MultiLayerPerceptron(Layer):
    def __init__(self, inputs_dim, hidden_units, activation='relu', l2_reg=0, 
                 dropout_rate=0, use_bn=False, init_std=0.0001, output_layer=True):
        super().__init__()
        self.dropout_rate = dropout_rate
        self.use_bn = use_bn
        
        hidden_units = [inputs_dim] + list(hidden_units)
        if output_layer:
            hidden_units += [1]
        
        self.layers = []
        for i in range(len(hidden_units) - 1):
            # Linear layer
            layer = Dense(hidden_units[i+1], 
                          kernel_initializer=tf.random_normal_initializer(mean=0, stddev=init_std),
                          kernel_regularizer=tf.keras.regularizers.l2(l2_reg))
            self.layers.append(layer)
            
            # Batch Normalization
            if use_bn:
                self.layers.append(tf.keras.layers.BatchNormalization())
            
            # Activation
            self.layers.append(tf.keras.layers.Activation('relu'))
            
            # Dropout
            self.layers.append(tf.keras.layers.Dropout(dropout_rate))
    
    def call(self, inputs, training=False):
        x = inputs
        for layer in self.layers:
            x = layer(x, training=training)
        return x

In [5]:
class AutoIntMLP(Layer):
    def __init__(self, field_dims, embedding_size, att_layer_num=3, att_head_num=2, 
                 att_res=True, dnn_hidden_units=(32, 32), dnn_activation='relu',
                 l2_reg_dnn=0, l2_reg_embedding=1e-5, dnn_use_bn=False, 
                 dnn_dropout=0.4, init_std=0.0001):
        super().__init__()
        
        self.embedding = FeaturesEmbedding(field_dims, embedding_size)
        self.num_fields = len(field_dims)
        self.embedding_size = embedding_size
        self.att_output_dim = self.num_fields * self.embedding_size
        self.embed_output_dim = len(field_dims) * embedding_size
        
        self.dnn_linear = Dense(1, use_bias=False, 
                                kernel_initializer=tf.random_normal_initializer(stddev=init_std))
        
        self.dnn = MultiLayerPerceptron(
            self.embed_output_dim, 
            dnn_hidden_units,
            activation=dnn_activation,
            l2_reg=l2_reg_dnn,
            dropout_rate=dnn_dropout,
            use_bn=dnn_use_bn,
            init_std=init_std
        )
        
        self.int_layers = [
            MultiHeadSelfAttention(
                self.embedding_size, 
                head_num=att_head_num, 
                use_res=att_res
            ) for _ in range(att_layer_num)
        ]
    
    def call(self, X, training=False):
        embed_x = self.embedding(X)
        dnn_embed = embed_x
        att_input = embed_x
        
        for layer in self.int_layers:
            att_input = layer(att_input)
        
        att_output = tf.reshape(att_input, [-1, self.att_output_dim])
        att_output = tf.nn.relu(self.dnn_linear(att_output))
        
        dnn_output = self.dnn(tf.reshape(dnn_embed, [-1, self.embed_output_dim]), training=training)
        
        y_pred = tf.sigmoid(att_output + dnn_output)
        return y_pred

In [6]:
class MultiHeadSelfAttention(Layer):
    def __init__(self, embedding_size, head_num=2, use_res=True, scaling=False):
        super().__init__()
        if head_num <= 0:
            raise ValueError('head_num must be a int > 0')
        if embedding_size % head_num != 0:
            raise ValueError('embedding_size is not an integer multiple of head_num!')
        
        self.att_embedding_size = embedding_size // head_num
        self.head_num = head_num
        self.use_res = use_res
        self.scaling = scaling
        
        self.W_Query = tf.Variable(tf.random.normal((embedding_size, embedding_size), stddev=0.05))
        self.W_Key = tf.Variable(tf.random.normal((embedding_size, embedding_size), stddev=0.05))
        self.W_Value = tf.Variable(tf.random.normal((embedding_size, embedding_size), stddev=0.05))
        
        if self.use_res:
            self.W_Res = tf.Variable(tf.random.normal((embedding_size, embedding_size), stddev=0.05))
    
    def call(self, inputs):
        if len(inputs.shape) != 3:
            raise ValueError(f"Unexpected inputs dimensions {len(inputs.shape)}, expect to be 3 dimensions")
        
        # Linear transformations
        querys = tf.tensordot(inputs, self.W_Query, axes=1)
        keys = tf.tensordot(inputs, self.W_Key, axes=1)
        values = tf.tensordot(inputs, self.W_Value, axes=1)
        
        # Split heads
        querys = tf.stack(tf.split(querys, self.head_num, axis=-1))
        keys = tf.stack(tf.split(keys, self.head_num, axis=-1))
        values = tf.stack(tf.split(values, self.head_num, axis=-1))
        
        # Attention
        inner_product = tf.einsum('bnik,bnjk->bnij', querys, keys)
        if self.scaling:
            inner_product /= tf.sqrt(float(self.att_embedding_size))
        
        normalized_att_scores = tf.nn.softmax(inner_product, axis=-1)
        result = tf.matmul(normalized_att_scores, values)
        
        # Combine heads
        result = tf.concat(tf.unstack(result), axis=-1)
        
        # Residual connection
        if self.use_res:
            result += tf.tensordot(inputs, self.W_Res, axes=1)
        
        return tf.nn.relu(result)

In [7]:
class AutoIntMLPModel(tf.keras.Model):
    def __init__(self, field_dims, embedding_size, att_layer_num=3, att_head_num=2, 
                 att_res=True, l2_reg_dnn=0, l2_reg_embedding=1e-5, 
                 dnn_hidden_units=(32, 32), dnn_activation='relu',
                 dnn_use_bn=False, dnn_dropout=0, init_std=0.0001):
        super().__init__()
        self.autoInt_mlp_layer = AutoIntMLP(
            field_dims, 
            embedding_size, 
            att_layer_num=att_layer_num, 
            att_head_num=att_head_num,
            att_res=att_res, 
            dnn_hidden_units=dnn_hidden_units,
            dnn_activation=dnn_activation,
            l2_reg_dnn=l2_reg_dnn, 
            l2_reg_embedding=l2_reg_embedding,
            dnn_use_bn=dnn_use_bn, 
            dnn_dropout=dnn_dropout, 
            init_std=init_std
        )
    
    def call(self, inputs, training=False):
        return self.autoInt_mlp_layer(inputs, training=training)

In [20]:
# 평가 함수는 아래의 링크에서 가져왔습니다.
# https://www.programcreek.com/python/?code=MaurizioFD%2FRecSys2019_DeepLearning_Evaluation%2FRecSys2019_DeepLearning_Evaluation-master%2FConferences%2FKDD%2FMCRec_our_interface%2FMCRecRecommenderWrapper.py
def get_DCG(ranklist, y_true):
    dcg = 0.0
    for i in range(len(ranklist)):
        item = ranklist[i]
        if item in y_true:
            dcg += 1.0 / math.log(i + 2)
    return  dcg

def get_IDCG(ranklist, y_true):
    idcg = 0.0
    i = 0
    for item in y_true:
        if item in ranklist:
            idcg += 1.0 / math.log(i + 2)
            i += 1
    return idcg

def get_NDCG(ranklist, y_true):
    '''NDCG 평가 지표'''
    ranklist = np.array(ranklist).astype(int)
    y_true = np.array(y_true).astype(int)
    dcg = get_DCG(ranklist, y_true)
    idcg = get_IDCG(y_true, y_true)
    if idcg == 0:
        return 0
    return round( (dcg / idcg), 5)

def get_hit_rate(ranklist, y_true):
    '''hitrate 평가 지표'''
    c = 0
    for y in y_true:
        if y in ranklist:
            c += 1
    return round( c / len(y_true), 5 )

In [22]:
def test_model(model, test_df):
    '''모델 테스트'''
    user_pred_info = defaultdict(list)
    total_rows = len(test_df)
    for i in range(0, total_rows, batch_size):
        features = test_df.iloc[i:i + batch_size, :-1].values
        y_pred = model.predict(features, verbose=False)
        for feature, p in zip(features, y_pred):
            u_i = feature[:2]
            user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
    return user_pred_info

### 데이터 로드 후 정제

In [10]:
data_path='./data/ml-1m/ml-1m'

In [11]:
# 1. 데이터 불러오기
# csv 데이터이므로 read_csv로 가져옵니다.
movielens_rcmm = pd.read_csv(f"{data_path}/movielens_rcmm_v2.csv", dtype=str)
print(movielens_rcmm.shape)
movielens_rcmm.head()

(1000209, 15)


Unnamed: 0,user_id,movie_id,movie_decade,movie_year,rating_year,rating_month,rating_decade,genre1,genre2,genre3,gender,age,occupation,zip,label
0,1,1193,1970s,1975,2001,1,2000s,Drama,no,no,F,1,10,48067,1
1,1,661,1990s,1996,2001,1,2000s,Animation,Children's,Musical,F,1,10,48067,0
2,1,914,1960s,1964,2001,1,2000s,Musical,Romance,no,F,1,10,48067,0
3,1,3408,2000s,2000,2001,1,2000s,Drama,no,no,F,1,10,48067,1
4,1,2355,1990s,1998,2001,1,2000s,Animation,Children's,Comedy,F,1,10,48067,1


In [12]:
label_encoders = {col: LabelEncoder() for col in movielens_rcmm.columns[:-1]} # label은 제외

for col, le in label_encoders.items():
    movielens_rcmm[col] = le.fit_transform(movielens_rcmm[col])

In [13]:
movielens_rcmm['label'] = movielens_rcmm['label'].astype(np.float32)

In [14]:
# 3. 학습 데이터와 테스트데이터로 분리, 0.2 정도로 분리
train_df, test_df = train_test_split(movielens_rcmm, test_size=0.2, random_state=42)
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 800167 entries, 416292 to 121958
Data columns (total 15 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   user_id        800167 non-null  int32  
 1   movie_id       800167 non-null  int32  
 2   movie_decade   800167 non-null  int32  
 3   movie_year     800167 non-null  int32  
 4   rating_year    800167 non-null  int32  
 5   rating_month   800167 non-null  int32  
 6   rating_decade  800167 non-null  int32  
 7   genre1         800167 non-null  int32  
 8   genre2         800167 non-null  int32  
 9   genre3         800167 non-null  int32  
 10  gender         800167 non-null  int32  
 11  age            800167 non-null  int32  
 12  occupation     800167 non-null  int32  
 13  zip            800167 non-null  int32  
 14  label          800167 non-null  float32
dtypes: float32(1), int32(14)
memory usage: 51.9 MB


In [15]:
# 필요 컬럼들과 레이블 정의
# 필드의 각 고유 개수를 정의하는 field_dims를 정의합니다. 이는  임베딩 때 활용됩니다. 
u_i_feature = ['user_id', 'movie_id']
meta_features = ['movie_decade', 'movie_year', 'rating_year', 'rating_month', 'rating_decade', 'genre1','genre2', 'genre3', 'gender', 'age', 'occupation', 'zip']
label = 'label'
field_dims = np.max(movielens_rcmm[u_i_feature + meta_features].astype(np.int64).values, axis=0) + 1
field_dims

array([6040, 3706,   10,   81,    4,   12,    1,   18,   18,   16,    2,
          7,   21, 3439], dtype=int64)

In [16]:
# 에포크, 학습률, 드롭아웃, 배치사이즈, 임베딩 크기 등 정의
epochs=5
learning_rate= 0.0001
dropout= 0.4
batch_size = 2048
embed_dim= 16

In [17]:
autoIntMLP_model = AutoIntMLPModel(
    field_dims=field_dims, 
    embedding_size=embed_dim, 
    att_layer_num=3, 
    att_head_num=2, 
    att_res=True, 
    dnn_hidden_units=(32, 32), 
    dnn_activation='relu',
    l2_reg_dnn=0, 
    l2_reg_embedding=1e-5, 
    dnn_use_bn=False, 
    dnn_dropout=dropout, 
    init_std=0.0001
)

In [18]:
optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy(from_logits=False)

autoIntMLP_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

In [19]:
history = autoIntMLP_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 70ms/step - binary_crossentropy: 0.6915 - loss: 0.6915 - val_binary_crossentropy: 0.6750 - val_loss: 0.6750
Epoch 2/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 67ms/step - binary_crossentropy: 0.6709 - loss: 0.6709 - val_binary_crossentropy: 0.6268 - val_loss: 0.6268
Epoch 3/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 77ms/step - binary_crossentropy: 0.6418 - loss: 0.6418 - val_binary_crossentropy: 0.6109 - val_loss: 0.6109
Epoch 4/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 74ms/step - binary_crossentropy: 0.6355 - loss: 0.6355 - val_binary_crossentropy: 0.6067 - val_loss: 0.6067
Epoch 5/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 78ms/step - binary_crossentropy: 0.6325 - loss: 0.6325 - val_binary_crossentropy: 0.6037 - val_loss: 0.6037


In [24]:
# 사용자에게 예측된 정보를 저장하는 딕셔너리 
user_pred_info = {}
# top10개
top = 10
# 테스트 값을 가지고 옵니다. 
mymodel_user_pred_info = test_model(autoIntMLP_model, test_df)
# 사용자마다 돌면서 예측 데이터 중 가장 높은 top 10만 가져옵니다. 
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist
# 원본 테스트 데이터에서 label이 1인 사용자 별 영화 정보를 가져옵니다.
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|███████████████████████████████████████████████████████████████████████████| 6038/6038 [00:00<00:00, 77246.32it/s]


In [25]:
mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

# 모델 예측값과 원본 테스트 데이터를 비교해서 어느정도 성능이 나왔는지 NDCG와 Hitrate를 비교합니다.
# NDCG
for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    # NDCG 값 구하기
    user_ndcg = get_NDCG(mymodel_pred, testset)

    mymodel_ndcg_result[user] = user_ndcg

# Hitrate
for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    # hitrate 값 구하기
    user_hitrate = get_hit_rate(mymodel_pred, testset)

    # 사용자 hitrate 결과 저장
    mymodel_hitrate_result[user] = user_hitrate

100%|████████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 6760.22it/s]
100%|███████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 53724.87it/s]


In [26]:
print(" mymodel ndcg : ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print(" mymodel hitrate : ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

 mymodel ndcg :  0.65873
 mymodel hitrate :  0.6268


In [28]:
np.save('./data/field_dims_plus1.npy', field_dims)
autoIntMLP_model.save_weights('./model/autoIntMLP_model_1.weights.h5')
joblib.dump(label_encoders, './data/label_encoders_1.pkl')

['./data/label_encoders_1.pkl']

In [30]:
# 에포크, 학습률, 드롭아웃, 배치사이즈, 임베딩 크기 등 정의
epochs=10
learning_rate= 0.0001
dropout= 0.6
batch_size = 2048
embed_dim= 16

autoIntMLP_model = AutoIntMLPModel(
    field_dims=field_dims, 
    embedding_size=embed_dim, 
    att_layer_num=3, 
    att_head_num=2, 
    att_res=True, 
    dnn_hidden_units=(32, 32), 
    dnn_activation='relu',
    l2_reg_dnn=0, 
    l2_reg_embedding=1e-5, 
    dnn_use_bn=False, 
    dnn_dropout=dropout, 
    init_std=0.0001
)

optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy(from_logits=False)

autoIntMLP_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

history = autoIntMLP_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 84ms/step - binary_crossentropy: 0.6920 - loss: 0.6920 - val_binary_crossentropy: 0.6831 - val_loss: 0.6831
Epoch 2/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 87ms/step - binary_crossentropy: 0.6836 - loss: 0.6836 - val_binary_crossentropy: 0.6602 - val_loss: 0.6602
Epoch 3/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 79ms/step - binary_crossentropy: 0.6680 - loss: 0.6680 - val_binary_crossentropy: 0.6378 - val_loss: 0.6378
Epoch 4/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 76ms/step - binary_crossentropy: 0.6598 - loss: 0.6598 - val_binary_crossentropy: 0.6312 - val_loss: 0.6312
Epoch 5/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 77ms/step - binary_crossentropy: 0.6568 - loss: 0.6568 - val_binary_crossentropy: 0.6280 - val_loss: 0.6280
Epoch 6/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [31]:
user_pred_info = {}
top = 10
mymodel_user_pred_info = test_model(autoIntMLP_model, test_df) 
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))
    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_ndcg = get_NDCG(mymodel_pred, testset)
    mymodel_ndcg_result[user] = user_ndcg

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_hitrate = get_hit_rate(mymodel_pred, testset)

    mymodel_hitrate_result[user] = user_hitrate

print(" mymodel ndcg : ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print(" mymodel hitrate : ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|███████████████████████████████████████████████████████████████████████████| 6038/6038 [00:00<00:00, 66615.31it/s]
100%|████████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 6493.64it/s]
100%|███████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 51631.69it/s]

 mymodel ndcg :  0.65776
 mymodel hitrate :  0.62515





에포크를 늘리고 드롭아웃 사이즈를 조정했더니, 약간 성능이 안좋아졌다.

adam 말고 다른 옵티마이저들을 먼저 테스트하고, 옵티마이저를 고정시킨 후에 다른 하이퍼파라미터들을 조정해보자.

옵티마이저는 AdamW, RMSprop, NAdam을 사용해보겠다.

1. AdamW

In [33]:
# 에포크, 학습률, 드롭아웃, 배치사이즈, 임베딩 크기 등 정의
epochs=5
learning_rate= 0.0001
dropout= 0.4
batch_size = 2048
embed_dim= 16

autoIntMLP_model = AutoIntMLPModel(
    field_dims=field_dims, 
    embedding_size=embed_dim, 
    att_layer_num=3, 
    att_head_num=2, 
    att_res=True, 
    dnn_hidden_units=(32, 32), 
    dnn_activation='relu',
    l2_reg_dnn=0, 
    l2_reg_embedding=1e-5, 
    dnn_use_bn=False, 
    dnn_dropout=dropout, 
    init_std=0.0001
)

import tensorflow as tf
import tensorflow.keras.optimizers
optimizer = tf.keras.optimizers.AdamW(learning_rate=learning_rate, weight_decay=1e-5)
loss_fn = BinaryCrossentropy(from_logits=False)

autoIntMLP_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

history = autoIntMLP_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 113ms/step - binary_crossentropy: 0.6917 - loss: 0.6917 - val_binary_crossentropy: 0.6772 - val_loss: 0.6772
Epoch 2/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 105ms/step - binary_crossentropy: 0.6733 - loss: 0.6733 - val_binary_crossentropy: 0.6319 - val_loss: 0.6319
Epoch 3/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 108ms/step - binary_crossentropy: 0.6437 - loss: 0.6437 - val_binary_crossentropy: 0.6133 - val_loss: 0.6133
Epoch 4/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 111ms/step - binary_crossentropy: 0.6361 - loss: 0.6361 - val_binary_crossentropy: 0.6088 - val_loss: 0.6088
Epoch 5/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 110ms/step - binary_crossentropy: 0.6337 - loss: 0.6337 - val_binary_crossentropy: 0.6054 - val_loss: 0.6054


In [34]:
user_pred_info = {}
top = 10
mymodel_user_pred_info = test_model(autoIntMLP_model, test_df) 
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))
    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_ndcg = get_NDCG(mymodel_pred, testset)
    mymodel_ndcg_result[user] = user_ndcg

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_hitrate = get_hit_rate(mymodel_pred, testset)

    mymodel_hitrate_result[user] = user_hitrate

print(" mymodel ndcg : ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print(" mymodel hitrate : ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|███████████████████████████████████████████████████████████████████████████| 6038/6038 [00:00<00:00, 76052.82it/s]
100%|████████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 7215.83it/s]
100%|███████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 44701.34it/s]

 mymodel ndcg :  0.65909
 mymodel hitrate :  0.62665





2. RMSProp

In [35]:
autoIntMLP_model = AutoIntMLPModel(
    field_dims=field_dims, 
    embedding_size=embed_dim, 
    att_layer_num=3, 
    att_head_num=2, 
    att_res=True, 
    dnn_hidden_units=(32, 32), 
    dnn_activation='relu',
    l2_reg_dnn=0, 
    l2_reg_embedding=1e-5, 
    dnn_use_bn=False, 
    dnn_dropout=dropout, 
    init_std=0.0001
)

import tensorflow as tf
import tensorflow.keras.optimizers
optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate, momentum=0.9)
loss_fn = BinaryCrossentropy(from_logits=False)

autoIntMLP_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

history = autoIntMLP_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 121ms/step - binary_crossentropy: 0.6893 - loss: 0.6893 - val_binary_crossentropy: 0.6830 - val_loss: 0.6830
Epoch 2/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 99ms/step - binary_crossentropy: 0.6867 - loss: 0.6867 - val_binary_crossentropy: 0.6828 - val_loss: 0.6828
Epoch 3/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 98ms/step - binary_crossentropy: 0.6864 - loss: 0.6864 - val_binary_crossentropy: 0.6831 - val_loss: 0.6831
Epoch 4/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 98ms/step - binary_crossentropy: 0.6864 - loss: 0.6864 - val_binary_crossentropy: 0.6835 - val_loss: 0.6835
Epoch 5/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 97ms/step - binary_crossentropy: 0.6866 - loss: 0.6866 - val_binary_crossentropy: 0.6832 - val_loss: 0.6832


In [36]:
user_pred_info = {}
top = 10
mymodel_user_pred_info = test_model(autoIntMLP_model, test_df) 
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))
    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_ndcg = get_NDCG(mymodel_pred, testset)
    mymodel_ndcg_result[user] = user_ndcg

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_hitrate = get_hit_rate(mymodel_pred, testset)

    mymodel_hitrate_result[user] = user_hitrate

print(" mymodel ndcg : ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print(" mymodel hitrate : ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|███████████████████████████████████████████████████████████████████████████| 6038/6038 [00:00<00:00, 69554.30it/s]
100%|████████████████████████████████████████████████████████████████████████████| 5994/5994 [00:01<00:00, 5992.20it/s]
100%|███████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 36459.78it/s]

 mymodel ndcg :  0.58267
 mymodel hitrate :  0.58383





3. NAdam

In [37]:
autoIntMLP_model = AutoIntMLPModel(
    field_dims=field_dims, 
    embedding_size=embed_dim, 
    att_layer_num=3, 
    att_head_num=2, 
    att_res=True, 
    dnn_hidden_units=(32, 32), 
    dnn_activation='relu',
    l2_reg_dnn=0, 
    l2_reg_embedding=1e-5, 
    dnn_use_bn=False, 
    dnn_dropout=dropout, 
    init_std=0.0001
)

import tensorflow as tf
import tensorflow.keras.optimizers
optimizer = tf.keras.optimizers.Nadam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy(from_logits=False)

autoIntMLP_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

history = autoIntMLP_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 105ms/step - binary_crossentropy: 0.6908 - loss: 0.6908 - val_binary_crossentropy: 0.6704 - val_loss: 0.6704
Epoch 2/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 101ms/step - binary_crossentropy: 0.6677 - loss: 0.6677 - val_binary_crossentropy: 0.6262 - val_loss: 0.6262
Epoch 3/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 96ms/step - binary_crossentropy: 0.6411 - loss: 0.6411 - val_binary_crossentropy: 0.6117 - val_loss: 0.6117
Epoch 4/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 95ms/step - binary_crossentropy: 0.6354 - loss: 0.6354 - val_binary_crossentropy: 0.6067 - val_loss: 0.6067
Epoch 5/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 94ms/step - binary_crossentropy: 0.6333 - loss: 0.6333 - val_binary_crossentropy: 0.6048 - val_loss: 0.6048


In [38]:
user_pred_info = {}
top = 10
mymodel_user_pred_info = test_model(autoIntMLP_model, test_df) 
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))
    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_ndcg = get_NDCG(mymodel_pred, testset)
    mymodel_ndcg_result[user] = user_ndcg

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_hitrate = get_hit_rate(mymodel_pred, testset)

    mymodel_hitrate_result[user] = user_hitrate

print(" mymodel ndcg : ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print(" mymodel hitrate : ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|███████████████████████████████████████████████████████████████████████████| 6038/6038 [00:00<00:00, 64771.84it/s]
100%|████████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 6885.58it/s]
100%|███████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 45211.32it/s]


 mymodel ndcg :  0.65875
 mymodel hitrate :  0.62645


4. NAdam with learning rate scheduling

In [39]:
autoIntMLP_model = AutoIntMLPModel(
    field_dims=field_dims, 
    embedding_size=embed_dim, 
    att_layer_num=3, 
    att_head_num=2, 
    att_res=True, 
    dnn_hidden_units=(32, 32), 
    dnn_activation='relu',
    l2_reg_dnn=0, 
    l2_reg_embedding=1e-5, 
    dnn_use_bn=False, 
    dnn_dropout=dropout, 
    init_std=0.0001
)

import tensorflow as tf
import tensorflow.keras.optimizers

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    learning_rate,
    decay_steps=100,
    decay_rate=0.96
)
optimizer = tf.keras.optimizers.Nadam(learning_rate=lr_schedule, weight_decay=1e-5)
loss_fn = BinaryCrossentropy(from_logits=False)

autoIntMLP_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

history = autoIntMLP_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 126ms/step - binary_crossentropy: 0.6914 - loss: 0.6914 - val_binary_crossentropy: 0.6768 - val_loss: 0.6768
Epoch 2/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 123ms/step - binary_crossentropy: 0.6754 - loss: 0.6754 - val_binary_crossentropy: 0.6449 - val_loss: 0.6449
Epoch 3/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 122ms/step - binary_crossentropy: 0.6519 - loss: 0.6519 - val_binary_crossentropy: 0.6222 - val_loss: 0.6222
Epoch 4/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 123ms/step - binary_crossentropy: 0.6405 - loss: 0.6405 - val_binary_crossentropy: 0.6134 - val_loss: 0.6134
Epoch 5/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 124ms/step - binary_crossentropy: 0.6370 - loss: 0.6370 - val_binary_crossentropy: 0.6095 - val_loss: 0.6095


In [40]:
user_pred_info = {}
top = 10
mymodel_user_pred_info = test_model(autoIntMLP_model, test_df) 
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))
    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_ndcg = get_NDCG(mymodel_pred, testset)
    mymodel_ndcg_result[user] = user_ndcg

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_hitrate = get_hit_rate(mymodel_pred, testset)

    mymodel_hitrate_result[user] = user_hitrate

print(" mymodel ndcg : ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print(" mymodel hitrate : ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|███████████████████████████████████████████████████████████████████████████| 6038/6038 [00:00<00:00, 67611.96it/s]
100%|████████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 6762.78it/s]
100%|███████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 46909.03it/s]

 mymodel ndcg :  0.65909
 mymodel hitrate :  0.62726





5. AdamW with learning rate scheduling 

In [44]:
embed_dim= 16

autoIntMLP_model = AutoIntMLPModel(
    field_dims=field_dims, 
    embedding_size=embed_dim, 
    att_layer_num=3, 
    att_head_num=2, 
    att_res=True, 
    dnn_hidden_units=(32, 32), 
    dnn_activation='relu',
    l2_reg_dnn=0, 
    l2_reg_embedding=1e-5, 
    dnn_use_bn=False, 
    dnn_dropout=dropout, 
    init_std=0.0001
)

import tensorflow as tf
import tensorflow.keras.optimizers

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    learning_rate,
    decay_steps=100,
    decay_rate=0.96
)
optimizer = tf.keras.optimizers.AdamW(learning_rate=lr_schedule, weight_decay=1e-5)
loss_fn = BinaryCrossentropy(from_logits=False)

autoIntMLP_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

history = autoIntMLP_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 102ms/step - binary_crossentropy: 0.6915 - loss: 0.6915 - val_binary_crossentropy: 0.6768 - val_loss: 0.6768
Epoch 2/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 89ms/step - binary_crossentropy: 0.6741 - loss: 0.6741 - val_binary_crossentropy: 0.6392 - val_loss: 0.6392
Epoch 3/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 88ms/step - binary_crossentropy: 0.6487 - loss: 0.6487 - val_binary_crossentropy: 0.6179 - val_loss: 0.6179
Epoch 4/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 88ms/step - binary_crossentropy: 0.6384 - loss: 0.6384 - val_binary_crossentropy: 0.6113 - val_loss: 0.6113
Epoch 5/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 87ms/step - binary_crossentropy: 0.6350 - loss: 0.6350 - val_binary_crossentropy: 0.6080 - val_loss: 0.6080


In [45]:
user_pred_info = {}
top = 10
mymodel_user_pred_info = test_model(autoIntMLP_model, test_df) 
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))
    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_ndcg = get_NDCG(mymodel_pred, testset)
    mymodel_ndcg_result[user] = user_ndcg

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_hitrate = get_hit_rate(mymodel_pred, testset)

    mymodel_hitrate_result[user] = user_hitrate

print(" mymodel ndcg : ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print(" mymodel hitrate : ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|███████████████████████████████████████████████████████████████████████████| 6038/6038 [00:00<00:00, 69236.36it/s]
100%|████████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 6440.84it/s]
100%|███████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 43471.28it/s]

 mymodel ndcg :  0.65916
 mymodel hitrate :  0.6271





테스트 결과 NAdam이나 AdamW를 사용하면서 learning rate을 시간에 따라 줄이는 learning_rate scheduling을 사용하는 것이 성능이 가장 좋았다. 이를 바탕으로 하이퍼 파라미터를 튜닝해보자.

optimizer는 adamw로 고정하겠다.

In [41]:
epochs=5
dropout= 0.4
batch_size = 2048
embed_dim= 32

autoIntMLP_model = AutoIntMLPModel(
    field_dims=field_dims, 
    embedding_size=embed_dim, 
    att_layer_num=3, 
    att_head_num=2, 
    att_res=True, 
    dnn_hidden_units=(32, 32), 
    dnn_activation='relu',
    l2_reg_dnn=0, 
    l2_reg_embedding=1e-5, 
    dnn_use_bn=False, 
    dnn_dropout=dropout, 
    init_std=0.0001
)

import tensorflow as tf
import tensorflow.keras.optimizers

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    learning_rate,
    decay_steps=100,
    decay_rate=0.96
)
optimizer = tf.keras.optimizers.AdamW(learning_rate=lr_schedule, weight_decay=1e-5)
loss_fn = BinaryCrossentropy(from_logits=False)

autoIntMLP_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

history = autoIntMLP_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 149ms/step - binary_crossentropy: 0.6902 - loss: 0.6902 - val_binary_crossentropy: 0.6678 - val_loss: 0.6678
Epoch 2/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 137ms/step - binary_crossentropy: 0.6640 - loss: 0.6640 - val_binary_crossentropy: 0.6256 - val_loss: 0.6256
Epoch 3/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 138ms/step - binary_crossentropy: 0.6404 - loss: 0.6404 - val_binary_crossentropy: 0.6140 - val_loss: 0.6140
Epoch 4/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 139ms/step - binary_crossentropy: 0.6350 - loss: 0.6350 - val_binary_crossentropy: 0.6093 - val_loss: 0.6093
Epoch 5/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 141ms/step - binary_crossentropy: 0.6326 - loss: 0.6326 - val_binary_crossentropy: 0.6066 - val_loss: 0.6066


In [42]:
user_pred_info = {}
top = 10
mymodel_user_pred_info = test_model(autoIntMLP_model, test_df) 
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))
    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_ndcg = get_NDCG(mymodel_pred, testset)
    mymodel_ndcg_result[user] = user_ndcg

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_hitrate = get_hit_rate(mymodel_pred, testset)

    mymodel_hitrate_result[user] = user_hitrate

print(" mymodel ndcg : ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print(" mymodel hitrate : ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|███████████████████████████████████████████████████████████████████████████| 6038/6038 [00:00<00:00, 53247.73it/s]
100%|████████████████████████████████████████████████████████████████████████████| 5994/5994 [00:01<00:00, 5257.01it/s]
100%|███████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 45674.99it/s]

 mymodel ndcg :  0.65848
 mymodel hitrate :  0.62627





embed dimension을 늘렸더니 미세하게 성능이 안좋아진 모습이다.

In [47]:
epochs=5
dropout= 0.2
batch_size = 2048
embed_dim= 16

autoIntMLP_model = AutoIntMLPModel(
    field_dims=field_dims, 
    embedding_size=embed_dim, 
    att_layer_num=3, 
    att_head_num=2, 
    att_res=True, 
    dnn_hidden_units=(32, 32), 
    dnn_activation='relu',
    l2_reg_dnn=0, 
    l2_reg_embedding=1e-5, 
    dnn_use_bn=False, 
    dnn_dropout=dropout, 
    init_std=0.0001
)

import tensorflow as tf
import tensorflow.keras.optimizers

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    learning_rate,
    decay_steps=50,
    decay_rate=0.96
)
optimizer = tf.keras.optimizers.AdamW(learning_rate=lr_schedule, weight_decay=1e-5)
loss_fn = BinaryCrossentropy(from_logits=False)

autoIntMLP_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

history = autoIntMLP_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 108ms/step - binary_crossentropy: 0.6916 - loss: 0.6916 - val_binary_crossentropy: 0.6772 - val_loss: 0.6772
Epoch 2/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 104ms/step - binary_crossentropy: 0.6738 - loss: 0.6738 - val_binary_crossentropy: 0.6450 - val_loss: 0.6450
Epoch 3/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 103ms/step - binary_crossentropy: 0.6464 - loss: 0.6464 - val_binary_crossentropy: 0.6200 - val_loss: 0.6200
Epoch 4/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 102ms/step - binary_crossentropy: 0.6284 - loss: 0.6284 - val_binary_crossentropy: 0.6091 - val_loss: 0.6091
Epoch 5/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 104ms/step - binary_crossentropy: 0.6213 - loss: 0.6213 - val_binary_crossentropy: 0.6042 - val_loss: 0.6042


In [48]:
user_pred_info = {}
top = 10
mymodel_user_pred_info = test_model(autoIntMLP_model, test_df) 
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))
    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_ndcg = get_NDCG(mymodel_pred, testset)
    mymodel_ndcg_result[user] = user_ndcg

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_hitrate = get_hit_rate(mymodel_pred, testset)

    mymodel_hitrate_result[user] = user_hitrate

print(" mymodel ndcg : ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print(" mymodel hitrate : ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|███████████████████████████████████████████████████████████████████████████| 6038/6038 [00:00<00:00, 54974.37it/s]
100%|████████████████████████████████████████████████████████████████████████████| 5994/5994 [00:01<00:00, 4890.06it/s]
100%|███████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 25570.65it/s]

 mymodel ndcg :  0.65881
 mymodel hitrate :  0.6275





In [49]:
epochs=10
dropout= 0.3
batch_size = 2048
embed_dim= 32

autoIntMLP_model = AutoIntMLPModel(
    field_dims=field_dims, 
    embedding_size=embed_dim, 
    att_layer_num=3, 
    att_head_num=2, 
    att_res=True, 
    dnn_hidden_units=(32, 32), 
    dnn_activation='relu',
    l2_reg_dnn=0, 
    l2_reg_embedding=1e-5, 
    dnn_use_bn=False, 
    dnn_dropout=dropout, 
    init_std=0.0001
)

import tensorflow as tf
import tensorflow.keras.optimizers

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    learning_rate,
    decay_steps=100,
    decay_rate=0.96
)
optimizer = tf.keras.optimizers.AdamW(learning_rate=lr_schedule, weight_decay=1e-5)
loss_fn = BinaryCrossentropy(from_logits=False)

autoIntMLP_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

history = autoIntMLP_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 150ms/step - binary_crossentropy: 0.6906 - loss: 0.6906 - val_binary_crossentropy: 0.6674 - val_loss: 0.6674
Epoch 2/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 144ms/step - binary_crossentropy: 0.6618 - loss: 0.6618 - val_binary_crossentropy: 0.6189 - val_loss: 0.6189
Epoch 3/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 145ms/step - binary_crossentropy: 0.6318 - loss: 0.6318 - val_binary_crossentropy: 0.6055 - val_loss: 0.6055
Epoch 4/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 138ms/step - binary_crossentropy: 0.6244 - loss: 0.6244 - val_binary_crossentropy: 0.6016 - val_loss: 0.6016
Epoch 5/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 137ms/step - binary_crossentropy: 0.6220 - loss: 0.6220 - val_binary_crossentropy: 0.5998 - val_loss: 0.5998
Epoch 6/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [50]:
user_pred_info = {}
top = 10
mymodel_user_pred_info = test_model(autoIntMLP_model, test_df) 
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))
    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_ndcg = get_NDCG(mymodel_pred, testset)
    mymodel_ndcg_result[user] = user_ndcg

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_hitrate = get_hit_rate(mymodel_pred, testset)

    mymodel_hitrate_result[user] = user_hitrate

print(" mymodel ndcg : ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print(" mymodel hitrate : ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|███████████████████████████████████████████████████████████████████████████| 6038/6038 [00:00<00:00, 77556.69it/s]
100%|████████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 7601.81it/s]
100%|███████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 45674.16it/s]

 mymodel ndcg :  0.65881
 mymodel hitrate :  0.62667





In [51]:
epochs= 7
dropout= 0.3
batch_size = 2048
embed_dim= 16

autoIntMLP_model = AutoIntMLPModel(
    field_dims=field_dims, 
    embedding_size=embed_dim, 
    att_layer_num=3, 
    att_head_num=2, 
    att_res=True, 
    dnn_hidden_units=(32, 32), 
    dnn_activation='relu',
    l2_reg_dnn=0.001, 
    l2_reg_embedding=1e-5, 
    dnn_use_bn=False, 
    dnn_dropout=dropout, 
    init_std=0.0001
)

import tensorflow as tf
import tensorflow.keras.optimizers

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    learning_rate,
    decay_steps=100,
    decay_rate=0.96
)
optimizer = tf.keras.optimizers.AdamW(learning_rate=lr_schedule, weight_decay=1e-5)
loss_fn = BinaryCrossentropy(from_logits=False)

autoIntMLP_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

history = autoIntMLP_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/7
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 102ms/step - binary_crossentropy: 0.6917 - loss: 0.6919 - val_binary_crossentropy: 0.6793 - val_loss: 0.6808
Epoch 2/7
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 106ms/step - binary_crossentropy: 0.6787 - loss: 0.6809 - val_binary_crossentropy: 0.6550 - val_loss: 0.6602
Epoch 3/7
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 105ms/step - binary_crossentropy: 0.6554 - loss: 0.6615 - val_binary_crossentropy: 0.6265 - val_loss: 0.6351
Epoch 4/7
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 106ms/step - binary_crossentropy: 0.6367 - loss: 0.6457 - val_binary_crossentropy: 0.6154 - val_loss: 0.6250
Epoch 5/7
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 106ms/step - binary_crossentropy: 0.6325 - loss: 0.6420 - val_binary_crossentropy: 0.6114 - val_loss: 0.6210
Epoch 6/7
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3

In [52]:
user_pred_info = {}
top = 10
mymodel_user_pred_info = test_model(autoIntMLP_model, test_df) 
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))
    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_ndcg = get_NDCG(mymodel_pred, testset)
    mymodel_ndcg_result[user] = user_ndcg

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_hitrate = get_hit_rate(mymodel_pred, testset)

    mymodel_hitrate_result[user] = user_hitrate

print(" mymodel ndcg : ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print(" mymodel hitrate : ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|███████████████████████████████████████████████████████████████████████████| 6038/6038 [00:00<00:00, 65430.18it/s]
100%|████████████████████████████████████████████████████████████████████████████| 5994/5994 [00:01<00:00, 5605.22it/s]
100%|███████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 35690.07it/s]


 mymodel ndcg :  0.65948
 mymodel hitrate :  0.62753


In [53]:
epochs= 10
dropout= 0.3
batch_size = 2048
embed_dim= 32

autoIntMLP_model = AutoIntMLPModel(
    field_dims=field_dims, 
    embedding_size=embed_dim, 
    att_layer_num=3, 
    att_head_num=2, 
    att_res=True, 
    dnn_hidden_units=(32, 32), 
    dnn_activation='relu',
    l2_reg_dnn=0.001, 
    l2_reg_embedding=1e-5, 
    dnn_use_bn=False, 
    dnn_dropout=dropout, 
    init_std=0.0001
)

import tensorflow as tf
import tensorflow.keras.optimizers

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    learning_rate,
    decay_steps=100,
    decay_rate=0.96
)
optimizer = tf.keras.optimizers.AdamW(learning_rate=lr_schedule, weight_decay=1e-5)
loss_fn = BinaryCrossentropy(from_logits=False)

autoIntMLP_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_crossentropy'])

history = autoIntMLP_model.fit(train_df[u_i_feature + meta_features], train_df[label], epochs=epochs, batch_size=batch_size, validation_split=0.1)

Epoch 1/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 159ms/step - binary_crossentropy: 0.6914 - loss: 0.6916 - val_binary_crossentropy: 0.6776 - val_loss: 0.6793
Epoch 2/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 152ms/step - binary_crossentropy: 0.6740 - loss: 0.6772 - val_binary_crossentropy: 0.6409 - val_loss: 0.6485
Epoch 3/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 151ms/step - binary_crossentropy: 0.6429 - loss: 0.6516 - val_binary_crossentropy: 0.6171 - val_loss: 0.6268
Epoch 4/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 148ms/step - binary_crossentropy: 0.6320 - loss: 0.6417 - val_binary_crossentropy: 0.6117 - val_loss: 0.6210
Epoch 5/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 146ms/step - binary_crossentropy: 0.6277 - loss: 0.6370 - val_binary_crossentropy: 0.6088 - val_loss: 0.6178
Epoch 6/10
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [54]:
user_pred_info = {}
top = 10
mymodel_user_pred_info = test_model(autoIntMLP_model, test_df) 
for user, data_info in tqdm(mymodel_user_pred_info.items(), total=len(mymodel_user_pred_info), position=0, leave=True):
    ranklist = sorted(data_info, key=lambda s : s[1], reverse=True)[:top]
    ranklist = list(dict.fromkeys([r[0] for r in ranklist]))
    user_pred_info[str(user)] = ranklist
test_data = test_df[test_df['label']==1].groupby('user_id')['movie_id'].apply(list)

mymodel_ndcg_result = {}
mymodel_hitrate_result = {}

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))
    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_ndcg = get_NDCG(mymodel_pred, testset)
    mymodel_ndcg_result[user] = user_ndcg

for user, data_info in tqdm(test_data.items(), total=len(test_data), position=0, leave=True):
    mymodel_pred = user_pred_info.get(str(user))

    testset = list(set(np.array(data_info).astype(int)))
    mymodel_pred = mymodel_pred[:top]

    user_hitrate = get_hit_rate(mymodel_pred, testset)

    mymodel_hitrate_result[user] = user_hitrate

print(" mymodel ndcg : ", round(np.mean(list(mymodel_ndcg_result.values())), 5))
print(" mymodel hitrate : ", round(np.mean(list(mymodel_hitrate_result.values())), 5))

  user_pred_info[int(u_i[0])].append((int(u_i[1]), float(p)))
100%|███████████████████████████████████████████████████████████████████████████| 6038/6038 [00:00<00:00, 58319.69it/s]
100%|████████████████████████████████████████████████████████████████████████████| 5994/5994 [00:01<00:00, 5776.66it/s]
100%|███████████████████████████████████████████████████████████████████████████| 5994/5994 [00:00<00:00, 50011.75it/s]

 mymodel ndcg :  0.65974
 mymodel hitrate :  0.62713





현재까지의 결과를 정리해보면 다음과 같다. ndcg와 hitrate를 기준으로 성능을 평가하고 있다.

1. 노드 상의 코드를 이용한 AutoInt+ 모델
 ndcg :  0.65909, hitrate :  0.626
2. AdamW 적용
 ndcg : 0.65916, hitrate : 0.6271
3. epoch 7, dropout 0.3, l2_reg_dnn 0.001
 ndcg : 0.65948, hitrate : 0.62753
4. epoch 10, dropout 0.3, l2_reg_dnn 0.001, embed_dim 32
 ndcg : 0.65974, hitrate : 0.62713

3과 4는 성능이 비슷하지만, ndcg가 가장 높게 나온 4번을 기준으로 streamlit 시각화를 위한 모델을 저장한다.

In [55]:
np.save('./data/field_dims_tuned.npy', field_dims)
autoIntMLP_model.save_weights('./model/autoIntMLP_model_tuned.weights.h5')
joblib.dump(label_encoders, './data/label_encoders_tuned.pkl')

['./data/label_encoders_tuned.pkl']

## 회고

사실 아직도 완전히 이 개념을 이해하지는 못한 것 같다.

하지만 분명 논문과 코드 리뷰를 통해서 공부한 것보다는 훨씬 잘 이해된 것 같다.

오토인트 모델이 attention 메커니즘을 사용한다는 점, 어텐션 메커니즘을 q, k, v값에 적용하고 여러 개를 함께 묶어 값을 도출한다는 것을 좀 더 직관적으로 알 수 있었다.

또한, 오토인트+ 모델은 거기에 dnn을 병렬적으로 배치함으로써 성능의 상승을 꾀했다.

사실 실제로 이런 저런 테스트를 진행해보면서 느낀 것은, (그리고 영상에서도 해당 부분을 언급했던 것으로 기억하는데) 플러스 모델과 일반 모델이 그리 큰 성능 차이를 보여주지 못한다는 점이다.

또한, 추천 시스템이 원래 이런 것인지, 아니면 내가 건드려볼 수 있는 더 다양한 하이퍼 파라미터들을 내버려둬서 그런 것인지는 모르겠지만, 파라미터를 조절한다고 크게 성능의 향상이나 저하가 있지는 않았다. 안정적인 모델이라는 반증일수도 있겠다.

그리고 파이토치와 텐서플로를 비교하고 텐서플로를 통해 파이토치 코드를 변환시켜 적용해봄으로써 딥러닝 모델의 패키지별 구성이 어떤 식으로 일어나는지 알 수 있었다.

어떤 부분에서는 파이토치가 좀 더 편한 부분이 있는 것도 같아, 파이토치도 한 번 배워놓으면 좋겠다는 생각도 든다.

우선 여러 테스트를 통해 기본 +모델보다 좀 더 높은 성능을 보이는 + 튜닝 모델을 만들어 냈다는 것에 주안점을 두고,

streamlit으로도 무사히 해당 기능을 구현해냈다는 데 만족스럽다.

또한 streamlit을 일종의 rag 플랫폼이라고 생각했었는데, 이런 식으로 MLOps를 위한 패키지로도 사용할 수 있다는 점이 신선했다.

마지막으로 굉장히 재밌다고 느꼈던 점이 있다.

streamlit 결과를 봤을 때 처음에는 성능이 좋지 않다고 생각했다. 추천 결과와 이력 상의 값들이 생각보다 너무 맞지 않다고 느꼈기 때문이다.

하지만 영화 하나하나를 잘 따져보면, 생각보다 영화의 느낌들이 비슷한 것을 볼 수 있다.

AutoInt는 사실 이전의 추천시스템들이 너무 one-order나 two-order, high-order에 선형적으로 영향을 받는 것을 문제삼아 등장한 모델이기 때문에, 더 feature간 interaction을 중요하게 모델링한다고 했다.

따라서, 하나의 데이터의 '장르'라는 feature에만 영향을 받지 않고, 그것보다 훨씬 더 다양한 측면에서 영향을 받는 것이다.

그렇기 때문에 그냥 눈으로 봐도 생각보다 비슷한 영화들이 추천된 것을 확인할 수 있었다. 장르와 같은 직관적인 부분 말고도 다른 특성들을 통해 이런 추천을 수행할 수 있다는 게 확실히 신기하긴 했다.

추천 시스템이 재미있는 점 중 하나는, 직관적으로 보이는 ndcg나 hitrate와 같은 성능 수치가 실제 결괏값에 미치는 영향도와 다를 수 있다는 점이다.

사실 수많은 영화들이 존재하고, 수많은 상품들이 존재하는데 어떤 사람이 어떤 상품을 좋아한다고 이미 말했다고 해서 추천된 상품을 좋아하지 않을 거라는 보장이 없지 않은가?

이런 정성적인 부분을 어떻게 고려하느냐도 추천 시스템을 개발하는 데 어느 정도 도움이 되지 않을까 생각하면서, 이 프로젝트를 마무리하겠다.