In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd
import os
import numexpr as ne
import json
pd.set_option('max_columns', 300)
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from tensorflow.python.keras.optimizers import Adam, RMSprop
from tensorflow.python.keras.layers import Input, Dense, Embedding, Flatten, Dropout, merge, Activation, BatchNormalization, LeakyReLU
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.regularizers import l2
from tensorflow.python.keras import backend as K
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.python.keras import initializers
from tensorflow.python.keras.layers import add, concatenate
%matplotlib inline  
import warnings
warnings.filterwarnings('ignore')
from keras.callbacks import EarlyStopping, ModelCheckpoint
from scipy.sparse import csr_matrix
import tensorflow as tf
from tensorflow.python.keras.models import model_from_json
from sklearn import preprocessing
from keras.utils import plot_model

Using TensorFlow backend.


## Data preprocessing

In [None]:
data = pd.read_json("/content/drive/My Drive/TOBIGS/Recommender System/TA_User_Reviws_Korea_all.json", typ='frame')

In [None]:
df = data[['userId', 'location.name', 'rating', 'createdDate']]
df.columns = ['user_emb_id', 'location_emb_id', 'rating', 'timestamp']

In [6]:
df.head()

Unnamed: 0,user_emb_id,location_emb_id,rating,timestamp
0,F9AD0C1F6409A1574AD0AFC237D2A4CE,Hotel Gracery Seoul,5,2020-03-01
1,23EF5413495FF530151DB7E5C0FEF38D,Hongik University Street,5,2020-02-14
2,23EF5413495FF530151DB7E5C0FEF38D,Noonaholdak Hongdae,4,2020-02-14
3,23EF5413495FF530151DB7E5C0FEF38D,L7 Hongdae,5,2020-02-14
4,23EF5413495FF530151DB7E5C0FEF38D,Myeongdong Shopping Street,4,2020-02-14


In [None]:
user_dict = {}
for idx, unique_user in enumerate(df.user_emb_id.unique()):
    user_dict[unique_user] = idx
    
location_dict = {}
for idx, unique_location in enumerate(df.location_emb_id.unique()):
    location_dict[unique_location] = idx

In [None]:
df['user_emb_id'] = df['user_emb_id'].replace(user_dict)
df['location_emb_id'] = df['location_emb_id'].replace(location_dict)

In [9]:
df.head()

Unnamed: 0,user_emb_id,location_emb_id,rating,timestamp
0,0,0,5,2020-03-01
1,1,1,5,2020-02-14
2,1,2,4,2020-02-14
3,1,3,5,2020-02-14
4,1,4,4,2020-02-14


In [None]:
#+1 is the real size, as they are zero based
num_users = df['user_emb_id'].unique().max() + 1
num_locations = df['location_emb_id'].unique().max() + 1

In [None]:
train_df, test_df = train_test_split(df,
                                     test_size=0.1,
                                     random_state=999613182)

In [None]:
train_df, validate_df = train_test_split(train_df,
                                 test_size=0.1,
                                 random_state=999613182)

In [None]:
def dataPreprocessor(rating_df, num_users, num_items, init_value=0, average=False):
    """
        INPUT: 
            data: pandas DataFrame. columns=['userID', 'itemID', 'rating' ...]
            num_row: int. number of users
            num_col: int. number of items
            
        OUTPUT:
            matrix: 2D numpy array. 
    """
    if average:
        matrix = np.full((num_users, num_items), 0.0)
        for (_, userID, itemID, rating, timestamp) in rating_df.itertuples():
            matrix[userID, itemID] = rating
            avergae = np.true_divide(matrix.sum(1), np.maximum((matrix!=0).sum(1), 1))
            inds = np.where(matrix == 0)
            matrix[inds] = np.take(avergae, inds[0])
      
    else:
        matrix = np.full((num_users, num_items), init_value)
        for (_, userID, itemID, rating, timestamp) in rating_df.itertuples():
            matrix[userID, itemID] = rating

    return matrix

In [None]:
# Creating a sparse pivot table with users in rows and items in columns
users_items_matrix_train_zero = dataPreprocessor(train_df, num_users, num_locations, 0)
users_items_matrix_validate = dataPreprocessor(validate_df, num_users, num_locations, 0)
users_items_matrix_test = dataPreprocessor(test_df, num_users, num_locations, 0)

## Utility Function

In [None]:
def show_error(history, skip):
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    plt.plot(np.arange(skip, len(loss), 1), loss[skip:])
    plt.plot(np.arange(skip, len(loss), 1), val_loss[skip:])
    plt.title('model train vs validation loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='best')
    plt.show()

In [None]:
def show_rmse(history, skip):
    rmse = history.history['masked_rmse_clip']
    val_rmse = history.history['val_masked_rmse_clip']
    plt.plot(np.arange(skip, len(rmse), 1), rmse[skip:])
    plt.plot(np.arange(skip, len(val_rmse), 1), val_rmse[skip:])
    plt.title('model train vs validation masked_rmse')
    plt.ylabel('rmse')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='best')
    plt.show()

In [None]:
def load_model(name):
    # load json and create model
    model_file = open('{}.json'.format(name), 'r')
    loaded_model_json = model_file.read()
    model_file.close()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights("{}.h5".format(name))
    print("Loaded model from disk")
    return loaded_model

In [None]:
def save_model(name, model):
    # # serialize model to JSON
    model_json = model.to_json()
    with open("{}.json".format(name), "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights("{}.h5".format(name))
    print("Saved model to disk")

In [None]:
def masked_se(y_true, y_pred):
    # masked function
    mask_true = K.cast(K.not_equal(y_true, 0), K.floatx())
    # masked squared error
    masked_squared_error = K.square(mask_true * (y_true - y_pred))
    masked_mse = K.sum(masked_squared_error, axis=-1)
    return masked_mse

In [None]:
def masked_mse(y_true, y_pred):
    # masked function
    mask_true = K.cast(K.not_equal(y_true, 0), K.floatx())
    # masked squared error
    masked_squared_error = K.square(mask_true * (y_true - y_pred))
    masked_mse = K.sum(masked_squared_error, axis=-1) / K.maximum(K.sum(mask_true, axis=-1), 1)
    return masked_mse

In [None]:
def masked_rmse(y_true, y_pred):
    # masked function
    mask_true = K.cast(K.not_equal(y_true, 0), K.floatx())
    # masked squared error
    masked_squared_error = K.square(mask_true * (y_true - y_pred))
    masked_mse = K.sqrt(K.sum(masked_squared_error, axis=-1) / K.maximum(K.sum(mask_true, axis=-1), 1))
    return masked_mse

In [None]:
def masked_rmse_clip(y_true, y_pred):
    # masked function
    mask_true = K.cast(K.not_equal(y_true, 0), K.floatx())
    y_pred = K.clip(y_pred, 1, 5)
    # masked squared error
    masked_squared_error = K.square(mask_true * (y_true - y_pred))
    masked_mse = K.sqrt(K.sum(masked_squared_error, axis=-1) / K.maximum(K.sum(mask_true, axis=-1), 1))
    return masked_mse

## AutoEncoder

### Version: Deep Auto Encoder Collaborative Filtering

In [None]:
def Deep_AE_model(X, layers, activation, last_activation, dropout, regularizer_encode, regularizer_decode, side_infor_size=0):
    '''
    Build Deep AE for CF
        INPUT: 
            X: #_user X #_item matrix
            layers: List, each element is the number of neuron for a layer
            reg: L2 regularization parameter
            activation: activation function for all dense layer except the last
            last_activation: activation function for the last dense layer
            dropout: dropout rate
            regularizer_encode: regularizer for encoder
            regularizer_decode: regularizer for decoder
            side_infor_size: size of the one hot encoding side information
        OUTPUT:
            Keras model
    '''

    # Input
    input_layer = x = Input(shape=(X.shape[1],), name='UserRating')
    
    # Encoder
    # -----------------------------
    k = int(len(layers)/2)
    i = 0
    for l in layers[:k]:
        x = Dense(l, activation=activation,
                      name='EncLayer{}'.format(i), kernel_regularizer=regularizers.l2(regularizer_encode))(x)
        i = i+1
      
      
    # Latent Space
    # -----------------------------
    x = Dense(layers[k], activation=activation, 
                                name='LatentSpace', kernel_regularizer=regularizers.l2(regularizer_encode))(x)
    
    # Dropout
    x = Dropout(rate = dropout)(x)
    
    # Decoder
    # -----------------------------
    for l in layers[k+1:]:
        i = i-1
        x = Dense(l, activation=activation, 
                      name='DecLayer{}'.format(i), kernel_regularizer=regularizers.l2(regularizer_decode))(x)
      
    # Output

    output_layer = Dense(X.shape[1]-side_infor_size, activation=last_activation, name='UserScorePred', kernel_regularizer=regularizers.l2(regularizer_decode))(x)

    # this model maps an input to its reconstruction
    model = Model(input_layer, output_layer)

    return model

In [None]:
################### Layer 선택 ###################################
#layers = [256, 512, 256]
layers = [1024, 512, 258, 512, 1024]
#layers = [512, 256, 512]
#layers = [128, 256, 512, 256, 128]
#layers = [512, 512, 512]
##################################################################

dropout = 0.8
# activation = 'sigmoid'
# last_activation = 'linear'
activation = 'selu'
last_activation = 'selu'
regularizer_encode = 0.001
regularizer_decode = 0.001

In [25]:
users_items_matrix_train_zero.shape

(7613, 9929)

: **user가 7613명, item이 9929개**가 있다.

In [None]:
# 버전호환
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

In [27]:
# Build model
Deep_AE = Deep_AE_model(users_items_matrix_train_zero, layers, activation, last_activation, dropout, regularizer_encode, regularizer_decode)
Deep_AE.compile(optimizer = optimizer, loss=masked_mse, metrics=[masked_rmse_clip]) 
Deep_AE.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
UserRating (InputLayer)      [(None, 9929)]            0         
_________________________________________________________________
EncLayer0 (Dense)            (None, 1024)              10168320  
_________________________________________________________________
EncLayer1 (Dense)            (None, 512)               524800    
_________________________________________________________________
LatentSpace (Dense)          (None, 258)               132354    
_________________________________________________________________
dropout (Dropout)            (None, 258)               0         
_________________________________________________________________
DecLayer1 (Dense)            (None, 512)               132608    
_________________________________________________________________
DecLayer0 (Dense)            (None, 1024)              525312

: 모델에 대해서 설명하자면, **9929 차원의 vector(user별 vector)** 가 들어가면서 학습되는 형태이다.

-----

### 학습시작
: user_items_matrix_train_zero 가 들어가면 똑같이 그 행렬이 나오도록 학습한다.

In [28]:
hist_Deep_AE = Deep_AE.fit(x=users_items_matrix_train_zero, y=users_items_matrix_train_zero,
                  epochs=200,
                  batch_size=256,
                validation_data=[users_items_matrix_train_zero, users_items_matrix_validate], verbose=2)

Epoch 1/200
30/30 - 2s - loss: 18.6778 - masked_rmse_clip: 2.8114 - val_loss: 4.9461 - val_masked_rmse_clip: 0.0000e+00
Epoch 2/200
30/30 - 2s - loss: 11.7746 - masked_rmse_clip: 1.9727 - val_loss: 4.5235 - val_masked_rmse_clip: 0.0000e+00
Epoch 3/200
30/30 - 2s - loss: 8.0070 - masked_rmse_clip: 1.2914 - val_loss: 4.2228 - val_masked_rmse_clip: 0.0000e+00
Epoch 4/200
30/30 - 2s - loss: 6.0273 - masked_rmse_clip: 0.9553 - val_loss: 3.9878 - val_masked_rmse_clip: 0.0000e+00
Epoch 5/200
30/30 - 2s - loss: 5.0741 - masked_rmse_clip: 0.7750 - val_loss: 3.8019 - val_masked_rmse_clip: 0.0000e+00
Epoch 6/200
30/30 - 2s - loss: 4.5671 - masked_rmse_clip: 0.6762 - val_loss: 3.6471 - val_masked_rmse_clip: 0.0000e+00
Epoch 7/200
30/30 - 2s - loss: 4.2654 - masked_rmse_clip: 0.6118 - val_loss: 3.5154 - val_masked_rmse_clip: 0.0000e+00
Epoch 8/200
30/30 - 2s - loss: 4.0716 - masked_rmse_clip: 0.5804 - val_loss: 3.4015 - val_masked_rmse_clip: 0.0000e+00
Epoch 9/200
30/30 - 2s - loss: 3.9150 - masked

#### output

In [29]:
predict_deep = Deep_AE.predict(users_items_matrix_train_zero)
predict_deep.shape

(7613, 9929)

In [30]:
users_items_matrix_train_zero

array([[5, 0, 0, ..., 0, 0, 0],
       [0, 5, 4, ..., 0, 0, 0],
       [4, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 5, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [31]:
predict_deep

array([[4.4918113, 4.170216 , 3.8366194, ..., 3.303756 , 2.8397808,
        3.541355 ],
       [3.4924119, 4.45126  , 4.4499288, ..., 2.3121154, 3.877664 ,
        4.797636 ],
       [3.9160361, 3.7386346, 3.3459127, ..., 2.57929  , 2.4354897,
        3.0344338],
       ...,
       [4.911874 , 4.5376353, 4.430986 , ..., 4.9342937, 3.4834597,
        4.3523464],
       [4.5478935, 4.2272234, 4.283716 , ..., 4.0435815, 3.791431 ,
        4.72086  ],
       [4.00671  , 4.658057 , 4.8302236, ..., 3.9826205, 4.025987 ,
        4.9986563]], dtype=float32)

: 결과 값에 대한 해석을 어떻게 해야할지 모르겠지만, 일단, 위의 matrix와 비교하면, 5점에 대한 부분에서의 결과값이 다른 값들 보다 높게 생성되었으며, 4점에 대한 부분 역시도 5점보다는 높게, 0점보다는 낮게 생성된 것을 알 수 있다.

: 하지만 결과 값에 대한 해석에서는 이견이 있을 것이라고 생각하기 때문에 필요성을 검토해봐야한다.

#### method_2)  Gaussin noise를 추가하여 다시 학습을 진행해보자.

In [None]:
## Adding Gaussin noise to input
noise_factor = 0.2
users_items_matrix_train_zero_noisy = users_items_matrix_train_zero + noise_factor * np.random.normal(size=users_items_matrix_train_zero.shape) 

In [33]:
users_items_matrix_train_zero_noisy

array([[ 4.94536957, -0.1920947 ,  0.21792335, ..., -0.00666214,
         0.03230252,  0.0571985 ],
       [ 0.27287078,  5.11951386,  3.84962424, ...,  0.28601788,
        -0.14667009,  0.0539903 ],
       [ 4.1587367 , -0.23104066, -0.22737723, ...,  0.04253071,
         0.01394342, -0.12779111],
       ...,
       [-0.25118244,  0.23314809,  0.10845348, ...,  4.744131  ,
        -0.03026341,  0.20786581],
       [ 0.11158167,  0.00885028, -0.24205023, ...,  0.32921039,
        -0.47297254, -0.13949017],
       [-0.18126535, -0.21107656,  0.01959683, ..., -0.13114641,
        -0.40416711, -0.45826687]])

: noise를 추가하여 학습을 진행시켜보자.

In [None]:
################### Layer 선택 ###################################
#layers = [256, 512, 256]
layers = [1024, 512, 258, 512, 1024]
#layers = [512, 256, 512]
#layers = [128, 256, 512, 256, 128]
#layers = [512, 512, 512]
##################################################################

dropout = 0.8
# activation = 'sigmoid'
# last_activation = 'linear'
activation = 'selu'
last_activation = 'selu'
regularizer_encode = 0.001
regularizer_decode = 0.001

In [35]:
# Build model
Deep_AE = Deep_AE_model(users_items_matrix_train_zero_noisy, layers, activation, last_activation, dropout, regularizer_encode, regularizer_decode)
Deep_AE.compile(optimizer = optimizer, loss=masked_mse, metrics=[masked_rmse_clip]) 
Deep_AE.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
UserRating (InputLayer)      [(None, 9929)]            0         
_________________________________________________________________
EncLayer0 (Dense)            (None, 1024)              10168320  
_________________________________________________________________
EncLayer1 (Dense)            (None, 512)               524800    
_________________________________________________________________
LatentSpace (Dense)          (None, 258)               132354    
_________________________________________________________________
dropout_1 (Dropout)          (None, 258)               0         
_________________________________________________________________
DecLayer1 (Dense)            (None, 512)               132608    
_________________________________________________________________
DecLayer0 (Dense)            (None, 1024)              5253

In [None]:
hist_Deep_AE = Deep_AE.fit(x=users_items_matrix_train_zero_noisy, y=users_items_matrix_train_zero,
                  epochs=200,
                  batch_size=256,
                  validation_data=[users_items_matrix_train_zero_noisy, users_items_matrix_validate], verbose=2)

Epoch 1/200
30/30 - 2s - loss: 14.3005 - masked_rmse_clip: 2.0632 - val_loss: 4.5496 - val_masked_rmse_clip: 0.0000e+00
Epoch 2/200
30/30 - 2s - loss: 6.8654 - masked_rmse_clip: 0.9937 - val_loss: 4.4617 - val_masked_rmse_clip: 0.0000e+00
Epoch 3/200
30/30 - 2s - loss: 5.4803 - masked_rmse_clip: 0.6947 - val_loss: 4.3856 - val_masked_rmse_clip: 0.0000e+00
Epoch 4/200
30/30 - 2s - loss: 5.0819 - masked_rmse_clip: 0.5867 - val_loss: 4.3082 - val_masked_rmse_clip: 0.0000e+00
Epoch 5/200
30/30 - 2s - loss: 4.8946 - masked_rmse_clip: 0.5525 - val_loss: 4.2371 - val_masked_rmse_clip: 0.0000e+00
Epoch 6/200
30/30 - 2s - loss: 4.7439 - masked_rmse_clip: 0.5142 - val_loss: 4.1708 - val_masked_rmse_clip: 0.0000e+00
Epoch 7/200
30/30 - 2s - loss: 4.6242 - masked_rmse_clip: 0.4875 - val_loss: 4.1074 - val_masked_rmse_clip: 0.0000e+00
Epoch 8/200
30/30 - 2s - loss: 4.5314 - masked_rmse_clip: 0.4740 - val_loss: 4.0473 - val_masked_rmse_clip: 0.0000e+00
Epoch 9/200
30/30 - 2s - loss: 4.4442 - masked_

In [None]:
predict_deep_ver2 = Deep_AE.predict(users_items_matrix_train_zero_noisy)
predict_deep_ver2.shape

In [None]:
predict_deep_ver2

: 이 역시도 결과가 많이 좋게는 나오지 않는 것 같다.

-----------------

#### method_3) Hybrid

: 활용하기로 한 정보를 라벨 인코딩/원핫 인코딩 하여 옆에 붙여서 사용하여 실험을 적용해 볼 수 있다. 예시로 photo_len를 사용해보자.

In [None]:
data = pd.read_json("/content/drive/My Drive/TOBIGS/Recommender System/TA_User_Reviws_Korea_all.json", typ='frame')

In [None]:
data['photo_len'] = data.photoIds.apply(lambda x: len(x))

In [None]:
df = data[['userId', 'location.name', 'rating','photo_len','createdDate']]
df.columns = ['user_emb_id', 'location_emb_id', 'rating', 'photo_len', 'timestamp']

In [None]:
photo_len_df = df.groupby('user_emb_id')['photo_len'].mean().reset_index()
photo_len_df['photo_len'] = preprocessing.LabelEncoder().fit(photo_len_df['photo_len']).transform(photo_len_df['photo_len'])
onehot_df = preprocessing.OneHotEncoder(handle_unknown='ignore', sparse=False).fit(photo_len_df[['photo_len']]).transform(photo_len_df[['photo_len']])

: shape을 다시 맞춰주자

In [None]:
df = data[['userId', 'location.name', 'rating', 'createdDate']]
df.columns = ['user_emb_id', 'location_emb_id', 'rating', 'timestamp']

In [None]:
user_dict = {}
for idx, unique_user in enumerate(df.user_emb_id.unique()):
    user_dict[unique_user] = idx
    
location_dict = {}
for idx, unique_location in enumerate(df.location_emb_id.unique()):
    location_dict[unique_location] = idx

In [None]:
df['user_emb_id'] = df['user_emb_id'].replace(user_dict)
df['location_emb_id'] = df['location_emb_id'].replace(location_dict)

In [None]:
#+1 is the real size, as they are zero based
num_users = df['user_emb_id'].unique().max() + 1
num_locations = df['location_emb_id'].unique().max() + 1

In [None]:
def dataPreprocessor(rating_df, num_users, num_items, init_value=0, average=False):
    """
        INPUT: 
            data: pandas DataFrame. columns=['userID', 'itemID', 'rating' ...]
            num_row: int. number of users
            num_col: int. number of items
            
        OUTPUT:
            matrix: 2D numpy array. 
    """
    if average:
        matrix = np.full((num_users, num_items), 0.0)
        for (_, userID, itemID, rating, timestamp) in rating_df.itertuples():
            matrix[userID, itemID] = rating
            avergae = np.true_divide(matrix.sum(1), np.maximum((matrix!=0).sum(1), 1))
            inds = np.where(matrix == 0)
            matrix[inds] = np.take(avergae, inds[0])
      
    else:
        matrix = np.full((num_users, num_items), init_value)
        for (_, userID, itemID, rating, timestamp) in rating_df.itertuples():
            matrix[userID, itemID] = rating

    return matrix

In [None]:
users_items_matrix_zero = dataPreprocessor(df, num_users, num_locations, 0)

In [90]:
users_items_matrix_zero.shape, onehot_df.shape

((7613, 9929), (7613, 405))

In [None]:
user_items_matrix_zero_concat = np.concatenate([users_items_matrix_zero, onehot_df],axis=1)

In [None]:
user_items_matrix_zero_concat_original[:,:9929]

In [None]:
user_items_user_info_train, user_items_user_info_test = train_test_split(user_items_matrix_zero_concat,
                                     test_size=0.1,
                                     random_state=999613182)

In [None]:
user_items_user_info_train, user_items_user_info_validation = train_test_split(user_items_user_info_train,
                                 test_size=0.1,
                                 random_state=999613182)

In [None]:
user_item_user_info = user_items_user_info_train[:,:9929]

In [97]:
user_items_user_info_train.shape

(6165, 10334)

In [98]:
user_item_user_info.shape

(6165, 9929)

: concat한 데이터와 concat하지 않은 데이터를 동시에 준비한다.

In [None]:
user_item_user_info_val = user_items_user_info_validation[:,:9929]

In [100]:
user_item_user_info_val.shape

(686, 9929)

In [101]:
layers = [1024, 512, 256, 512, 1024]
dropout = 0.8
activation = 'selu'
last_activation = 'selu'
regularizer_encode = 0.001
regularizer_decode = 0.001
# Build model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
Deep_AE_concate = Deep_AE_model(user_items_user_info_train, layers, activation, last_activation, dropout, regularizer_encode, regularizer_decode, 30)
Deep_AE_concate.compile(optimizer = optimizer, loss=masked_mse, metrics=[masked_rmse_clip]) 
Deep_AE_concate.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
UserRating (InputLayer)      [(None, 10334)]           0         
_________________________________________________________________
EncLayer0 (Dense)            (None, 1024)              10583040  
_________________________________________________________________
EncLayer1 (Dense)            (None, 512)               524800    
_________________________________________________________________
LatentSpace (Dense)          (None, 256)               131328    
_________________________________________________________________
dropout_3 (Dropout)          (None, 256)               0         
_________________________________________________________________
DecLayer1 (Dense)            (None, 512)               131584    
_________________________________________________________________
DecLayer0 (Dense)            (None, 1024)              5253

In [109]:
hist_Deep_AE_concate = Deep_AE_concate.fit(x=user_items_user_info_train, y=user_items_user_info_train,
                  epochs=500,
                  batch_size=256,
                  validation_data=[user_items_user_info_train, user_items_user_info_validation], verbose=2)

Epoch 1/500


ValueError: ignored

----

#### re-feeding
: 결과물로 나온 행렬을 다시 모델에 학습시킨다. 이로써 sparse_matrix를 dense_matrix로 탈바꿈 시킬 수 있다.

In [None]:
users_items_matrix_train_zero_refeeding = Deep_AE.predict(users_items_matrix_train_zero)

In [None]:
hist_Deep_AE_refeeding = Deep_AE.fit(x=users_items_matrix_train_zero, y=users_items_matrix_train_zero,
                  epochs=20,
                  batch_size=512,
                  validation_data=[users_items_matrix_train_zero, users_items_matrix_validate], verbose=2)

In [None]:
test_result_deep = Deep_AE.evaluate(users_items_matrix_train_zero, users_items_matrix_test)