In [None]:
# 第 1 部分
# 載入函式庫與資料集
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from keras.models import Model
from keras.callbacks import LambdaCallback
from sklearn.model_selection import train_test_split
from sklearn import metrics

import numpy as np
import pandas as pd

# Seed NumPy's global random generator before any data handling takes place.
np.random.seed(123456)

# Read the MovieLens ratings table from the Kaggle input directory.
ratings_csv = '/kaggle/input/movielens/ratings.csv'
data = pd.read_csv(ratings_csv)

In [None]:
# 第 2 部分
# 資料清理
def get_data(data):
    """Re-index the ratings and split them into training and test sets.

    The input frame is NOT modified: all work happens on a copy, so the
    notebook cell that calls this function can be re-run safely.

    Args:
        data: DataFrame with at least ``userId``, ``movieId`` and ``rating``
            columns. A ``timestamp`` column, if present, is discarded.

    Returns:
        A tuple ``(train, test, n_users, n_movies)`` where ``train`` and
        ``test`` are an 80/20 split of the shuffled, re-indexed ratings and
        ``n_users`` / ``n_movies`` are the numbers of distinct users/movies.
    """
    # Drop the timestamp on a copy; errors='ignore' keeps the call valid
    # when the column has already been removed.
    ratings = data.drop(columns='timestamp', errors='ignore')

    # Collect every distinct user and movie id, in order of first appearance.
    users = ratings.userId.unique()
    movies = ratings.movieId.unique()

    # Map each original id to a new consecutive integer index (0, 1, 2, ...).
    moviemap = {original: new for new, original in enumerate(movies)}
    usermap = {original: new for new, original in enumerate(users)}

    # Replace the original ids with the consecutive indices.
    ratings = ratings.assign(
        movieId=ratings.movieId.map(moviemap),
        userId=ratings.userId.map(usermap),
    )

    # Shuffle the rows (no explicit random_state is passed here or below).
    ratings = ratings.sample(frac=1.0).reset_index(drop=True)

    # Hold out 20% of the rows as the test set.
    train, test = train_test_split(ratings, test_size=0.2)

    n_users = len(users)
    n_movies = len(movies)

    return train, test, n_users, n_movies

# Prepare the loaded ratings. n_users and n_movies are also read as
# module-level globals by create_model to size its embedding layers.
train, test, n_users, n_movies = get_data(data)

In [None]:
# 第 3 部分
# 定義基學習器的相關函式
def create_model(n_features=5, train_model=True, load_weights=False):
    """Build and compile the embedding-based rating regressor.

    A movie id and a user id are each embedded into ``n_features``
    dimensions and flattened; the two vectors are concatenated and passed
    through Dense(128), Dense(32) and Dense(1), none of which is given an
    activation argument. The model is compiled with the 'adam' optimizer
    and the 'mean_squared_error' loss.

    The embedding vocabulary sizes come from the module-level ``n_movies``
    and ``n_users`` variables.

    Args:
        n_features: Width of both embedding layers.
        train_model: Not used by the function body.
        load_weights: Not used by the function body.

    Returns:
        The compiled Model, taking inputs in the order ``[user, movie]``.
    """

    def embedding_branch(input_name, vocab_size, embed_name, flatten_name):
        # Input -> Embedding -> Flatten for one kind of id.
        ids = Input(shape=[1], name=input_name)
        vectors = Embedding(vocab_size, n_features, name=embed_name)(ids)
        return ids, Flatten(name=flatten_name)(vectors)

    # The movie branch is created before the user branch.
    movie_in, movie_vec = embedding_branch(
        "Movie", n_movies, "Movie_Embed", "FlattenM")
    user_in, user_vec = embedding_branch(
        "User", n_users, "User_Embed", "FlattenU")

    # Join both branches and feed them through the dense stack.
    hidden = Concatenate()([movie_vec, user_vec])
    for width in (128, 32, 1):
        hidden = Dense(width)(hidden)

    # Assemble and compile the model.
    model = Model([user_in, movie_in], hidden)
    model.compile('adam', 'mean_squared_error')

    return model


In [None]:
# 第 4 部分
# 定義回呼函數並訓練模型
# Build the base learner first so the callback below refers to an
# already-defined model.
model = create_model(5)

# After every epoch, store a snapshot of the model weights keyed by the
# epoch index that Keras passes to the callback.
weights_dict = {}
weight_callback = LambdaCallback(
    on_epoch_end=lambda epoch, logs: weights_dict.update(
        {epoch: model.get_weights()}))

# Model.fit documents `callbacks` as a list of Callback instances, so the
# single callback is wrapped in a list.
history = model.fit([train.userId, train.movieId],
                    train.rating,
                    epochs=10,
                    callbacks=[weight_callback],
                    verbose=1)


In [None]:
# 第 5 部分
# 集成模型
# Test-set error of the model as left by the final training epoch.
print('Base Learner')
print(metrics.mean_squared_error(test.rating,
    model.predict([test.userId, test.movieId])))

# Average the stored weight snapshots layer by layer. Each snapshot is a
# list of arrays with different shapes, so the list as a whole cannot be
# turned into one ndarray; every layer is averaged separately instead.
# NOTE(review): epochs 9, 8 and 6 are used (7 is skipped) - confirm this
# selection is intentional.
snapshot_epochs = (9, 8, 6)
snapshots = [weights_dict[epoch] for epoch in snapshot_epochs]
final_weights = [np.mean(layer_weights, axis=0)
                 for layer_weights in zip(*snapshots)]
model.set_weights(final_weights)

# Test-set error of the model carrying the averaged weights.
print('Ensemble')
print(metrics.mean_squared_error(test.rating,
    model.predict([test.userId, test.movieId])))