In [1]:
# 导入必要的库
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.optimizers import Adam

# Load the ratings table from a tab-separated file decoded as latin-1,
# keeping only the three columns that the rest of the notebook uses.
ratings = pd.read_csv(
    'ratings.csv',
    sep='\t',
    encoding='latin-1',
    usecols=['user_id', 'movie_id', 'rating'],
)

# Embedding vocabulary sizes: largest observed ID plus one, so the raw IDs can
# be fed to the embedding layers directly as indices
# (assumes IDs are non-negative integers -- TODO confirm against the data).
n_users, n_movies = (ratings[col].max() + 1 for col in ('user_id', 'movie_id'))

In [2]:
# Features are the (user_id, movie_id) pairs; the target is the rating.
X, y = ratings[['user_id', 'movie_id']].values, ratings['rating'].values

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split itself repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [3]:
# Model definition, part 1: one embedding tower per input ID.
# Statement order is kept as in the original because layer creation order can
# influence how model.summary() lists the layers.

# User tower: a single integer ID -> 50-dimensional embedding -> flat vector.
user_input = Input(shape=(1,), name='user_input')
user_embedding = Embedding(
    n_users,
    50,
    name='user_embedding',
)(user_input)
user_vector = Flatten(name='user_vector')(user_embedding)

# Movie tower: same structure, sized by the number of movie IDs.
movie_input = Input(shape=(1,), name='movie_input')
movie_embedding = Embedding(
    n_movies,
    50,
    name='movie_embedding',
)(movie_input)
movie_vector = Flatten(name='movie_vector')(movie_embedding)

# Join the two flattened embeddings into one feature vector (user first).
concat = Concatenate(name='concat')(
    [user_vector, movie_vector]
)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [4]:
# Model definition, part 2: fully connected head on top of the concatenated
# embeddings. Each ReLU layer is followed by dropout with rate 0.2.
dense1 = Dense(units=128, activation='relu', name='dense1')(concat)
dropout1 = Dropout(rate=0.2, name='dropout1')(dense1)
dense2 = Dense(units=64, activation='relu', name='dense2')(dropout1)
dropout2 = Dropout(rate=0.2, name='dropout2')(dense2)

# Regression output: a single unit with linear activation, i.e. the predicted
# rating is not squashed or clipped by the network.
output = Dense(units=1, activation='linear', name='output')(dropout2)

In [5]:
# Assemble and compile the model.
# Adam with an explicitly chosen learning rate (1e-3).
optimizer = Adam(learning_rate=1e-3)

# Two inputs (user ID, movie ID) feed a single rating output.
model = Model(
    inputs=[user_input, movie_input],
    outputs=output,
)

# Train on mean squared error; also track mean absolute error per epoch.
model.compile(
    optimizer=optimizer,
    loss='mean_squared_error',
    metrics=['mae'],
)

In [None]:
# Print the layer-by-layer architecture.
model.summary()

# Train the model. Column 0 of X_* holds user IDs and column 1 holds movie IDs,
# matching the order of the model's two inputs.
# Note: validation_data is the same test split that is used for the final
# predictions below, so the per-epoch validation metrics are not independent
# of the reported test metrics.
history = model.fit(
    x=[X_train[:, 0], X_train[:, 1]],
    y=y_train,
    batch_size=64,
    epochs=5,
    verbose=1,
    validation_data=([X_test[:, 0], X_test[:, 1]], y_test),
)

# Predict ratings for the test split and show the first ten predictions next
# to the corresponding true ratings.
y_pred = model.predict(
    [X_test[:, 0], X_test[:, 1]]
)
print(f"预测示例: {y_pred[:10].flatten()}")
print(f"真实值示例: {y_test[:10]}")


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
movie_input (InputLayer)        [(None, 1)]          0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 50)        302050      user_input[0][0]                 
__________________________________________________________________________________________________
movie_embedding (Embedding)     (None, 1, 50)        197650      movie_input[0][0]                
______________________________________________________________________________________________

In [None]:
# Dump the raw training history: one line per recorded metric, each holding
# the list of per-epoch values.
print("\n训练日志:")
for key in history.history:
    print(f"{key}: {history.history[key]}")

# Root mean squared error on the test split. y_pred is flattened first so it
# lines up element-wise with the 1-D y_test instead of broadcasting.
rmse = np.sqrt(np.mean(np.square(y_pred.ravel() - y_test)))
print(f"\n测试集上的 RMSE: {rmse:.4f}")

# Mean absolute error on the test split.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred.flatten())
print(f"\n测试集上的 MAE: {mae:.4f}")
