为了将因子分解机（Factorization Machine, FM）扩展为深度因子分解机（DeepFM），我们需要在原有的 FM 模型基础上添加一个深度神经网络层。DeepFM 结合了 FM 和深度神经网络的优点，可以自动学习特征间的低阶和高阶交互。

我们需要做以下几件事：
1. 添加一个深度神经网络（DNN）层。
2. 更新 `predict` 方法，以包括 DNN 的计算。
3. 在训练过程中同时更新 DNN 的参数。

### 代码说明

1. **DNN权重初始化**：
   - `_init_dnn_weights` 方法初始化 DNN 的权重和偏置。

2. **前向传播**：
   - `_forward_dnn` 方法通过 DNN 层进行前向传播。
   - 每个隐藏层都使用 Sigmoid 激活函数，最后一层的激活值求和后作为 DNN 部分的输出。

3. **反向传播**：
   - `_backward_dnn` 方法计算 DNN 的梯度，用于更新权重和偏置。

4. **权重更新**：
   - 在 `_update_weights` 方法中同时更新 FM 和 DNN 的权重。

5. **预测**：
   - `predict` 方法先计算线性输出、特征交互输出和 DNN 输出的总和，再通过 Sigmoid 函数将其映射为 (0, 1) 区间内的概率，作为最终预测值。

### 示例用法

代码生成随机数据集并训练 DeepFM，然后对单个样本进行预测，输出预测值。

通过上述修改，DeepFM 模型能够同时捕捉特征之间的低阶和高阶交互，提供更强大的预测性能。

In [3]:
import numpy as np

class FactorizationMachine:
    """DeepFM-style binary classifier: a factorization machine plus a DNN.

    The raw score of a feature vector ``x`` is the sum of three parts:

    * the linear term ``w0 + W . x``;
    * the pairwise FM interaction term
      ``0.5 * sum((x V)**2 - (x**2)(V**2))``;
    * the DNN term: ``x`` is fed through sigmoid-activated dense layers and
      the activations of the last layer are summed (0 when there are no
      hidden layers).

    ``predict`` squashes that score with a sigmoid.  Training is per-sample
    stochastic gradient descent on the log loss.

    Args:
        n_features: Length of every input feature vector.
        k: Dimension of the FM latent factor vectors.
        hidden_layers: Sequence with the number of units of each DNN layer.
        learning_rate: SGD step size.
        n_epochs: Number of passes over the training data in ``fit``.
    """

    def __init__(self, n_features, k, hidden_layers, learning_rate=0.01, n_epochs=10):
        self.n_features = n_features
        self.k = k
        # Copy into a list so tuples (or any sequence) are accepted and the
        # caller's object is never aliased.
        self.hidden_layers = list(hidden_layers)
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs

        # FM parameters: global bias, linear weights, latent factors.
        self.w0 = 0.0
        self.W = np.zeros(n_features)
        self.V = np.random.normal(scale=0.01, size=(n_features, k))

        # DNN parameters, stored as {'W1', 'b1', 'W2', 'b2', ...}.
        self.dnn_weights = self._init_dnn_weights()

    def _init_dnn_weights(self):
        """Return freshly initialised weights and biases for every DNN layer."""
        dnn_weights = {}
        layer_sizes = [self.n_features] + self.hidden_layers

        for i, (n_in, n_out) in enumerate(zip(layer_sizes, layer_sizes[1:]), start=1):
            dnn_weights[f'W{i}'] = np.random.normal(scale=0.01, size=(n_in, n_out))
            dnn_weights[f'b{i}'] = np.zeros(n_out)

        return dnn_weights

    def fit(self, X, y):
        """Train the model with input features X (2-D) and binary targets y."""
        X = np.asarray(X)
        y = np.asarray(y)
        for _ in range(self.n_epochs):
            for i in range(X.shape[0]):
                self._update_weights(X[i], y[i])

    def _fm_score(self, x):
        """Return ``(linear + interaction score, x @ V)`` for one sample."""
        xv = np.dot(x, self.V)
        linear_output = self.w0 + np.dot(self.W, x)
        interaction_output = 0.5 * np.sum(xv ** 2 - np.dot(x ** 2, self.V ** 2))
        return linear_output + interaction_output, xv

    def _update_weights(self, x, y):
        """Apply one SGD step for the sample ``(x, y)``."""
        x = np.asarray(x, dtype=float)
        fm_score, xv = self._fm_score(x)
        dnn_output, dnn_gradients = self._forward_dnn(x)

        # For the log loss, d(loss)/d(score) = sigmoid(score) - y, so moving
        # every parameter along +error * d(score)/d(param) descends the loss.
        error = y - self.sigmoid(fm_score + dnn_output)
        step = self.learning_rate * error

        self.w0 += step
        self.W += step * x
        # d(score)/dV[f] = x[f] * (x @ V) - x[f]**2 * V[f].  All rows are
        # computed from the pre-update V so the step is a true gradient step.
        self.V += step * (np.outer(x, xv) - self.V * (x ** 2)[:, None])

        for name, grad in dnn_gradients.items():
            self.dnn_weights[name] += step * grad

    def _forward_layers(self, x):
        """Run the DNN forward and return the cache of ``a{i}`` / ``z{i}``."""
        activation = np.asarray(x, dtype=float)
        cache = {'a0': activation}

        for i in range(1, len(self.hidden_layers) + 1):
            z = np.dot(activation, self.dnn_weights[f'W{i}']) + self.dnn_weights[f'b{i}']
            activation = self.sigmoid(z)
            cache[f'z{i}'] = z
            cache[f'a{i}'] = activation

        return cache

    def _dnn_output(self, cache):
        """Return the scalar DNN contribution for a forward-pass cache."""
        n_layers = len(self.hidden_layers)
        if n_layers == 0:
            # Without layers there is nothing to learn; do not leak the raw
            # feature sum into the score.
            return 0.0
        return cache[f'a{n_layers}'].sum()

    def _forward_dnn(self, x):
        """Forward pass through the DNN.

        Returns:
            ``(output, gradients)`` where ``output`` is the scalar DNN score
            and ``gradients`` maps each key of ``dnn_weights`` to the gradient
            of ``output`` with respect to that parameter.
        """
        cache = self._forward_layers(x)
        return self._dnn_output(cache), self._backward_dnn(cache, x)

    def _backward_dnn(self, cache, x):
        """Backpropagate the DNN output through every layer.

        Args:
            cache: Activations ``a0..aL`` produced by the forward pass.
            x: The input sample (kept for interface compatibility; the same
                values are available as ``cache['a0']``).

        Returns:
            Dict with the gradient of the DNN output w.r.t. each ``W{i}`` and
            ``b{i}``; empty when there are no hidden layers.
        """
        gradients = {}
        n_layers = len(self.hidden_layers)
        if n_layers == 0:
            return gradients

        # Output is sum(a_L), so d(output)/dz_L is the sigmoid derivative.
        a_last = cache[f'a{n_layers}']
        delta = a_last * (1 - a_last)

        for i in range(n_layers, 0, -1):
            a_prev = cache[f'a{i-1}']
            gradients[f'W{i}'] = np.outer(a_prev, delta)
            gradients[f'b{i}'] = delta
            if i > 1:
                # Chain rule: push delta back through W_i, then through the
                # sigmoid of the previous layer.
                delta = np.dot(self.dnn_weights[f'W{i}'], delta) * a_prev * (1 - a_prev)

        return gradients

    def predict(self, x):
        """Return the predicted probability for a single feature vector x."""
        x = np.asarray(x, dtype=float)
        fm_score, _ = self._fm_score(x)
        # Forward pass only: no gradients are needed at inference time.
        dnn_output = self._dnn_output(self._forward_layers(x))
        return self.sigmoid(fm_score + dnn_output)

    @staticmethod
    def sigmoid(x):
        """Logistic function; the input is clipped to avoid overflow in exp."""
        x = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-x))


# Demo: fit the DeepFM model on a random binary dataset and score one sample.
num_samples, num_features = 1000, 10  # rows and columns of the dataset
embedding_dim = 5  # size of each FM latent vector
hidden_layers = [10, 5]  # units in each DNN hidden layer

# Features and labels are independent random 0/1 draws.
X = np.random.randint(0, 2, size=(num_samples, num_features))
y = np.random.randint(0, 2, size=num_samples)

# Build and train the model.
fm = FactorizationMachine(
    n_features=num_features,
    k=embedding_dim,
    hidden_layers=hidden_layers,
    learning_rate=0.01,
    n_epochs=10,
)
fm.fit(X, y)

# Predicted probability for the first training sample.
prediction = fm.predict(X[0])
print(prediction)

0.4421879634203969
