FNN模型结合了因子分解机（FM）和深度神经网络（DNN）的优点。

1. **Input Fields**：输入的特征被组织成若干“域”（Field），每个域包含了一些特征。
2. **FM Pretraining**：首先通过因子分解机（FM）预训练得到特征嵌入（Embedding）。
3. **Deep Neural Network**：然后将这些预训练得到的嵌入作为输入，传递给深度神经网络（DNN），通过若干隐层（Hidden Layers）和最终输出层（Output Layer）进行预测。

### FNN模型实现

为实现FNN，我们需要以下步骤：
1. 利用FM进行预训练获取特征嵌入。
2. 使用特征嵌入作为输入，构建深度神经网络进行训练和预测。

### 代码解释

1. **FM模型**：
   - `FM`类用于预训练得到特征的嵌入向量（`V`）。
   
2. **FNN模型**：
   - `FNN`类包括以下部分：
     - 初始化DNN权重。
     - 使用FM模型预训练特征嵌入。
     - 利用预训练得到的嵌入向量进行DNN训练。
   
3. **预训练和训练**：
   - `pretrain_fm`方法调用`FM`模型进行预训练，获得特征嵌入。
   - `fit`方法用于训练整个模型，包括预训练和DNN训练。
   
4. **前向和后向传播**：
   - `_forward_dnn`方法通过DNN层进行前向传播。
   - `_backward_dnn`方法计算DNN的梯度，用于更新权重和偏置。

5. **预测**：
   - `predict`方法通过DNN进行预测，返回最终的预测结果。

通过上述实现，FNN模型能够有效利用FM预训练得到的特征嵌入，同时捕捉特征之间的高阶交互，提高模型的预测性能。

In [4]:
import numpy as np

class FM:
    def __init__(self, n_features, k, learning_rate=0.01, n_epochs=10):
        self.n_features = n_features
        self.k = k
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs

        # Initialize weights
        self.w0 = 0
        self.W = np.zeros(n_features)
        self.V = np.random.normal(scale=0.01, size=(n_features, k))

    def fit(self, X, y):
        for epoch in range(self.n_epochs):
            for i in range(X.shape[0]):
                self._update_weights(X[i], y[i])
    
    def _update_weights(self, x, y):
        linear_output = self.w0 + np.dot(self.W, x)
        interaction_output = 0.5 * np.sum((np.dot(x, self.V) ** 2) - np.dot(x ** 2, self.V ** 2))
        prediction = linear_output + interaction_output
        error = y - self.sigmoid(prediction)

        self.w0 += self.learning_rate * error
        self.W += self.learning_rate * error * x
        for f in range(self.n_features):
            self.V[f] += self.learning_rate * error * (np.dot(x, self.V) - x[f] * self.V[f]) * x[f]

    def predict(self, X):
        linear_output = self.w0 + np.dot(X, self.W)
        interaction_output = 0.5 * np.sum((np.dot(X, self.V) ** 2) - np.dot(X ** 2, self.V ** 2), axis=1)
        return linear_output + interaction_output

    def get_embeddings(self):
        return self.V

    @staticmethod
    def sigmoid(x):
        x = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-x))


class FNN:
    def __init__(self, n_features, k, hidden_layers, learning_rate=0.01, n_epochs=10):
        self.n_features = n_features
        self.k = k
        self.hidden_layers = hidden_layers
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs

        self.dnn_weights = self._init_dnn_weights()

    def _init_dnn_weights(self):
        layer_sizes = [self.n_features * self.k] + self.hidden_layers
        dnn_weights = {}

        for i in range(len(layer_sizes) - 1):
            dnn_weights[f"W{i+1}"] = np.random.normal(scale=0.01, size=(layer_sizes[i], layer_sizes[i+1]))
            dnn_weights[f"b{i+1}"] = np.zeros(layer_sizes[i+1])

        # Output layer
        dnn_weights[f"W{len(layer_sizes)}"] = np.random.normal(scale=0.01, size=(layer_sizes[-1], 1))
        dnn_weights[f"b{len(layer_sizes)}"] = np.zeros(1)

        return dnn_weights

    def pretrain_fm(self, X, y):
        fm = FM(n_features=self.n_features, k=self.k, learning_rate=self.learning_rate, n_epochs=self.n_epochs)
        fm.fit(X, y)
        self.V = fm.get_embeddings()

    def fit(self, X, y):
        self.pretrain_fm(X, y)
        for epoch in range(self.n_epochs):
            for i in range(X.shape[0]):
                self._update_weights(X[i], y[i])

    def _update_weights(self, x, y):
        dnn_output, dnn_gradients = self._forward_dnn(x)
        prediction = dnn_output
        error = y - self.sigmoid(prediction)

        for i in range(len(self.hidden_layers) + 1):
            self.dnn_weights[f'W{i+1}'] += self.learning_rate * error * dnn_gradients[f'W{i+1}']
            self.dnn_weights[f'b{i+1}'] += self.learning_rate * error * dnn_gradients[f'b{i+1}']

    def _forward_dnn(self, x):
        # Ensure the input embedding vector has the correct shape
        a = x.dot(self.V).flatten()  # Shape: (n_features * k,)
        cache = {'a0': a}

        for i in range(len(self.hidden_layers)):
            z = np.dot(a, self.dnn_weights[f'W{i+1}']) + self.dnn_weights[f'b{i+1}']
            a = self.sigmoid(z)
            cache[f'a{i+1}'] = a
            cache[f'z{i+1}'] = z

        z = np.dot(a, self.dnn_weights[f'W{len(self.hidden_layers)+1}']) + self.dnn_weights[f'b{len(self.hidden_layers)+1}']
        a = self.sigmoid(z)
        cache[f'a{len(self.hidden_layers)+1}'] = a
        cache[f'z{len(self.hidden_layers)+1}'] = z

        gradients = self._backward_dnn(cache, x)
        return a[0], gradients

    def _backward_dnn(self, cache, x):
        gradients = {}
        L = len(self.hidden_layers)
        a_last = cache[f'a{L+1}']
        dz = a_last * (1 - a_last)

        for i in reversed(range(1, L+2)):
            a_prev = cache[f'a{i-1}']
            gradients[f'W{i}'] = np.outer(a_prev, dz)
            gradients[f'b{i}'] = dz
            if i > 1:
                da = dz.dot(self.dnn_weights[f'W{i}'].T)
                dz = da * a_prev * (1 - a_prev)

        return gradients

    def predict(self, x):
        dnn_output, _ = self._forward_dnn(x)
        return self.sigmoid(dnn_output)
    
    @staticmethod
    def sigmoid(x):
        x = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-x))

# Test the FNN model
num_samples = 1000
num_features = 10
embedding_dim = 5
hidden_layers = [10, 5]

# Generate random dataset
X = np.random.randint(2, size=(num_samples, num_features))
y = np.random.randint(2, size=num_samples)

# Train FNN model
fnn = FNN(n_features=num_features, k=embedding_dim, hidden_layers=hidden_layers, learning_rate=0.01, n_epochs=10)
fnn.fit(X, y)

# Make predictions
prediction = fnn.predict(X[0])
print(prediction)  # Print the first prediction

ValueError: shapes (5,) and (50,10) not aligned: 5 (dim 0) != 50 (dim 0)