In [None]:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
import numpy as np

# <ins>Activations:</ins>

### <ins>Sigmoid:</ins>
$ \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)} $

### <ins>Softmax:</ins>
$ \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)} $

# <ins>Loss Functions:</ins>

### <ins>Binary Cross-Entropy (BCE) Loss:</ins>
$ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
        l_n = - w_n \left[ y_n \cdot \log x_n + (1 - y_n) \cdot \log (1 - x_n) \right] $

### <ins>Cross-Entropy Loss:</ins>
The softmax function is often used to convert the raw model outputs (logits) into probabilities. Cross-entropy loss measures the dissimilarity between this predicted probability distribution and the true distribution (one-hot encoded labels).

$ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
          l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
          \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\} $

### <ins>Mean-Squared Error (MSE) Loss:</ins>
$ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
        l_n = \left( x_n - y_n \right)^2 $

# <ins>Definitions:</ins>

### <ins>Logits:</ins>
$ \text{logit}_i = \log\left(\frac{p_i}{1 - p_i}\right) = \log\left(\frac{p_i}{\sum_{j \neq i} p_j}\right) $,
the denominator sums the probabilities of all classes except the $i$-th class

### <ins>Sigmoid Probability:</ins>
$ p = \frac{1}{1 + \exp(-\text{logit})} $


### <ins>Softmax Probability:</ins>
$ p_i = \frac{\exp(\text{logit}_i)}{\sum_{j=1}^{K} \exp(\text{logit}_j)} $,
all classes including the $i$-th class in denominator

In [None]:
class UniversalPerceptron(tf.keras.Model):
    """Single dense layer whose output activation and loss depend on the task.

    Args:
        input_size: Number of input features. The constructor accepts it but
            does not read it; the dense layer is created from ``output_size``
            alone.
        output_size: Number of units of the dense layer.
        task_type: Selects the activation / loss pair:
            ``"binary_classification"`` -> sigmoid + binary cross-entropy,
            ``"multi_class_classification"`` -> softmax + categorical
            cross-entropy, ``"regression"`` -> linear + mean squared error.

    Raises:
        ValueError: If ``task_type`` is none of the supported names.
    """

    def __init__(self, input_size, output_size, task_type="binary_classification"):
        super().__init__()
        self.fc = tf.keras.layers.Dense(output_size)

        # One row per supported task: (name, output activation, loss class).
        # The loss class is only instantiated for the row that matches.
        supported_tasks = (
            (
                "binary_classification",
                tf.keras.activations.sigmoid,
                tf.keras.losses.BinaryCrossentropy,
            ),
            (
                "multi_class_classification",
                tf.keras.activations.softmax,
                tf.keras.losses.CategoricalCrossentropy,
            ),
            (
                "regression",
                tf.keras.activations.linear,
                tf.keras.losses.MeanSquaredError,
            ),
        )

        for task_name, activation_fn, loss_cls in supported_tasks:
            if task_type == task_name:
                self.activation = activation_fn
                self.loss_function = loss_cls()
                break
        else:
            # No row matched the requested task.
            raise ValueError(
                "Invalid task_type. Supported types are 'binary_classification', 'multi_class_classification', and 'regression'."
            )

    def call(self, inputs):
        """Apply the dense layer, then the task-specific activation."""
        return self.activation(self.fc(inputs))


class UniversalPerceptronTrainer:
    """Training loop and prediction helper around a model and an optimizer.

    The model must be callable on a batch of inputs and expose
    ``loss_function`` and ``trainable_variables``; the optimizer must expose
    ``apply_gradients``.
    """

    def __init__(self, model, optimizer):
        self.model, self.optimizer = model, optimizer

    def _train_step(self, inputs, labels):
        """Perform one parameter update on the given batch.

        Returns:
            The loss computed in the forward pass, i.e. before the update.
        """
        # The tape records the forward pass so the loss can be differentiated
        # with respect to the model parameters afterwards.
        with tf.GradientTape() as tape:
            loss = self.model.loss_function(labels, self.model(inputs))

        # Look the variables up after the forward pass (a lazily built model
        # has none before its first call).
        grads = tape.gradient(loss, self.model.trainable_variables)

        # Pair every gradient with the variable it belongs to and let the
        # optimizer apply the update.
        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
        return loss

    def train(self, inputs, labels, epochs=100):
        """Run ``epochs`` updates, each on the complete ``inputs``/``labels``.

        The current loss is printed after every tenth epoch.
        """
        for epoch in range(epochs):
            loss = self._train_step(inputs, labels)

            if (epoch + 1) % 10:
                continue
            print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.numpy():.4f}")

    def predict(self, inputs):
        """Run the model on ``inputs`` and post-process by loss type.

        Returns:
            * binary cross-entropy loss: boolean array, ``True`` where the
              model output is ``>= 0.5``;
            * categorical cross-entropy loss: index of the largest output
              along axis 1;
            * any other loss: the raw model outputs as a NumPy array.
        """
        # No GradientTape is opened here, so nothing is recorded for
        # differentiation.
        predictions = self.model(inputs)
        loss_fn = self.model.loss_function

        if isinstance(loss_fn, tf.keras.losses.BinaryCrossentropy):
            # class with sigmoid probability >= 0.5
            return (predictions >= 0.5).numpy()

        if isinstance(loss_fn, tf.keras.losses.CategoricalCrossentropy):
            # class with highest softmax probability
            return np.argmax(predictions, axis=1)

        return predictions.numpy()

In [None]:
# Example usage
def _build_and_train(
    task_type, input_size, output_size, inputs, labels, learning_rate=0.01, epochs=100
):
    """Create a perceptron for ``task_type`` and train it with plain SGD.

    Args:
        task_type: Task name forwarded to ``UniversalPerceptron``.
        input_size: Number of input features, forwarded to the model.
        output_size: Number of model outputs.
        inputs: Training inputs passed to the trainer.
        labels: Training targets passed to the trainer.
        learning_rate: Learning rate of the SGD optimizer.
        epochs: Number of training epochs.

    Returns:
        Tuple ``(model, optimizer, trainer)`` after training has finished.
    """
    model = UniversalPerceptron(input_size, output_size, task_type=task_type)
    optimizer = tf.optimizers.SGD(learning_rate=learning_rate)
    trainer = UniversalPerceptronTrainer(model, optimizer)
    trainer.train(inputs, labels, epochs=epochs)
    return model, optimizer, trainer


if __name__ == "__main__":
    # Tunable settings of the demo
    SEED = 42
    LEARNING_RATE = 0.01
    EPOCHS = 100

    # Seed Python, NumPy and TensorFlow so that weight initialisation (and
    # therefore the printed losses/predictions) is repeatable, not only the
    # generated data.
    tf.keras.utils.set_random_seed(SEED)

    # Generate some random training data
    np.random.seed(SEED)
    input_size = 2
    data_size = 100
    inputs = np.random.rand(data_size, input_size).astype(np.float32)
    labels_binary_cls = np.random.randint(2, size=(data_size, 1)).astype(np.float32)
    labels_multi_cls = to_categorical(
        np.random.randint(3, size=(data_size)).astype(np.int32), num_classes=3
    )
    labels_regression = np.random.rand(data_size, 1).astype(np.float32)

    # Binary classification: one sigmoid output
    binary_cls_model, binary_cls_optimizer, binary_cls_trainer = _build_and_train(
        "binary_classification",
        input_size,
        1,
        inputs,
        labels_binary_cls,
        learning_rate=LEARNING_RATE,
        epochs=EPOCHS,
    )

    # Test the trained model with new data for binary classification
    test_inputs = np.random.rand(5, input_size).astype(np.float32)
    binary_cls_predictions = binary_cls_trainer.predict(test_inputs)
    print("Binary Classification Predictions:", binary_cls_predictions)

    # Multi-class classification: three softmax outputs, one-hot labels
    multi_cls_model, multi_cls_optimizer, multi_cls_trainer = _build_and_train(
        "multi_class_classification",
        input_size,
        3,
        inputs,
        labels_multi_cls,
        learning_rate=LEARNING_RATE,
        epochs=EPOCHS,
    )

    # Test the trained model with new data for multi-class classification
    multi_cls_predictions = multi_cls_trainer.predict(test_inputs)
    print("Multi-Class Classification Predictions:", multi_cls_predictions)

    # Regression: one linear output
    regression_model, regression_optimizer, regression_trainer = _build_and_train(
        "regression",
        input_size,
        1,
        inputs,
        labels_regression,
        learning_rate=LEARNING_RATE,
        epochs=EPOCHS,
    )

    # Test the trained model with new data for regression
    regression_predictions = regression_trainer.predict(test_inputs)
    print("Regression Predictions:", regression_predictions)