In [1]:
import numpy as np

class SigmoidNeuron:
    def __init__(self):
        self.weights = None
        self.bias = None

    def initialize_parameters(self, num_features):
        self.weights = np.zeros((num_features, 1))
        self.bias = 0

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def forward_propagation(self, X):
        z = np.dot(X, self.weights) + self.bias
        return self.sigmoid(z)

    def compute_cost(self, A, y):
        m = y.shape[0]
        cost = -np.sum(y * np.log(A) + (1 - y) * np.log(1 - A)) / m
        return cost

    def backward_propagation(self, X, A, y):
        m = y.shape[0]
        dZ = A - y
        dW = np.dot(X.T, dZ) / m
        dB = np.sum(dZ) / m
        return dW, dB

    def update_parameters(self, dW, dB, learning_rate):
        self.weights -= learning_rate * dW
        self.bias -= learning_rate * dB

    def train(self, X, y, num_iterations, learning_rate, optimizer='gd', batch_size=None,
              beta=0.9, epsilon=1e-8, beta1=0.9, beta2=0.999):
        m = X.shape[0]
        costs = []

        # Initialize parameters
        self.initialize_parameters(X.shape[1])

        # Optimization algorithms
        if optimizer == 'gd':  # Gradient Descent
            for i in range(num_iterations):
                A = self.forward_propagation(X)
                cost = self.compute_cost(A, y)
                dW, dB = self.backward_propagation(X, A, y)
                self.update_parameters(dW, dB, learning_rate)
                costs.append(cost)

        elif optimizer == 'mbgd':  # Mini-Batch Gradient Descent
            num_batches = m // batch_size
            for i in range(num_iterations):
                for j in range(num_batches):
                    start = j * batch_size
                    end = start + batch_size
                    X_batch = X[start:end]
                    y_batch = y[start:end]
                    A = self.forward_propagation(X_batch)
                    cost = self.compute_cost(A, y_batch)
                    dW, dB = self.backward_propagation(X_batch, A, y_batch)
                    self.update_parameters(dW, dB, learning_rate)
                costs.append(cost)

        elif optimizer == 'momentum':  # Momentum-based Gradient Descent
            v_w = np.zeros_like(self.weights)
            v_b = 0
            for i in range(num_iterations):
                A = self.forward_propagation(X)
                cost = self.compute_cost(A, y)
                dW, dB = self.backward_propagation(X, A, y)
                v_w = beta * v_w + (1 - beta) * dW
                v_b = beta * v_b + (1 - beta) * dB
                self.update_parameters(v_w, v_b, learning_rate)
                costs.append(cost)

        elif optimizer == 'nag':  # Nesterov Accelerated Gradient (NAG)
            v_w = np.zeros_like(self.weights)
            v_b = 0
            for i in range(num_iterations):
                # Save current parameters
                curr_weights = self.weights.copy()
                curr_bias = self.bias

                # Update parameters with momentum
                self.weights -= beta * v_w
                self.bias -= beta * v_b

                A = self.forward_propagation(X)
                cost = self.compute_cost(A, y)
                dW, dB = self.backward_propagation(X, A, y)
                v_w = beta * v_w + (1 - beta) * dW
                v_b = beta * v_b + (1 - beta) * dB

                # Restore original parameters
                self.weights = curr_weights - learning_rate * v_w
                self.bias = curr_bias - learning_rate * v_b

                costs.append(cost)

        elif optimizer == 'adagrad':  # Adaptive Gradients (AdaGrad)
            cache_w = np.zeros_like(self.weights)
            cache_b = 0
            for i in range(num_iterations):
                A = self.forward_propagation(X)
                cost = self.compute_cost(A, y)
                dW, dB = self.backward_propagation(X, A, y)
                cache_w += dW**2
                cache_b += dB**2
                self.update_parameters(dW / (np.sqrt(cache_w) + epsilon),
                                       dB / (np.sqrt(cache_b) + epsilon),
                                       learning_rate)
                costs.append(cost)

        elif optimizer == 'rmsprop':  # Root Mean Squared Propagation (RMSProp)
            cache_w = np.zeros_like(self.weights)
            cache_b = 0
            for i in range(num_iterations):
                A = self.forward_propagation(X)
                cost = self.compute_cost(A, y)
                dW, dB = self.backward_propagation(X, A, y)
                cache_w = beta * cache_w + (1 - beta) * dW**2
                cache_b = beta * cache_b + (1 - beta) * dB**2
                self.update_parameters(dW / (np.sqrt(cache_w) + epsilon),
                                       dB / (np.sqrt(cache_b) + epsilon),
                                       learning_rate)
                costs.append(cost)

        elif optimizer == 'adam':  # Adam Optimization
            v_w = np.zeros_like(self.weights)
            v_b = 0
            s_w = np.zeros_like(self.weights)
            s_b = 0
            t = 0
            for i in range(num_iterations):
                t += 1
                A = self.forward_propagation(X)
                cost = self.compute_cost(A, y)
                dW, dB = self.backward_propagation(X, A, y)
                v_w = beta1 * v_w + (1 - beta1) * dW
                v_b = beta1 * v_b + (1 - beta1) * dB
                s_w = beta2 * s_w + (1 - beta2) * dW**2
                s_b = beta2 * s_b + (1 - beta2) * dB**2
                v_w_corrected = v_w / (1 - beta1**t)
                v_b_corrected = v_b / (1 - beta1**t)
                s_w_corrected = s_w / (1 - beta2**t)
                s_b_corrected = s_b / (1 - beta2**t)
                self.update_parameters(v_w_corrected / (np.sqrt(s_w_corrected) + epsilon),
                                       v_b_corrected / (np.sqrt(s_b_corrected) + epsilon),
                                       learning_rate)
                costs.append(cost)

        return costs

# Example usage

# Generate a sample dataset
np.random.seed(42)
X = np.random.randn(100, 10)
y = np.random.randint(0, 2, size=(100, 1))

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an instance of the SigmoidNeuron class
neuron = SigmoidNeuron()

# Train the neuron using different optimization algorithms
gd_costs = neuron.train(X_train, y_train, num_iterations=100, learning_rate=0.01, optimizer='gd')
mbgd_costs = neuron.train(X_train, y_train, num_iterations=100, learning_rate=0.01, optimizer='mbgd', batch_size=16)
momentum_costs = neuron.train(X_train, y_train, num_iterations=100, learning_rate=0.01, optimizer='momentum', beta=0.9)
nag_costs = neuron.train(X_train, y_train, num_iterations=100, learning_rate=0.01, optimizer='nag', beta=0.9)
adagrad_costs = neuron.train(X_train, y_train, num_iterations=100, learning_rate=0.01, optimizer='adagrad')
rmsprop_costs = neuron.train(X_train, y_train, num_iterations=100, learning_rate=0.01, optimizer='rmsprop', beta=0.9)
adam_costs = neuron.train(X_train, y_train, num_iterations=100, learning_rate=0.01, optimizer='adam')

# Plot the cost curves
plt.figure(figsize=(10, 6))
plt.plot(range(len(gd_costs)), gd_costs, label='Gradient Descent')
plt.plot(range(len(mbgd_costs)), mbgd_costs, label='Mini-Batch GD')
plt.plot(range(len(momentum_costs)), momentum_costs, label='Momentum-based GD')
plt.plot(range(len(nag_costs)), nag_costs, label='Nesterov Accelerated GD')
plt.plot(range(len(adagrad_costs)), adagrad_costs, label='AdaGrad')
plt.plot(range(len(rmsprop_costs)), rmsprop_costs, label='RMSProp')
plt.plot(range(len(adam_costs)), adam_costs, label='Adam')
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.title('Cost Curves for Different Optimization Algorithms')
plt.legend()
plt.show()

NameError: ignored