In [1]:
import numpy as np

def zeros_initialization(shape):
    return np.zeros(shape)

In [2]:
def random_initialization(shape):
    return np.random.randn(*shape) * 0.01

In [3]:
def xavier_initialization(shape):
    xavier_limit = np.sqrt(6 / (shape[0] + shape[1]))
    return np.random.uniform(low=-xavier_limit, high=xavier_limit, size=shape)

In [4]:
def he_initialization(shape):
    he_limit = np.sqrt(2 / shape[0])
    return np.random.randn(*shape) * he_limit

In [5]:
# Example usage
input_size = 100
hidden_size = 50
output_size = 10

# Initialize weights and biases using different methods
W1 = zeros_initialization((hidden_size, input_size))
b1 = zeros_initialization((hidden_size, 1))

W2 = random_initialization((output_size, hidden_size))
b2 = random_initialization((output_size, 1))

W3 = xavier_initialization((hidden_size, input_size))
b3 = xavier_initialization((hidden_size, 1))

W4 = he_initialization((output_size, hidden_size))
b4 = he_initialization((output_size, 1))

In [6]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [7]:
def tanh(z):
    return np.tanh(z)

In [8]:
def relu(z):
    return np.maximum(0, z)

In [9]:
def leaky_relu(z, alpha=0.01):
    return np.maximum(alpha * z, z)

In [None]:
# Example usage
z = np.array([-2, -1, 0, 1, 2])

# Apply activation functions to the input
sigmoid_output = sigmoid(z)
tanh_output = tanh(z)
relu_output = relu(z)
leaky_relu_output = leaky_relu(z)

print("Sigmoid output:", sigmoid_output)
print("Tanh output:", tanh_output)
print("ReLU output:", relu_output)
print("Leaky ReLU output:", leaky_relu_output)

In [10]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from time import time

# Define the activation functions
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def tanh(z):
    return np.tanh(z)

def relu(z):
    return np.maximum(0, z)

def leaky_relu(z, alpha=0.01):
    return np.maximum(alpha * z, z)

# Define the neural network class
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, initialization, activation):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.initialization = initialization
        self.activation = activation
        self.weights = None
        self.biases = None

    def initialize_weights(self):
        if self.initialization == 'zeros':
            self.weights = np.zeros((self.hidden_size, self.input_size))
            self.biases = np.zeros((self.hidden_size, 1))
        elif self.initialization == 'random':
            self.weights = np.random.randn(self.hidden_size, self.input_size) * 0.01
            self.biases = np.random.randn(self.hidden_size, 1) * 0.01
        elif self.initialization == 'xavier':
            xavier_limit = np.sqrt(6 / (self.input_size + self.hidden_size))
            self.weights = np.random.uniform(low=-xavier_limit, high=xavier_limit,
                                             size=(self.hidden_size, self.input_size))
            self.biases = np.random.uniform(low=-xavier_limit, high=xavier_limit,
                                            size=(self.hidden_size, 1))
        elif self.initialization == 'he':
            he_limit = np.sqrt(2 / self.input_size)
            self.weights = np.random.randn(self.hidden_size, self.input_size) * he_limit
            self.biases = np.random.randn(self.hidden_size, 1) * he_limit

    def forward_propagation(self, X):
        z = np.dot(self.weights, X) + self.biases
        if self.activation == 'sigmoid':
            return sigmoid(z)
        elif self.activation == 'tanh':
            return tanh(z)
        elif self.activation == 'relu':
            return relu(z)
        elif self.activation == 'leaky_relu':
            return leaky_relu(z)

    def train(self, X_train, y_train, learning_rate, num_epochs):
        self.initialize_weights()
        m = X_train.shape[1]
        costs = []

        for epoch in range(num_epochs):
            # Forward propagation
            A = self.forward_propagation(X_train)

            # Compute cost
            cost = -np.sum(y_train * np.log(A) + (1 - y_train) * np.log(1 - A)) / m
            costs.append(cost)

            # Backward propagation
            dZ = A - y_train
            dW = np.dot(dZ, X_train.T) / m
            db = np.sum(dZ, axis=1, keepdims=True) / m

            # Update parameters
            self.weights -= learning_rate * dW
            self.biases -= learning_rate * db

        return costs

    def predict(self, X):
        A = self.forward_propagation(X)
        return (A > 0.5).astype(int)

# Generate a sample dataset
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10, random_state=42)

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the list of initialization methods and activation functions to try
initializations = ['zeros', 'random', 'xavier', 'he']
activations = ['sigmoid', 'tanh', 'relu', 'leaky_relu']

# Perform experiments
results = []
for initialization in initializations:
    for activation in activations:
        start_time = time()
        # Create and train the neural network
        model = NeuralNetwork(input_size=X.shape[1], hidden_size=10, output_size=1,
                              initialization=initialization, activation=activation)
        costs = model.train(X_train.T, y_train.reshape(1, -1), learning_rate=0.01, num_epochs=100)
        # Evaluate the model on the test set
        y_pred = model.predict(X_test.T)
        accuracy = accuracy_score(y_test, y_pred.flatten())
        convergence_time = time() - start_time
        results.append((initialization, activation, accuracy, convergence_time))

# Display the results
print("Results:")
for initialization, activation, accuracy, convergence_time in results:
    print(f"Initialization: {initialization}, Activation: {activation}, "
          f"Accuracy: {accuracy:.4f}, Convergence Time: {convergence_time:.2f} seconds")

# Plot the cost curves
plt.figure(figsize=(10, 6))
for initialization, activation, _, _ in results:
    costs = [cost for cost in costs if cost is not None]
    plt.plot(range(len(costs)), costs, label=f"{initialization}_{activation}")
plt.xlabel("Epochs")
plt.ylabel("Cost")
plt.title("Cost Curves for Different Initialization Methods and Activation Functions")
plt.legend()
plt.show()

ValueError: ignored