## Weight initialisations

In [3]:
import tensorflow as tf
from tensorflow.keras import layers, Sequential
from tensorflow.keras.optimizers import Adam
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_circles
import csv
import os

# Custom Sine Activation Layer
class SineActivation(tf.keras.layers.Layer):
    """Sine activation with one trainable scalar frequency.

    Computes ``sin(freq * inputs)``, where ``freq`` is a scalar weight that
    starts at ``initial_frequency`` and is updated during training.

    Args:
        initial_frequency: Starting value of the ``freq`` weight.
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, initial_frequency, **kwargs):
        super().__init__(**kwargs)
        self.initial_frequency = initial_frequency

    def build(self, input_shape):
        """Create the scalar ``freq`` weight (independent of ``input_shape``)."""
        self.freq = self.add_weight(
            shape=(),
            initializer=tf.keras.initializers.Constant(self.initial_frequency),
            trainable=True,
            name='freq',
        )

    def call(self, inputs):
        """Return ``sin(freq * inputs)``."""
        return tf.math.sin(self.freq * inputs)

    def get_config(self):
        """Return the layer config including ``initial_frequency``.

        Without this entry the layer cannot be re-created from its config,
        because ``__init__`` requires ``initial_frequency``.
        """
        config = super().get_config()
        config.update({'initial_frequency': self.initial_frequency})
        return config


# Noisy concentric-circles data (2000 samples); 20% is held out as the test set.
X, y = make_circles(
    n_samples=2000,
    noise=0.1,
    factor=0.5,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

def create_model(activation, hidden_units, k_values, initializer):
    """Build a Sequential binary classifier over 2-feature inputs.

    Args:
        activation: ``'sine'`` to follow every hidden Dense layer with a
            ``SineActivation`` layer; any other value is passed to the hidden
            Dense layers as their ``activation``.
        hidden_units: Iterable with the unit count of each hidden layer.
        k_values: Initial frequencies, indexed by hidden-layer position.
            Only read when ``activation == 'sine'``.
        initializer: Used as both kernel and bias initializer of every
            Dense layer, including the output layer.

    Returns:
        ``(model, sine_layers_k)`` where ``sine_layers_k`` lists the
        ``SineActivation`` layers in order (empty for non-sine activations).
    """
    use_sine = activation == 'sine'
    init_kwargs = dict(kernel_initializer=initializer, bias_initializer=initializer)

    model = Sequential([layers.Input(shape=(2,))])
    sine_layers_k = []

    for idx, width in enumerate(hidden_units):
        layer_name = f'dense_{idx}'
        if not use_sine:
            # Built-in activation: let the Dense layer apply it directly.
            model.add(layers.Dense(width, activation=activation, name=layer_name, **init_kwargs))
            continue

        # Sine variant: linear Dense layer followed by the trainable sine.
        model.add(layers.Dense(width, name=layer_name, **init_kwargs))
        sine = SineActivation(initial_frequency=k_values[idx], name=f'sine_{idx}')
        model.add(sine)
        sine_layers_k.append(sine)

    # Single sigmoid unit for binary classification.
    model.add(layers.Dense(1, activation='sigmoid', name='output', **init_kwargs))
    return model, sine_layers_k


def train_step(model, x_batch, y_batch, optimizer_main, optimizer_k):
    """Run one optimisation step using two optimizers.

    Variables whose name contains ``'freq'`` are updated by ``optimizer_k``;
    all other trainable variables are updated by ``optimizer_main``.

    Args:
        model: Model called as ``model(x_batch, training=True)``.
        x_batch: Batch of inputs.
        y_batch: Batch of labels; reshaped to ``(-1, 1)`` before the loss.
        optimizer_main: Optimizer for the non-frequency variables.
        optimizer_k: Optimizer for the frequency variables.

    Returns:
        The mean binary cross-entropy of the batch as a scalar tensor.
    """
    with tf.GradientTape() as tape:
        predictions = model(x_batch, training=True)
        targets = tf.reshape(y_batch, (-1, 1))
        per_sample = tf.keras.losses.binary_crossentropy(targets, predictions)
        loss = tf.reduce_mean(per_sample)  # collapse to a scalar

    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)

    # Split the (gradient, variable) pairs into the two optimizer groups.
    freq_pairs = []
    main_pairs = []
    for grad, var in zip(gradients, variables):
        target = freq_pairs if 'freq' in var.name else main_pairs
        target.append((grad, var))

    optimizer_k.apply_gradients(freq_pairs)
    optimizer_main.apply_gradients(main_pairs)

    return loss

def run_experiment(initializer_name, initializer):
    """Train and evaluate the sine-activation model for one initializer.

    Uses the module-level ``X_train``/``y_train`` for training and
    ``X_test``/``y_test`` for evaluation. Progress is printed on every epoch
    whose index is a multiple of 10, the final result is appended to the
    results CSV, and the initial and final frequency of each sine layer are
    printed.

    Args:
        initializer_name: Label used in log lines and in the CSV row.
        initializer: Initializer passed to ``create_model``.

    Returns:
        The final test accuracy reported by ``model.evaluate``.
    """
    hidden_units = [4, 2]
    initial_k = [1.4, 1.6]
    epochs = 20
    batch_size = 32

    model, sine_layers_k = create_model('sine', hidden_units, initial_k, initializer)

    # compile() sets the loss/metric reported by model.evaluate(); the manual
    # training loop below applies its own optimizers instead.
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # The frequency variables get a larger learning rate than the rest.
    optimizer_main = Adam(learning_rate=0.001)
    optimizer_k = Adam(learning_rate=0.01)

    batch_starts = range(0, len(X_train), batch_size)
    for epoch in range(epochs):
        batch_losses = []
        for start in batch_starts:
            stop = start + batch_size
            step_loss = train_step(
                model, X_train[start:stop], y_train[start:stop], optimizer_main, optimizer_k
            )
            batch_losses.append(step_loss.numpy())

        avg_loss = sum(batch_losses, 0.0) / len(batch_losses)

        if not epoch % 10:
            _, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
            print(f"Initializer: {initializer_name}, Epoch {epoch+1}/{epochs}, Avg Loss: {avg_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

    _, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"Initializer: {initializer_name}, Final Test Accuracy: {test_accuracy:.4f}")

    save_results_to_csv(
        test_accuracy=test_accuracy,
        num_params=model.count_params(),
        activation='sine',
        initializer=initializer_name,
        dataset_name='Concentric Circles'
    )

    # Report how far each trainable frequency moved from its starting value.
    for layer_no, (start_k, layer) in enumerate(zip(initial_k, sine_layers_k), start=1):
        print(f"Initializer: {initializer_name}, Layer {layer_no} - Initial k: {start_k}, Final k: {layer.freq.numpy():.4f}")

    return test_accuracy

def save_results_to_csv(test_accuracy, num_params, activation, initializer, dataset_name):
    """Append one experiment result row to ``experiment_results.csv``.

    The file lives in the current working directory. A header row is written
    first whenever the file is missing or empty, so a pre-created empty file
    still ends up with column names.

    Args:
        test_accuracy: Final test accuracy of the run.
        num_params: Parameter count of the model.
        activation: Name of the activation that was used.
        initializer: Name of the weight initializer that was used.
        dataset_name: Name of the dataset the run was trained on.
    """
    csv_filename = 'experiment_results.csv'
    # Same column names as the other experiment cell that appends to this
    # file, so the CSV has a single schema whichever cell creates it.
    headers = ['test_accuracy', 'num_params', 'activation', 'weight_init', 'dataset']
    row = [test_accuracy, num_params, activation, initializer, dataset_name]

    try:
        needs_header = os.path.getsize(csv_filename) == 0
    except OSError:
        # The file does not exist yet (or cannot be inspected): start it
        # with a header. Any real access problem surfaces in open() below.
        needs_header = True

    with open(csv_filename, mode='a', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(headers)
        writer.writerow(row)


# Initializers under test, keyed by the label used in log lines and the CSV.
# NOTE(review): 'random' and 'uniform' are configured identically — confirm
# whether one of them was meant to be a different distribution.
initializers = dict(
    xavier=tf.keras.initializers.GlorotUniform(),
    he=tf.keras.initializers.HeNormal(),
    lecun=tf.keras.initializers.LecunUniform(),
    random=tf.keras.initializers.RandomUniform(minval=-1.5, maxval=1.5),
    uniform=tf.keras.initializers.RandomUniform(minval=-1.5, maxval=1.5),
)

# One training run per initializer; keep the final test accuracy of each.
results = {}
for name, initializer in initializers.items():
    print(f"\nRunning experiment with {name} initializer...")
    results[name] = run_experiment(name, initializer)

print("\nSummary of Results:")
for name, accuracy in results.items():
    print(f"{name} initializer: {accuracy:.4f}")

# The first entry wins when several initializers tie on accuracy.
best_initializer, best_accuracy = max(results.items(), key=lambda item: item[1])
print(f"\nBest initializer: {best_initializer} with accuracy {best_accuracy:.4f}")



Running experiment with xavier initializer...
Initializer: xavier, Epoch 1/20, Avg Loss: 0.6152, Test Accuracy: 0.6950
Initializer: xavier, Epoch 11/20, Avg Loss: 0.4794, Test Accuracy: 0.7900
Initializer: xavier, Final Test Accuracy: 0.9875
Initializer: xavier, Layer 1 - Initial k: 1.4, Final k: 0.4932
Initializer: xavier, Layer 2 - Initial k: 1.6, Final k: 2.6732

Running experiment with he initializer...
Initializer: he, Epoch 1/20, Avg Loss: 0.6216, Test Accuracy: 0.7725
Initializer: he, Epoch 11/20, Avg Loss: 0.3636, Test Accuracy: 0.9200
Initializer: he, Final Test Accuracy: 0.9875
Initializer: he, Layer 1 - Initial k: 1.4, Final k: 2.1664
Initializer: he, Layer 2 - Initial k: 1.6, Final k: 3.0727

Running experiment with lecun initializer...
Initializer: lecun, Epoch 1/20, Avg Loss: 0.6180, Test Accuracy: 0.7450
Initializer: lecun, Epoch 11/20, Avg Loss: 0.5442, Test Accuracy: 0.7450
Initializer: lecun, Final Test Accuracy: 0.7450
Initializer: lecun, Layer 1 - Initial k: 1.4, F

## All weight initialisations with activations

In [2]:
import tensorflow as tf
from tensorflow.keras import layers, Sequential
from tensorflow.keras.optimizers import Adam
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_circles
import csv
import os

# Custom Mish activation function
def mish(x):
    """Return the Mish activation of ``x``: ``x * tanh(softplus(x))``."""
    gate = tf.math.tanh(tf.math.softplus(x))
    return x * gate

# Noisy concentric-circles data (2000 samples); 20% is held out as the test set.
X, y = make_circles(
    n_samples=2000,
    noise=0.1,
    factor=0.5,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

def create_model(activation, hidden_units, initializer):
    """Build a Sequential binary classifier over 2-feature inputs.

    Each hidden layer is a linear Dense layer followed by a separate
    ``Activation`` layer; the output is a single sigmoid unit.

    Args:
        activation: Activation identifier. ``'mish'`` selects the local
            ``mish`` function; any other value is handed to
            ``layers.Activation`` unchanged.
        hidden_units: Iterable with the unit count of each hidden layer.
        initializer: Used as both kernel and bias initializer of every
            Dense layer, including the output layer.

    Returns:
        The (uncompiled) model.
    """
    # Resolve the activation once rather than on every loop iteration.
    hidden_activation = mish if activation == 'mish' else activation
    init_kwargs = dict(kernel_initializer=initializer, bias_initializer=initializer)

    model = Sequential([layers.Input(shape=(2,))])
    for idx, width in enumerate(hidden_units):
        model.add(layers.Dense(width, name=f'dense_{idx}', **init_kwargs))
        model.add(layers.Activation(hidden_activation))

    model.add(layers.Dense(1, activation='sigmoid', name='output', **init_kwargs))
    return model

def run_experiment(activation, initializer_name, initializer, dataset_name):
    """Train and evaluate one activation/initializer combination.

    Trains on the module-level ``X_train``/``y_train`` (20% of which is set
    aside by ``fit`` as validation data), evaluates on ``X_test``/``y_test``,
    prints the result and appends it to the results CSV.

    Args:
        activation: Activation identifier passed to ``create_model``.
        initializer_name: Label used in the log line and the CSV row.
        initializer: Initializer passed to ``create_model``.
        dataset_name: Dataset label written to the CSV row.

    Returns:
        The final test accuracy reported by ``model.evaluate``.
    """
    hidden_units = [4, 2]

    model = create_model(activation, hidden_units, initializer)
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # The training history returned by fit() is not used here.
    model.fit(
        X_train,
        y_train,
        epochs=20,
        batch_size=32,
        validation_split=0.2,
        verbose=0,
    )

    _, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    num_params = model.count_params()

    print(f"Activation: {activation}, Initializer: {initializer_name}, Final Test Accuracy: {test_accuracy:.4f}, Num Params: {num_params}")

    save_results_to_csv(test_accuracy, num_params, activation, initializer_name, dataset_name)

    return test_accuracy

def save_results_to_csv(test_accuracy, num_params, activation, weight_init, dataset):
    """Append one experiment result row to ``experiment_results.csv``.

    The file lives in the current working directory. A header row is written
    first whenever the file is missing or empty, so a pre-created empty file
    still ends up with column names.

    Args:
        test_accuracy: Final test accuracy of the run.
        num_params: Parameter count of the model.
        activation: Name of the activation that was used.
        weight_init: Name of the weight initializer that was used.
        dataset: Name of the dataset the run was trained on.
    """
    csv_filename = 'experiment_results.csv'
    headers = ['test_accuracy', 'num_params', 'activation', 'weight_init', 'dataset']
    row = [test_accuracy, num_params, activation, weight_init, dataset]

    try:
        needs_header = os.path.getsize(csv_filename) == 0
    except OSError:
        # The file does not exist yet (or cannot be inspected): start it
        # with a header. Any real access problem surfaces in open() below.
        needs_header = True

    with open(csv_filename, mode='a', newline='') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(headers)
        writer.writerow(row)

# Activations and initializers to combine; every pair gets one training run.
# NOTE(review): 'random' and 'uniform' are configured identically — confirm
# whether one of them was meant to be a different distribution.
activations = ['relu', 'leaky_relu', 'elu', 'softplus', 'swish', 'mish', 'gelu']
initializers = dict(
    xavier=tf.keras.initializers.GlorotUniform(),
    he=tf.keras.initializers.HeNormal(),
    lecun=tf.keras.initializers.LecunUniform(),
    random=tf.keras.initializers.RandomUniform(minval=-0.05, maxval=0.05),
    uniform=tf.keras.initializers.RandomUniform(minval=-0.05, maxval=0.05),
)

# Label written to the results CSV for every run.
dataset_name = 'Concentric Circles'

# Final test accuracy keyed by (activation, initializer name).
results = {}
for activation in activations:
    for init_name, initializer in initializers.items():
        print(f"\nRunning experiment with {activation} activation and {init_name} initializer...")
        results[(activation, init_name)] = run_experiment(
            activation, init_name, initializer, dataset_name
        )

print("\nSummary of Results:")
for (activation, init_name), accuracy in results.items():
    print(f"{activation} activation with {init_name} initializer: {accuracy:.4f}")

# The first entry wins when several combinations tie on accuracy.
best_combo, best_accuracy = max(results.items(), key=lambda item: item[1])
print(f"\nBest combination: {best_combo[0]} activation with {best_combo[1]} initializer, accuracy: {best_accuracy:.4f}")



Running experiment with relu activation and xavier initializer...
Activation: relu, Initializer: xavier, Final Test Accuracy: 0.5075, Num Params: 25

Running experiment with relu activation and he initializer...
Activation: relu, Initializer: he, Final Test Accuracy: 0.7700, Num Params: 25

Running experiment with relu activation and lecun initializer...
Activation: relu, Initializer: lecun, Final Test Accuracy: 0.6600, Num Params: 25

Running experiment with relu activation and random initializer...
Activation: relu, Initializer: random, Final Test Accuracy: 0.4925, Num Params: 25

Running experiment with relu activation and uniform initializer...
Activation: relu, Initializer: uniform, Final Test Accuracy: 0.7125, Num Params: 25

Running experiment with leaky_relu activation and xavier initializer...
Activation: leaky_relu, Initializer: xavier, Final Test Accuracy: 0.5975, Num Params: 25

Running experiment with leaky_relu activation and he initializer...
Activation: leaky_relu, Ini