In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical
import pandas as pd

In [None]:
# Fetch the MNIST images and their integer labels (train and test splits)
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Rescale the pixel intensities by 1/255
x_train = x_train / 255.0
x_test = x_test / 255.0

# One-hot encode both label arrays over the 10 digit classes
y_train, y_test = (
    to_categorical(labels, num_classes=10) for labels in (y_train, y_test)
)

# Hidden-layer activations and optimizers to compare (every pairing is tried)
activation_functions = [
    'relu', 'tanh', 'sigmoid', 'softmax',
    'elu', 'selu', 'softplus', 'softsign',
]
optimizers = [
    'adam', 'sgd', 'adadelta', 'adagrad',
    'adamax', 'nadam', 'ftrl', 'rmsprop',
]

# Function to create model
def create_model(activation, optimizer):
    """Build and compile a small fully connected classifier for 28x28 inputs.

    Args:
        activation: Activation passed to both hidden ``Dense`` layers.
        optimizer: Optimizer passed to ``model.compile``.

    Returns:
        A compiled ``Sequential`` model: Input(28, 28) -> Flatten ->
        Dense(128) -> Dense(64) -> Dense(10, softmax), using categorical
        cross-entropy loss and reporting accuracy.
    """
    model = Sequential([
        # Declare the input shape with an explicit Input object instead of the
        # `input_shape=` layer argument, which newer Keras releases warn about.
        tf.keras.Input(shape=(28, 28)),
        Flatten(),
        Dense(128, activation=activation),
        Dense(64, activation=activation),
        # Output layer: softmax over the 10 classes
        Dense(10, activation='softmax'),
    ])
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Train and evaluate one model per (activation, optimizer) pair and collect
# its test accuracy and loss.
SEED = 42  # fixed seed so every configuration is trained under the same random conditions
results = []
for activation in activation_functions:
    for optimizer in optimizers:
        print(f'\n\nActivation function: {activation} ----- Optimizer : {optimizer}')
        # Release the previous model's state; otherwise all 64 models pile up
        # in Keras' global state for the lifetime of the kernel.
        tf.keras.backend.clear_session()
        # Re-seed before building each model so that differences in the scores
        # come from the activation/optimizer choice, not from random
        # initialisation or shuffling, and so that a re-run reproduces the table.
        tf.keras.utils.set_random_seed(SEED)
        model = create_model(activation, optimizer)
        model.fit(x_train, y_train, epochs=5, validation_split=0.2, verbose=2)
        loss, accuracy = model.evaluate(x_test, y_test, verbose=2)
        # Row layout: [activation, optimizer, accuracy, loss]
        results.append([activation, optimizer, accuracy, loss])





Activation function: relu ----- Optimizer : adam
Epoch 1/5
1500/1500 - 6s - loss: 0.2670 - accuracy: 0.9225 - val_loss: 0.1372 - val_accuracy: 0.9600 - 6s/epoch - 4ms/step
Epoch 2/5
1500/1500 - 4s - loss: 0.1128 - accuracy: 0.9655 - val_loss: 0.1074 - val_accuracy: 0.9672 - 4s/epoch - 3ms/step
Epoch 3/5
1500/1500 - 4s - loss: 0.0785 - accuracy: 0.9755 - val_loss: 0.0996 - val_accuracy: 0.9688 - 4s/epoch - 3ms/step
Epoch 4/5
1500/1500 - 4s - loss: 0.0595 - accuracy: 0.9808 - val_loss: 0.0954 - val_accuracy: 0.9720 - 4s/epoch - 3ms/step
Epoch 5/5
1500/1500 - 4s - loss: 0.0478 - accuracy: 0.9845 - val_loss: 0.1148 - val_accuracy: 0.9672 - 4s/epoch - 3ms/step
313/313 - 1s - loss: 0.1050 - accuracy: 0.9694 - 671ms/epoch - 2ms/step


Activation function: relu ----- Optimizer : sgd
Epoch 1/5
1500/1500 - 5s - loss: 0.6953 - accuracy: 0.8210 - val_loss: 0.3439 - val_accuracy: 0.9032 - 5s/epoch - 3ms/step
Epoch 2/5
1500/1500 - 4s - loss: 0.3270 - accuracy: 0.9072 - val_loss: 0.2790 - val_accur

In the above you can see 1500/1500.   
i.e   
Training data size = 60000    
Testing/Evaluation data size = 10000    

The validation split is 0.2 (20% of the training data, i.e. 12,000 samples), so the effective training size becomes 48,000.

If no `batch_size` is specified, the default is 32    
Now    
Training : 48000/32 = 1500    
Testing : 10000/32 = 312.5, rounded up to 313 (the last batch is only partially filled)   

Which means: in each epoch, the 48,000 training samples are processed in 1500 batches, where each batch contains 32 data points.

1. **Training Data Size**: The total training dataset size is 60,000 samples.

2. **Testing/Evaluation Data Size**: The total test dataset size is 10,000 samples.

3. **Validation Set Size**: During training, 20% of the training data (12,000 samples) is set aside as a validation set. The remaining 80% (48,000 samples) is used for actual training.

4. **Default Batch Size**: If not explicitly mentioned, the default batch size used by TensorFlow is typically 32.

5. **Training Process**: During each epoch of training, the 48,000 training samples are divided into batches, with each batch containing 32 samples. This results in 1500 batches being processed per epoch.

6. **Testing Process**: During testing, the 10,000 test samples are also divided into batches of 32 samples each. This results in 313 batches being processed for evaluation: 312 full batches plus a final partial batch of 16 samples.

7. **Evaluation Logs**: The "1500/1500" during training and "313/313" during testing indicate the number of batches processed during each respective phase, with the first number denoting the batches processed and the second number denoting the total number of batches in the dataset.

In [None]:
# Tabulate the collected [activation, optimizer, accuracy, loss] rows,
# print the table, and save it as a CSV report without the index column.
results_df = pd.DataFrame(
    data=results,
    columns=['Activation Function', 'Optimizer', 'Accuracy', 'Loss'],
)
print(results_df)
results_df.to_csv(
    path_or_buf='Report_Activation_And_Optimizers.csv',
    index=False,
)

   Activation Function Optimizer  Accuracy      Loss
0                 relu      adam    0.9694  0.104975
1                 relu       sgd    0.9411  0.199413
2                 relu  adadelta    0.6039  1.804599
3                 relu   adagrad    0.9085  0.354078
4                 relu    adamax    0.9692  0.103416
..                 ...       ...       ...       ...
59            softsign   adagrad    0.8885  0.526023
60            softsign    adamax    0.9610  0.135179
61            softsign     nadam    0.9733  0.083029
62            softsign      ftrl    0.1135  2.301871
63            softsign   rmsprop    0.9718  0.092565

[64 rows x 4 columns]
