# Standard Softmax

## Imports

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

## Dataset

In [2]:
# Load the CIFAR100 dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()

# Normalize pixel values to be between 0 and 1
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Convert labels to one-hot encoding
y_train = keras.utils.to_categorical(y_train, 100)
y_test = keras.utils.to_categorical(y_test, 100)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz


## Model

In [3]:
# Define the model architecture
model = keras.Sequential(
    [
        layers.Conv2D(32, (3, 3), activation="relu", padding="same", input_shape=(32, 32, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation="relu", padding="same"),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation="relu", padding="same"),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(256, activation="relu"),
        layers.Dense(100, activation="softmax"),
    ]
)

# Compile the model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Train the model
model.fit(x_train, y_train, batch_size=64, epochs=10, validation_data=(x_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f7632c4fcd0>

## Evaluation of Model

In [4]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Predict on the test set
y_pred = model.predict(x_test)
y_pred = np.argmax(y_pred, axis=1)

# Convert one-hot encoding back to labels
y_true = np.argmax(y_test, axis=1)

# Compute evaluation metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')
f1 = f1_score(y_true, y_pred, average='macro')
confusion = confusion_matrix(y_true, y_pred)

# Print results
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:\n", confusion)

Accuracy: 0.4152
Precision: 0.4342838332714296
Recall: 0.41519999999999996
F1 Score: 0.41567851811053136
Confusion Matrix:
 [[71  0  0 ...  0  0  0]
 [ 1 49  0 ...  0  0  0]
 [ 1  2 25 ...  1  9  2]
 ...
 [ 0  0  0 ... 33  0  0]
 [ 1  0  4 ...  1 25  1]
 [ 0  0  0 ...  0  2 47]]


# Different Softmax

## Gumbel-Softmax

In [5]:
# Define the Gumbel-Softmax function
def gumbel_softmax(logits, temperature):
    # Sample from a Gumbel distribution
    u = tf.random.uniform(tf.shape(logits), minval=0, maxval=1)
    gumbel = -tf.math.log(-tf.math.log(u + 1e-20) + 1e-20)
    
    # Add the Gumbel noise to the logits and apply temperature
    y = logits + gumbel
    y = y / temperature
    
    # Compute the softmax
    y = tf.nn.softmax(y)
    
    return y


# Define the custom loss function using the Gumbel-Softmax function
def gumbel_softmax_loss(y_true, y_pred):
    y_pred = gumbel_softmax(y_pred, temperature)
    loss = keras.losses.categorical_crossentropy(y_true, y_pred)
    return loss

## Dataset

In [6]:
# Load the CIFAR100 dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()

# Normalize pixel values to be between 0 and 1
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Convert labels to one-hot encoding
y_train = keras.utils.to_categorical(y_train, 100)
y_test = keras.utils.to_categorical(y_test, 100)

## Model

In [7]:
# Define the model architecture
model = keras.Sequential(
    [
        layers.Conv2D(32, (3, 3), activation="relu", padding="same", input_shape=(32, 32, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation="relu", padding="same"),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation="relu", padding="same"),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(256, activation="relu"),
        layers.Dense(100, activation=None),
    ]
)

# Define the temperature for the Gumbel-Softmax function
temperature = 0.5
# Compile the model with the custom loss function
model.compile(optimizer="adam", loss=gumbel_softmax_loss, metrics=["accuracy"])

# Train the model
model.fit(x_train, y_train, batch_size=64, epochs=10, validation_data=(x_test, y_test))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f75f28c78e0>

## Evaluation of Model

In [8]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Evaluate the model on the test set
y_pred = model.predict(x_test)
y_pred = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Compute the evaluation metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average="macro")
recall = recall_score(y_true, y_pred, average="macro")
f1 = f1_score(y_true, y_pred, average="macro")
cm = confusion_matrix(y_true, y_pred)

# Print the evaluation metrics
print("Accuracy: {:.4f}".format(accuracy))
print("Precision: {:.4f}".format(precision))
print("Recall: {:.4f}".format(recall))
print("F1 score: {:.4f}".format(f1))
print("Confusion matrix:")
print(cm)

Accuracy: 0.4152
Precision: 0.4465
Recall: 0.4152
F1 score: 0.4164
Confusion matrix:
[[57  1  0 ...  0  0  0]
 [ 0 45  0 ...  0  0  2]
 [ 0  0 13 ...  2 14  0]
 ...
 [ 0  0  1 ... 49  1  0]
 [ 0  0  4 ...  1 32  0]
 [ 0  0  0 ...  0  0 46]]


# Bonus

In [14]:
import tensorflow as tf
from tensorflow.keras import layers

# Define the input shape
input_shape = (32, 32, 3)

# Define the number of classes
num_classes = 100

# Define the number of attention heads
num_heads = 8

# Define the transformer layer
def transformer_layer(inputs, hidden_size, num_heads):
    # Multi-Head Attention
    attn_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=hidden_size // num_heads)(inputs, inputs)
    attn_output = layers.Dropout(0.1)(attn_output)
    attn_output = layers.LayerNormalization(epsilon=1e-6)(inputs + attn_output)

    # Feed Forward network
    ffn = tf.keras.Sequential([
        layers.Dense(hidden_size, activation='relu'),
        layers.Dense(hidden_size)
    ])
    ffn_output = ffn(attn_output)
    ffn_output = layers.Dropout(0.1)(ffn_output)
    ffn_output = layers.LayerNormalization(epsilon=1e-6)(attn_output + ffn_output)

    return ffn_output

# Define the transformer-based model
def transformer_model(num_classes, num_heads, hidden_size):
    # Define the inputs
    inputs = layers.Input(shape=input_shape)

    # Preprocessing layers
    x = layers.experimental.preprocessing.Rescaling(1./255)(inputs)
    x = layers.experimental.preprocessing.RandomCrop(30, 30)(x)

    # Convolutional layers
    x = layers.Conv2D(32, 3, activation='relu')(x)
    x = layers.Conv2D(64, 3, activation='relu')(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Dropout(0.25)(x)
    x = layers.Flatten()(x)

    # Transformer layers
    x = layers.Dense(hidden_size)(x)
    x = layers.Reshape((1, hidden_size))(x)
    x = transformer_layer(x, hidden_size, num_heads)
    x = layers.GlobalAveragePooling1D()(x)

    # Output layer with different softmax functions
    outputs_softmax = layers.Dense(num_classes, activation='softmax')(x)
    outputs_gumbel_softmax = layers.Dense(num_classes, activation='linear')(x)
    outputs_gumbel_softmax = tf.nn.softmax(tf.random.gumbel(tf.shape(outputs_gumbel_softmax)) + outputs_gumbel_softmax)

    # Define the model
    model = tf.keras.Model(inputs=inputs, outputs=[outputs_softmax, outputs_gumbel_softmax])

    return model

# Create an instance of the transformer-based model
model = transformer_model(num_classes=num_classes, num_heads=num_heads, hidden_size=128)

# Compile the model
model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy'], 
              optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(x_train, [y_train, y_train], 
                    batch_size=128, epochs=10, validation_data=(x_test, [y_test, y_test]))


AttributeError: ignored