In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.losses import CategoricalCrossentropy, MeanSquaredError
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Data Preprocessing
# Loads CIFAR-10, scales pixels, one-hot encodes labels, carves out a
# validation split and configures the augmentation generator used for training.
print("1. Data Preprocessing")

# Load the CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Normalize pixel values to range between 0 and 1
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Convert class labels into one-hot encoded format (10 classes)
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Hold out 20% of the training data for validation; the fixed seed keeps the
# split reproducible between runs.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

print(f"Training set shape: {x_train.shape}")
print(f"Validation set shape: {x_val.shape}")
print(f"Test set shape: {x_test.shape}")

# Data augmentation: random rotations, shifts and horizontal flips applied
# on the fly to training batches.
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
)
# NOTE: datagen.fit(x_train) is intentionally not called. fit() is only
# required when featurewise_center, featurewise_std_normalization or
# zca_whitening is enabled; none of them is, so the call would only make a
# throwaway copy of the whole training set.

# 2. Network Architecture Design
print("\n2. Network Architecture Design")

# Convolutional stage: three 3x3 conv layers (32 -> 64 -> 64 filters) with
# 2x2 max-pooling after the first two. The first layer declares the
# 32x32x3 input shape.
feature_extractor = [
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
]

# Dense stage: flatten, a ReLU layer, a tanh layer, then a 10-way softmax.
classifier_head = [
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='tanh'),
    layers.Dense(10, activation='softmax'),
]

# Same layer order as a single literal list: extractor first, then head.
model = models.Sequential(feature_extractor + classifier_head)

model.summary()

# 3. Activation Functions
# Explanatory text only: describes the two hidden-layer activations
# (ReLU and tanh) used by the model.
activation_notes = (
    "\n3. Activation Functions",
    "We've chosen two activation functions for our hidden layers:",
    "a) ReLU (Rectified Linear Unit):",
    "   - Formula: f(x) = max(0, x)",
    "   - Role in backpropagation: ReLU allows for sparse activation and efficient gradient propagation.",
    "   - It helps mitigate the vanishing gradient problem, allowing for faster training of deep networks.",
    "\nb) Tanh (Hyperbolic Tangent):",
    "   - Formula: f(x) = (e^x - e^-x) / (e^x + e^-x)",
    "   - Role in backpropagation: Tanh outputs values between -1 and 1, allowing for both positive and negative activations.",
    "   - It can help center the data, which can be beneficial for the next layer.",
)
for note in activation_notes:
    print(note)

# 4. Loss Function and Optimizer
print("\n4. Loss Function and Optimizer")

# The two candidate losses: categorical cross-entropy and mean squared error.
loss_cce, loss_mse = CategoricalCrossentropy(), MeanSquaredError()

# Adam optimizer with a learning rate of 0.001.
optimizer = optimizers.Adam(learning_rate=0.001)

# Explanatory text about the losses, the optimizer and the learning rate.
loss_optimizer_notes = (
    "We'll compare two loss functions:",
    "a) Categorical Cross-Entropy (CCE):",
    "   - Commonly used for multi-class classification problems.",
    "   - Measures the dissimilarity between the predicted probability distribution and the true distribution.",
    "\nb) Mean Squared Error (MSE):",
    "   - Usually used for regression, but can be applied to classification.",
    "   - Measures the average squared difference between the predicted and true values.",
    "\nOptimizer: Adam",
    "- Adaptive learning rate optimization algorithm.",
    "- Combines ideas from RMSprop and Momentum.",
    "\nLearning rate in backpropagation:",
    "- Controls the step size at each iteration while moving toward a minimum of the loss function.",
    "- A high learning rate can cause overshooting, while a low one can result in slow convergence.",
    "- Adam adapts the learning rate for each parameter, which can help in finding a good balance.",
)
for note in loss_optimizer_notes:
    print(note)

# First configuration: compile the model with the CCE loss and track accuracy.
model.compile(loss=loss_cce, optimizer=optimizer, metrics=['accuracy'])

# 5. Training the Model
# Trains the same architecture twice, once per loss function, so that the two
# training histories can be compared.
print("\n5. Training the Model")

epochs = 50
batch_size = 64

# Snapshot the weights before any training. Both runs must start from this
# same point; otherwise the second run would merely fine-tune the network
# already trained by the first, and the curves would not be comparable.
initial_weights = model.get_weights()

# Train with CCE loss (the model was compiled with it above)
print("\nTraining with Categorical Cross-Entropy loss:")
history_cce = model.fit(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    epochs=epochs,
    validation_data=(x_val, y_val),
    verbose=1
)

# Restore the starting weights and build a fresh optimizer with the same
# configuration, so optimizer state accumulated during the CCE run does not
# carry over into the MSE run.
model.set_weights(initial_weights)
optimizer_mse = type(optimizer).from_config(optimizer.get_config())

# Recompile the model with MSE loss and train again from scratch.
# After this point `model` holds the MSE-trained weights.
model.compile(optimizer=optimizer_mse, loss=loss_mse, metrics=['accuracy'])
print("\nTraining with Mean Squared Error loss:")
history_mse = model.fit(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    epochs=epochs,
    validation_data=(x_val, y_val),
    verbose=1
)

# Plot training history
# Two side-by-side panels (accuracy, loss); each shows training and
# validation curves for both the CCE and the MSE run.
plt.figure(figsize=(12, 4))

runs = (('CCE', history_cce), ('MSE', history_mse))
panels = (
    (1, 'accuracy', 'Accuracy'),
    (2, 'loss', 'Loss'),
)

for position, metric_key, metric_label in panels:
    plt.subplot(1, 2, position)
    for run_tag, run_history in runs:
        curves = run_history.history
        plt.plot(curves[metric_key],
                 label=f'{run_tag} Training {metric_label}')
        plt.plot(curves[f'val_{metric_key}'],
                 label=f'{run_tag} Validation {metric_label}')
    plt.title(f'Model {metric_label}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_label)
    plt.legend()

plt.tight_layout()
plt.show()

# 6. Model Evaluation
# Scores the current `model` on the held-out test set and reports per-class
# metrics plus a confusion matrix.
print("\n6. Model Evaluation")

# CIFAR-10 class names in label-index order (0..9), used so the report and
# the confusion matrix show readable names instead of bare indices.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]
class_ids = np.arange(len(class_names))

# Evaluate on test set (loss is whichever loss the model was last compiled with)
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=1)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Make predictions; collapse predicted probabilities and one-hot targets
# to class indices.
y_pred = model.predict(x_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Classification report; `labels` pins the row order so it always matches
# `target_names`.
print("\nClassification Report:")
print(classification_report(
    y_true, y_pred_classes, labels=class_ids, target_names=class_names
))

# Confusion matrix with the same fixed label order, annotated with class names
conf_matrix = confusion_matrix(y_true, y_pred_classes, labels=class_ids)
plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()

print("\nThis completes the implementation of the neural network for CIFAR-10 classification.")


1. Data Preprocessing
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Training set shape: (40000, 32, 32, 3)
Validation set shape: (10000, 32, 32, 3)
Test set shape: (10000, 32, 32, 3)

2. Network Architecture Design


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 15, 15, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 13, 13, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 6, 6, 64)          0         
 g2D)                                                            
    