In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import os

print("Libraries imported successfully!")

# Seed the RNGs *before* reporting the value. random.seed() returns None, so
# interpolating the call itself into the message would print "None".
SEED = 42             # shared seed for the generators seeded below
random.seed(SEED)     # Python's built-in generator
np.random.seed(SEED)  # NumPy's legacy global generator
print(f"Using random seed: {SEED}")  # For reproducible results

In [None]:
from keras.datasets import mnist
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import Input
import tensorflow as tf

# Seed the Keras/TensorFlow side too, so repeated runs of the training cells
# draw the same random numbers. set_random_seed() seeds Python's `random`,
# NumPy and the TensorFlow backend in one call.
# NOTE(review): bit-exact GPU runs may additionally need
# tf.config.experimental.enable_op_determinism() -- confirm if that matters here.
tf.keras.utils.set_random_seed(42)

print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

In [None]:
print("Loading MNIST dataset...")
# load_data() hands back two (images, labels) pairs: training first, test second.
train_pair, test_pair = mnist.load_data()
X_train, y_train = train_pair
X_test, y_test = test_pair

print("Dataset loaded! Here's what we have:")
# Report the shape of each of the four arrays with its caption.
for split_name, split_array in (
    ("Training images", X_train),
    ("Training labels", y_train),
    ("Test images", X_test),
    ("Test labels", y_test),
):
    print(f"{split_name}: {split_array.shape}")

first_image = X_train[0]
print(f"Image dimensions: {first_image.shape} (28x28 pixels)")
pixel_min, pixel_max = X_train.min(), X_train.max()
print(f"Pixel value range: {pixel_min} to {pixel_max}")

In [None]:
print("Let's look at some handwritten digits:")
fig, axes = plt.subplots(3, 3, figsize=(10, 6))

# Fixed indices (every 1000th training image) give a varied but
# reproducible selection; axes.flat walks the grid row by row.
for ax, idx in zip(axes.flat, range(0, 9 * 1000, 1000)):
    ax.imshow(X_train[idx], cmap='gray', interpolation='none')
    ax.set_title(f"Label: {y_train[idx]}")
    ax.axis('off')

fig.tight_layout()
fig.savefig('sample_digits.png', dpi=150, bbox_inches='tight')
plt.show()
print("Sample digits saved as 'sample_digits.png'")

In [None]:
print("Preprocessing the data...")
print("Original shape:", X_train.shape)

# Flatten every image into a single row vector. The row count is taken from
# the array itself and -1 lets NumPy infer the per-image length (28*28 = 784
# for MNIST), so the cell keeps working if only a subset of the data is loaded.
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)

# Normalize pixel values to 0-1 range (neural networks work better with small numbers)
X_train_norm = X_train_flat.astype('float32') / 255.0
X_test_norm = X_test_flat.astype('float32') / 255.0

# Sanity checks: both splits must have the same feature count, and the
# scaled pixels must lie in [0, 1].
assert X_train_norm.shape[1] == X_test_norm.shape[1], "train/test feature counts differ"
assert 0.0 <= X_train_norm.min() <= X_train_norm.max() <= 1.0, "pixels not scaled to [0, 1]"

print("After preprocessing:")
print(f"Training data shape: {X_train_norm.shape}")
print(f"Test data shape: {X_test_norm.shape}")
print(f"New pixel value range: {X_train_norm.min():.1f} to {X_train_norm.max():.1f}")

In [None]:
print("Converting labels to one-hot encoding...")
first_label = y_train[0]
print(f"Original label example: {first_label} (just a number)")

num_classes = 10  # digits 0-9
# Encode both label vectors with the same class count.
Y_train, Y_test = (
    to_categorical(label_vector, num_classes)
    for label_vector in (y_train, y_test)
)

print(f"One-hot encoded example: {Y_train[0]} (array with 1 in position {first_label})")
print(f"Training labels shape: {Y_train.shape}")

In [None]:
print("Building the neural network...")

# Derive the layer sizes from the prepared data instead of repeating the
# literals 784 and 10, so the network always matches the preprocessing cells.
n_features = X_train_norm.shape[1]  # 784 for flattened 28x28 images

model = Sequential([
    Input(shape=(n_features,)),               # Input layer: one feature per flattened pixel
    Dense(512, activation='relu'),            # Hidden layer 1: 512 neurons with ReLU activation
    Dropout(0.2),                             # Dropout: randomly ignore 20% of neurons (prevents overfitting)
    Dense(512, activation='relu'),            # Hidden layer 2: another 512 neurons
    Dropout(0.2),                             # More dropout for regularization
    Dense(num_classes, activation='softmax')  # Output layer: one neuron per digit, softmax gives probabilities
])

print("Model architecture:")
model.summary()

In [None]:
print("Configuring the model for training...")
# Adam optimizer, cross-entropy loss over the one-hot labels, and accuracy
# reported alongside the loss.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
print("Model compiled and ready to train!")

In [None]:
print("Starting training...")
print("This should be fairly fast with our resources...")

# Training settings gathered in one place:
#   batch_size       - examples processed per gradient step
#   epochs           - full passes over the training data
#   verbose          - 1 shows a progress bar per epoch
#   validation_split - fraction of the training data held out for validation
fit_options = dict(batch_size=128, epochs=10, verbose=1, validation_split=0.1)
history = model.fit(X_train_norm, Y_train, **fit_options)

print("Training completed!")

In [None]:
print("Evaluating model performance...")
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = model.evaluate(X_test_norm, Y_test, verbose=0)
accuracy_percent = test_accuracy * 100
print(f"Test Accuracy: {test_accuracy:.4f} ({accuracy_percent:.2f}%)")
print(f"Test Loss: {test_loss:.4f}")

In [None]:
print("Visualize the training progress")
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# One panel per recorded metric: the left shows accuracy, the right shows loss.
# Each panel overlays the training curve and its 'val_' counterpart.
panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))
for ax, (metric_key, metric_label) in zip(axes, panels):
    ax.plot(history.history[metric_key], label=f'Training {metric_label}')
    ax.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_label}')
    ax.set_title(f'Model {metric_label} Over Time')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric_label)
    ax.legend()
    ax.grid(True)

fig.tight_layout()
fig.savefig('training_history.png', dpi=150, bbox_inches='tight')
plt.show()
print("Training history saved as 'training_history.png'")

In [None]:
print("Making predictions on test examples...")
predictions = model.predict(X_test_norm[:10])  # Predict first 10 test examples

fig, axes = plt.subplots(2, 5, figsize=(15, 6))
# Pair each axis with the raw image, its predicted probabilities and its
# true label; zip stops after the ten predictions.
for ax, image, probabilities, actual_digit in zip(axes.flat, X_test, predictions, y_test):
    ax.imshow(image, cmap='gray')
    predicted_digit = np.argmax(probabilities)
    confidence = probabilities[predicted_digit]

    # Green title for a correct prediction, red for a miss.
    if predicted_digit == actual_digit:
        color = 'green'
    else:
        color = 'red'
    ax.set_title(
        f'Pred: {predicted_digit} (Actual: {actual_digit})\nConfidence: {confidence:.3f}',
        color=color,
    )
    ax.axis('off')

fig.tight_layout()
fig.savefig('predictions_sample.png', dpi=150, bbox_inches='tight')
plt.show()
print("Prediction examples saved as 'predictions_sample.png'")

In [None]:
# Target paths for the two copies of the trained model.
model_filename = 'mnist_model.h5'         # old format
model_filename_new = 'mnist_model.keras'  # newer format

print(f"Saving trained model in old format as '{model_filename}'...")
model.save(model_filename)
print("Model saved successfully!")

# Second copy in the newer format
model.save(model_filename_new)
print(f"Model also saved in newer format as '{model_filename_new}'")

In [None]:
print("Demonstrating how to load a saved model...")
loaded_model = load_model(model_filename_new)
# The old-format copy can be loaded the same way: load_model(model_filename)
print("Model loaded successfully!")

# Verify it works the same: run one test example through both models.
verification_sample = X_test_norm[:1]
test_prediction_original = model.predict(verification_sample)
test_prediction_loaded = loaded_model.predict(verification_sample)
print(f"Original model prediction: {np.argmax(test_prediction_original)}")
print(f"Loaded model prediction: {np.argmax(test_prediction_loaded)}")

# Actually compare the outputs rather than assuming they match: the predicted
# digit must be identical and the probability vectors equal within float tolerance.
same_digit = np.argmax(test_prediction_original) == np.argmax(test_prediction_loaded)
same_probabilities = np.allclose(test_prediction_original, test_prediction_loaded, atol=1e-6)
if same_digit and same_probabilities:
    print("✓ Both models give the same result!")
else:
    print("✗ The loaded model's prediction differs from the original model's!")

# Closing summary, framed by a 50-character rule.
rule = "=" * 50
print("\n" + rule)
print("TUTORIAL COMPLETE!")
print(rule)
print("What you've learned:")
for takeaway in (
    "• How to load and preprocess image data",
    "• How to build a neural network with Keras",
    "• How to train a model and track its progress",
    "• How to evaluate model performance",
    "• How to save and load trained models",
    "• How to make predictions on new data",
):
    print(takeaway)
print(f"Final test accuracy: {test_accuracy*100:.2f}%")
print(rule)