In [1]:
# Importation des bibliothèques nécessaires
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import GridSearchCV

print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.18.0


In [2]:
# 1. Charger le jeu de données MNIST
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Afficher la forme des données
print("Forme initiale de x_train:", x_train.shape)

# 2. Prétraitement des images
# Les CNNs attendent une 4ème dimension : (nb_images, hauteur, largeur, canaux_de_couleur)
# Pour MNIST, le canal est 1 (niveaux de gris)
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
input_shape = (28, 28, 1)

# 3. Normalisation des pixels
# On ramène les valeurs des pixels de [0, 255] à [0, 1] pour aider le modèle à converger plus vite.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# 4. Prétraitement des étiquettes (labels)
# On transforme les étiquettes (ex: 5) en vecteurs "one-hot" (ex: [0,0,0,0,0,1,0,0,0,0])
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

print("Forme de x_train après remodelage:", x_train.shape)
print("Forme de y_train après one-hot encoding:", y_train.shape)



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Forme initiale de x_train: (60000, 28, 28)
Forme de x_train après remodelage: (60000, 28, 28, 1)
Forme de y_train après one-hot encoding: (60000, 10)


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import time

# Ensure your MNIST data is loaded and preprocessed before this cell


# --- Step 1: Create a flexible function to build the model ---
def create_model(optimizer='adam', activation='relu'):
    model = Sequential([
        Conv2D(32, kernel_size=(3, 3), activation=activation, input_shape=(28, 28, 1)),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, kernel_size=(3, 3), activation=activation),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation=activation),
        Dense(10, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model


# --- STAGE 1: MANUAL GRID SEARCH WITH CROSS-VALIDATION ON A SUBSET ---

print("--- Starting Stage 1: Manual Hyperparameter Search ---")
start_time = time.time()

# --- Step 2: Define the grid of hyperparameters to test ---
param_grid = {
    'optimizer': ['adam', 'rmsprop'],
    'activation': ['relu', 'tanh'],
    'batch_size': [32, 64]
}
# We will use a fixed number of epochs for the search phase
epochs_for_search = 1

# --- Step 3: Create a smaller, representative subset of the data ---
# Using a random subset is crucial for getting a good estimate.
subset_size = 9000

# Create random indices and select a subset
random_indices = np.random.choice(x_train.shape[0], subset_size, replace=False)
x_search_subset = x_train[random_indices]
y_search_subset = y_train[random_indices]

print(f"Searching for hyperparameters using a random subset of {subset_size} images.")

# --- Step 4: Manually implement K-Fold Cross-Validation ---
k_folds = 3
fold_size = subset_size // k_folds

# Create a dictionary to store the average score for each parameter combination
results = {}

# --- Step 5: Loop through every combination of hyperparameters ---
# This is the core of the manual grid search
param_combinations = []
for optimizer in param_grid['optimizer']:
    for activation in param_grid['activation']:
        for batch_size in param_grid['batch_size']:
            param_combinations.append({
                'optimizer': optimizer,
                'activation': activation,
                'batch_size': batch_size
            })

for params in param_combinations:
    print(f"\nTesting params: {params}")
    fold_scores = []

    # --- Inner loop for K-Fold Cross-Validation ---
    for k in range(k_folds):
        print(f"  - Fold {k+1}/{k_folds}")

        # Define the validation and training data for this fold
        start, end = k * fold_size, (k + 1) * fold_size
        x_val_fold = x_search_subset[start:end]
        y_val_fold = y_search_subset[start:end]

        x_train_fold = np.concatenate([x_search_subset[:start], x_search_subset[end:]])
        y_train_fold = np.concatenate([y_search_subset[:start], y_search_subset[end:]])

        # Create a fresh model for this fold
        model = create_model(optimizer=params['optimizer'], activation=params['activation'])

        # Train the model
        model.fit(x_train_fold, y_train_fold,
                  batch_size=params['batch_size'],
                  epochs=epochs_for_search,
                  verbose=0) # Keep output clean

        # Evaluate on the validation fold and store the accuracy
        _, accuracy = model.evaluate(x_val_fold, y_val_fold, verbose=0)
        fold_scores.append(accuracy)

    # Calculate and store the average accuracy across all folds for this parameter set
    average_score = np.mean(fold_scores)
    print(f"  --> Average CV Accuracy: {average_score:.4f}")
    # Use a tuple of items as the dictionary key because dictionaries can't be keys
    results[tuple(sorted(params.items()))] = average_score

# --- Step 6: Find the best parameters from the results ---
best_params_tuple = max(results, key=results.get)
best_params = dict(best_params_tuple)
best_score = results[best_params_tuple]

search_duration = time.time() - start_time
print("\n--- Search Complete ---")
print(f"Manual search took {search_duration:.2f} seconds.")
print(f"Best Average CV Accuracy: {best_score:.4f}")
print(f"Best Hyperparameters Found: {best_params}")


# --- STAGE 2: TRAIN THE FINAL MODEL ON THE FULL DATASET ---

print("\n--- Starting Stage 2: Training the Final Model ---")

# --- Step 7: Create the final model with the best hyperparameters ---
# We add the 'epochs' back in for the final training run
final_epochs = 10 # You can choose to train for longer
print(f"Creating and training final model with {best_params} for {final_epochs} epochs.")

final_model = create_model(optimizer=best_params['optimizer'], activation=best_params['activation'])
final_model.summary()

# --- Step 8: Train this model on the ENTIRE training dataset ---
history = final_model.fit(x_train, y_train,
                          batch_size=best_params['batch_size'],
                          epochs=final_epochs,
                          validation_split=0.1, # Good practice to monitor validation loss
                          verbose=1)
# --- Plot training & validation accuracy over epochs ---
import matplotlib.pyplot as plt

train_acc = history.history['accuracy']       # Training accuracy list
val_acc   = history.history['val_accuracy']   # Validation accuracy list
epochs    = range(1, len(train_acc) + 1)

plt.figure()
plt.plot(epochs, train_acc,    label='Training accuracy')
plt.plot(epochs, val_acc,      label='Validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training & Validation Accuracy')
plt.legend()
plt.show()


# --- Step 9: Evaluate the final, trained model on the untouched test set ---
print("\n--- Final Evaluation on the Test Set ---")
final_loss, final_accuracy = final_model.evaluate(x_test, y_test, verbose=0)
print(f"Final Loss on Test Set: {final_loss:.4f}")
print(f"Final Accuracy on Test Set: {final_accuracy:.4f}")

--- Starting Stage 1: Manual Hyperparameter Search ---
Searching for hyperparameters using a random subset of 9000 images.

Testing params: {'optimizer': 'adam', 'activation': 'relu', 'batch_size': 32}
  - Fold 1/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 2/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 3/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  --> Average CV Accuracy: 0.9422

Testing params: {'optimizer': 'adam', 'activation': 'relu', 'batch_size': 64}
  - Fold 1/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 2/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 3/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  --> Average CV Accuracy: 0.9241

Testing params: {'optimizer': 'adam', 'activation': 'tanh', 'batch_size': 32}
  - Fold 1/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 2/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 3/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  --> Average CV Accuracy: 0.9426

Testing params: {'optimizer': 'adam', 'activation': 'tanh', 'batch_size': 64}
  - Fold 1/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 2/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 3/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  --> Average CV Accuracy: 0.9253

Testing params: {'optimizer': 'rmsprop', 'activation': 'relu', 'batch_size': 32}
  - Fold 1/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 2/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 3/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  --> Average CV Accuracy: 0.9322

Testing params: {'optimizer': 'rmsprop', 'activation': 'relu', 'batch_size': 64}
  - Fold 1/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 2/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 3/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  --> Average CV Accuracy: 0.9292

Testing params: {'optimizer': 'rmsprop', 'activation': 'tanh', 'batch_size': 32}
  - Fold 1/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 2/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 3/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  --> Average CV Accuracy: 0.9444

Testing params: {'optimizer': 'rmsprop', 'activation': 'tanh', 'batch_size': 64}
  - Fold 1/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 2/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  - Fold 3/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  --> Average CV Accuracy: 0.9288

--- Search Complete ---
Manual search took 266.81 seconds.
Best Average CV Accuracy: 0.9444
Best Hyperparameters Found: {'activation': 'tanh', 'batch_size': 32, 'optimizer': 'rmsprop'}

--- Starting Stage 2: Training the Final Model ---
Creating and training final model with {'activation': 'tanh', 'batch_size': 32, 'optimizer': 'rmsprop'} for 10 epochs.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 33ms/step - accuracy: 0.9242 - loss: 0.2474 - val_accuracy: 0.9852 - val_loss: 0.0466
Epoch 2/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 33ms/step - accuracy: 0.9853 - loss: 0.0460 - val_accuracy: 0.9857 - val_loss: 0.0516
Epoch 3/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 32ms/step - accuracy: 0.9920 - loss: 0.0267 - val_accuracy: 0.9892 - val_loss: 0.0452
Epoch 4/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 32ms/step - accuracy: 0.9954 - loss: 0.0153 - val_accuracy: 0.9885 - val_loss: 0.0467
Epoch 5/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 31ms/step - accuracy: 0.9969 - loss: 0.0094 - val_accuracy: 0.9885 - val_loss: 0.0471
Epoch 6/10
[1m 903/1688[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m23s[0m 30ms/step - accuracy: 0.9982 - loss: 0.0054