# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click the 'Run Cell' button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/ipython-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [3]:
# Load the training table; later cells read its 'label' column as the target
# and treat every remaining column as pixel features.
train_df = pd.read_csv(filepath_or_buffer='train.csv')

In [4]:
# Load the test table; it may or may not carry a 'label' column, which the
# feature/label separation cell checks for explicitly.
test_df = pd.read_csv(filepath_or_buffer='test.csv')

In [5]:
# Séparer les labels et les pixels dans les données d'entraînement
X_train = train_df.drop('label', axis=1).values
y_train = train_df['label'].values

# Pour les données de test, il se peut que vous n'ayez pas les labels
# Si les labels sont disponibles, séparez-les de la même manière
# Sinon, X_test = test_df.values
X_test = test_df.drop('label', axis=1).values if 'label' in test_df.columns else test_df.values
y_test = test_df['label'].values if 'label' in test_df.columns else None

In [6]:
# Cast both pixel matrices to float32 and divide by 255.0.
# NOTE(review): this assumes the raw pixel values are 0-255 intensities, and
# the cell is not idempotent -- re-running it rescales already-scaled data.
X_train, X_test = (
    pixels.astype('float32') / 255.0 for pixels in (X_train, X_test)
)


In [7]:
# Reshape to (number of samples, height, width, channels); -1 lets NumPy
# infer the sample count.
X_train = np.reshape(X_train, (-1, 28, 28, 1))
X_test = np.reshape(X_test, (-1, 28, 28, 1))

# One-hot encode the labels over 10 classes; test labels only when they exist.
y_train = to_categorical(y_train, num_classes=10)
y_test = None if y_test is None else to_categorical(y_test, num_classes=10)

# Hold out 10% of the training samples for validation; the fixed
# random_state keeps the split identical between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=42,
)

In [8]:
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created so that initialisation, dropout and batch shuffling are
# repeatable from one Restart & Run All to the next.
# NOTE(review): bit-exact repeatability on GPU may need additional
# determinism settings -- confirm if exact reproduction matters.
tf.keras.utils.set_random_seed(42)

model = models.Sequential([
    # Declare the input shape with an explicit Input layer. Passing
    # `input_shape=` to the first Conv2D makes recent Keras versions emit a
    # UserWarning recommending an Input object instead.
    layers.Input(shape=(28, 28, 1)),

    # First convolution block
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Second convolution block
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Third convolution block (no pooling)
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Dropout(0.4),

    # Fully connected classifier head: 10-way softmax output
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax'),
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
# Compile with Adam and categorical cross-entropy; the labels were one-hot
# encoded with to_categorical, which is the target format this loss expects.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop training once val_loss has not improved for 5 epochs and restore the
# weights from the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Multiply the learning rate by 0.2 after 3 epochs without val_loss
# improvement, never going below 1e-4.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.0001)

    

In [10]:
# Train for up to 50 epochs in mini-batches of 64, scoring the held-out
# validation split after every epoch. The two callbacks may end training
# early or lower the learning rate based on val_loss.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=50,
    callbacks=[early_stopping, reduce_lr],
    validation_data=(X_val, y_val),
)


Epoch 1/50
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.7228 - loss: 0.8220 - val_accuracy: 0.9726 - val_loss: 0.0839 - learning_rate: 0.0010
Epoch 2/50
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.9576 - loss: 0.1357 - val_accuracy: 0.9814 - val_loss: 0.0548 - learning_rate: 0.0010
Epoch 3/50
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.9707 - loss: 0.0948 - val_accuracy: 0.9862 - val_loss: 0.0424 - learning_rate: 0.0010
Epoch 4/50
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - accuracy: 0.9765 - loss: 0.0762 - val_accuracy: 0.9867 - val_loss: 0.0421 - learning_rate: 0.0010
Epoch 5/50
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.9805 - loss: 0.0668 - val_accuracy: 0.9879 - val_loss: 0.0466 - learning_rate: 0.0010
Epoch 6/50
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [11]:
# Score the trained model on the validation split and report its accuracy.
val_loss, val_acc = model.evaluate(x=X_val, y=y_val, verbose=2)
print(f'\nPrécision sur les données de validation : {val_acc:.4f}')

# The test set is only scored when it came with labels (y_test is None otherwise).
has_test_labels = y_test is not None
if has_test_labels:
    test_loss, test_acc = model.evaluate(x=X_test, y=y_test, verbose=2)
    print(f'\nPrécision sur les données de test : {test_acc:.4f}')

132/132 - 0s - 2ms/step - accuracy: 0.9955 - loss: 0.0204

Précision sur les données de validation : 0.9955


In [12]:
# Persist the trained model to disk.
# NOTE(review): the .h5 extension selects the HDF5 format, which recent Keras
# releases treat as legacy in favour of '.keras' -- confirm nothing depends on
# this file name before switching.
model.save(filepath='mnist_cnn_model.h5')



In [13]:
# Predict on the test images and keep, for each row, the index of the
# highest score as the predicted label.
predictions = model.predict(X_test)
predicted_labels = predictions.argmax(axis=1)

# Build the submission table with 1-based image ids.
image_ids = np.arange(len(predicted_labels)) + 1
submission_df = pd.DataFrame({'ImageId': image_ids, 'Label': predicted_labels})

# Write the submission file without the DataFrame index.
submission_df.to_csv('submission.csv', index=False)

# Show the first rows as a quick sanity check.
print(submission_df.head())

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
   ImageId  Label
0        1      2
1        2      0
2        3      9
3        4      9
4        5      3
