In [45]:
import kagglehub
import os
import tensorflow as tf
from tensorflow import keras

# Import du dataset

In [46]:
# Download latest version
path = kagglehub.dataset_download("puneet6060/intel-image-classification")

print("Path to dataset files:", path)

Path to dataset files: /Users/erwan/.cache/kagglehub/datasets/puneet6060/intel-image-classification/versions/2


# Chargement du dataset

In [47]:
train_path = os.path.join(path, "seg_train", "seg_train")
test_path = os.path.join(path, "seg_test", "seg_test")
predict_path = os.path.join(path, "seg_pred")

# Création du data pipeline

In [48]:
IMG_SIZE = (150, 150)
BATCH_SIZE = 32
SEED = 42

Pour la chargement du dataset nous utilison la fonction suivant : `image_dataset_from_directory`

Elle permet de directement avoir :
- un chargement efficace des images
- des prétraitements intégrés
- et une optimisation avec TensorFlow

Voici une explication des paramètres importants : 

|                 **Paramètre**                	|                           **Description**                          	|
|:--------------------------------------------:	|:------------------------------------------------------------------:	|
| `directory`                                  	| Chemin du dataset.                                                 	|
| `labels="inferred"`                          	| Déduit les labels des noms de sous-dossiers.                       	|
| `label_mode="int"`                           	| Les labels sont encodés comme des entiers.                         	|
| `batch_size=32`                              	| Nombre d’images chargées par batch.                                	|
| `image_size=(150,150)`                       	| Redimensionne les images à cette taille.                           	|
| `validation_split=0.2`                       	| Réserve 20% des images pour la validation.                         	|
| `subset="training"`<br>`subset="validation"` 	| Permet de séparer le dataset.                                      	|
| `shuffle=True`                               	| Mélange les images pour éviter les biais.                          	|
| `seed=42`                                    	| Assure que le split train/val est reproductible en fixant la seed. 	|

## Chargement du dataset de **train / validation**

In [49]:
train_dataset = keras.utils.image_dataset_from_directory(
    train_path,
    labels="inferred",
    label_mode="int",
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    validation_split=0.2,  # 80% train / 20% validation
    subset="training",
    seed=SEED
)

val_dataset = keras.utils.image_dataset_from_directory(
    train_path,
    labels="inferred",
    label_mode="int",
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    validation_split=0.2,
    subset="validation",
    seed=SEED
)

Found 14034 files belonging to 6 classes.
Using 11228 files for training.
Found 14034 files belonging to 6 classes.
Using 2806 files for validation.


## Chargement du dataset de **test**

In [50]:
test_dataset = keras.utils.image_dataset_from_directory(
    test_path,
    labels="inferred",
    label_mode="int",
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    shuffle=False
)

Found 3000 files belonging to 6 classes.


## Chargement du dataset de **prédiction**

In [51]:
predict_dataset = keras.utils.image_dataset_from_directory(
    predict_path,
    labels=None,
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    shuffle=False
)

Found 7301 files.


# Modèle CNN maison

## Création du modèle

In [52]:
model = keras.models.Sequential([
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    keras.layers.MaxPooling2D(2, 2),
    
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D(2, 2),
    
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D(2, 2),

    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(6, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Compilation du modèle

In [54]:
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer="adam",
    metrics=["accuracy"]
)

model.summary()

## Entraînement du modèle

In [55]:
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10
)

Epoch 1/10
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 180ms/step - accuracy: 0.3062 - loss: 18.2007 - val_accuracy: 0.4444 - val_loss: 1.3608
Epoch 2/10
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 178ms/step - accuracy: 0.4306 - loss: 1.4129 - val_accuracy: 0.5011 - val_loss: 1.1945
Epoch 3/10
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 179ms/step - accuracy: 0.5092 - loss: 1.2072 - val_accuracy: 0.5542 - val_loss: 1.0722
Epoch 4/10
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 183ms/step - accuracy: 0.5544 - loss: 1.1774 - val_accuracy: 0.5684 - val_loss: 1.0807
Epoch 5/10
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 177ms/step - accuracy: 0.5556 - loss: 1.1507 - val_accuracy: 0.4644 - val_loss: 1.2868
Epoch 6/10
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 177ms/step - accuracy: 0.5039 - loss: 1.2502 - val_accuracy: 0.5335 - val_loss: 1.1775
Epoch 7/1

In [56]:
model.save("intel_cnn_model_homemade.h5")



In [58]:
test_loss, test_acc = model.evaluate(test_dataset)

print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test Loss: {test_loss:.4f}")

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 46ms/step - accuracy: 0.6762 - loss: 0.9406
Test Accuracy: 0.6397
Test Loss: 1.0431


# Modèle avec transfert learning