In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, balanced_accuracy_score, accuracy_score


In [2]:
base_path = Path('data/v2')
print(base_path)

X_train = np.load(base_path / 'train_images.npy').astype(np.float32)
Y_train = np.load(base_path / 'train_labels.npy')

X_test = np.load(base_path / 'test_public_images.npy').astype(np.float32)
Y_test = np.load(base_path / 'test_public_labels.npy')

classes, counts = np.unique(Y_train, return_counts=True)
counts_test = np.unique(Y_test, return_counts=True)[1]



data/v2


In [3]:
classes_names = [
    "Avoine d'hiver",
    "Avoine de printemps",
    "Blé tendre d'hiver",
    "Blé tendre de printemps",
    "Colza d'hiver",
    "Luzerne déshydratée",
    "Luzerne",
    "Mélange de légumineuses et graminées",
    "Maïs",
    "Maïs ensilage",
    "Orge d'hiver",
    "Orge de printemps",
    "Prairie permanente - herbe",
    "Prairie permanente - ressources fourragères ligneuses",
    "Prairie en rotation longue",
    "Pomme de terre de consommation",
    "Prairie temporaire",
    "Soja",
    "Tournesol",
    "Triticale d'hiver"
]
months = ['Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov']

print(f'Each {X_train.shape[0]} training and {X_test.shape[0]} test samples have :\n'
      f'\t{X_train.shape[1]} images (one per month from Feb to Nov)\n'
      f'\t{X_train.shape[2]} channels (B08 Near Infrared, B04 Red, B03 Green)\n'
      f'\t{X_train.shape[3]} x {X_train.shape[4]} pixels')
print(f'X_train shape: {X_train.shape}, Y_train shape: {Y_train.shape}')
print(f'X_test shape: {X_test.shape}, Y_test shape: {Y_test.shape}')

classes_df = pd.DataFrame({
    'Class Name': classes_names,
    'N Samples in Train': counts,
    'N Samples in Test': counts_test,
    'Frequency in Train': counts / counts.sum(),
    'Frequency in Test': counts_test / counts_test.sum()
},index=classes)


Each 10000 training and 2500 test samples have :
	10 images (one per month from Feb to Nov)
	3 channels (B08 Near Infrared, B04 Red, B03 Green)
	32 x 32 pixels
X_train shape: (10000, 10, 3, 32, 32), Y_train shape: (10000,)
X_test shape: (2500, 10, 3, 32, 32), Y_test shape: (2500,)


## Random Forest Classifier

In [4]:
# Flatten the images to use them in a RandomForestClassifier
X_train_flattened = X_train.reshape(X_train.shape[0], -1)
X_test_flattened = X_test.reshape(X_test.shape[0], -1)

# Create a RandomForestClassifier
random_forest = RandomForestClassifier(random_state=0, class_weight="balanced", n_jobs=-1)

In [5]:
# Define the hyperparameters to tune
param_grid = {
    "n_estimators": [80],
    "min_samples_leaf": [20],
}
# Because the dataset is imbalanced, we use stratified k-fold cross-validation
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Use GridSearchCV with k-fold cross-validation to find the best parameters
grid_search = GridSearchCV(
    random_forest,
    param_grid,
    cv=skfold,
    scoring="balanced_accuracy",
    verbose=3,
)

grid_search.fit(X_train_flattened, Y_train)

# Retrieve the best estimator and parameters
best_rf = grid_search.best_estimator_
print(f"Best Parameters: {grid_search.best_params_}")

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END min_samples_leaf=20, n_estimators=80;, score=0.606 total time=  12.9s
[CV 2/5] END min_samples_leaf=20, n_estimators=80;, score=0.627 total time=  13.5s
[CV 3/5] END min_samples_leaf=20, n_estimators=80;, score=0.619 total time=  14.1s
[CV 4/5] END min_samples_leaf=20, n_estimators=80;, score=0.585 total time=  14.0s
[CV 5/5] END min_samples_leaf=20, n_estimators=80;, score=0.598 total time=  13.4s
Best Parameters: {'min_samples_leaf': 20, 'n_estimators': 80}


In [6]:
# Evaluate the model on the test set
test_predictions = best_rf.predict(X_test_flattened)

test_balanced_accuracy = balanced_accuracy_score(Y_test, test_predictions)
print(f"Test Balanced Accuracy: {100 * test_balanced_accuracy:.2f}%")

print("Classification report on test set:")
print(classification_report(Y_test, test_predictions, target_names=classes_names, zero_division=0))

Test Balanced Accuracy: 50.06%
Classification report on test set:
                                                       precision    recall  f1-score   support

                                       Avoine d'hiver       0.15      0.36      0.22        11
                                  Avoine de printemps       0.00      0.00      0.00         4
                                   Blé tendre d'hiver       0.93      0.40      0.56       781
                              Blé tendre de printemps       0.91      0.98      0.94       108
                                        Colza d'hiver       0.88      0.97      0.92       225
                                  Luzerne déshydratée       0.62      0.74      0.67        39
                                              Luzerne       0.27      0.42      0.33        36
                 Mélange de légumineuses et graminées       0.52      0.45      0.48        84
                                                 Maïs       0.71      0.62    

## Convolutional Neural Network

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv3D, MaxPooling3D, Flatten, Dense, Dropout, BatchNormalization
)

# Réorganiser les dimensions pour Conv3D et normaliser les données
X_train_reordered = np.transpose(X_train, (0, 1, 3, 4, 2)) / 255.0 # (batch_size, 10, 32, 32, 3)
X_test_reordered = np.transpose(X_test, (0, 1, 3, 4, 2)) / 255.0   # (batch_size, 10, 32, 32, 3)
input_shape = X_train_reordered.shape[1:]
num_classes = len(classes)

# Construction du modèle CNN 3D
model = Sequential([
    Conv3D(32, kernel_size=(3, 3, 3), activation='relu', input_shape=input_shape),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Dropout(0.25),
    # Ajouter une couche de convolution supplémentaire
    Conv3D(64, kernel_size=(3, 3, 3), activation='relu'),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compiler le modèle
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=["Accuracy"])  # Accuracy correspond à SparseCategoricalAccuracy ici

# Résumé du modèle
model.summary()


2025-01-12 20:39:00.510830: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-12 20:39:00.521177: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-12 20:39:00.623247: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-12 20:39:00.705859: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1736710740.786141  295192 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1736710740.80

In [8]:
if Path("save/model.h5").exists():
    # Charger le modèle pré-entraîné
    model.load_weights("save/model.h5")
else:
    # Entra1ner le modèle
    history = model.fit(
        X_train_reordered, 
        Y_train,
        validation_split=0.3,
        epochs=20,
        batch_size=32,
        verbose=1
    )
    # Sauvegarder le modèle
    model.save("save/model.h5")

Epoch 1/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 52ms/step - Accuracy: 0.4288 - loss: 2.3867 - val_Accuracy: 0.5187 - val_loss: 1.7215
Epoch 2/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 48ms/step - Accuracy: 0.6868 - loss: 1.0125 - val_Accuracy: 0.5570 - val_loss: 1.6278
Epoch 3/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 46ms/step - Accuracy: 0.7276 - loss: 0.8632 - val_Accuracy: 0.5687 - val_loss: 1.3623
Epoch 4/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 46ms/step - Accuracy: 0.7369 - loss: 0.8308 - val_Accuracy: 0.6347 - val_loss: 1.2570
Epoch 5/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 47ms/step - Accuracy: 0.7721 - loss: 0.7583 - val_Accuracy: 0.6430 - val_loss: 1.2581
Epoch 6/20
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 46ms/step - Accuracy: 0.7741 - loss: 0.7255 - val_Accuracy: 0.6453 - val_loss: 1.2435
Epoch 7/20
[1m2



In [9]:
# Évaluer le modèle avec balanced_accuracy
test_loss, test_accuracy = model.evaluate(X_test_reordered, Y_test, verbose=0)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Balanced Accuracy: {test_accuracy:.4f}")

# Prédire les classes
predictions = model.predict(X_test_reordered)
predicted_classes = np.argmax(predictions, axis=1)

# Rapport de classification
print("\nClassification Report:")
print(classification_report(Y_test, predicted_classes))

Test Loss: 1.7331
Test Balanced Accuracy: 0.5776
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        11
           1       0.00      0.00      0.00         4
           2       0.75      0.70      0.72       781
           3       0.68      0.53      0.59       108
           4       0.94      0.91      0.93       225
           5       0.11      0.44      0.17        39
           6       0.09      0.42      0.15        36
           7       0.29      0.57      0.38        84
           8       0.63      0.23      0.34       116
           9       0.00      0.00      0.00        11
          10       0.48      0.69      0.56       265
          11       0.55      0.26      0.35       235
          12       0.70      0.69      0.69       266
          13       0.73      0.27      0.39        41
          14       0.00      0.00      0.0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
