In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, balanced_accuracy_score, accuracy_score

In [2]:
base_path = Path('data/')

X_train = np.load(base_path / 'train_images.npy').astype(np.float32)
Y_train = np.load(base_path / 'train_labels.npy')

X_test = np.load(base_path / 'test_public_images.npy').astype(np.float32)
Y_test = np.load(base_path / 'test_public_labels.npy')

classes, counts = np.unique(Y_train, return_counts=True)
counts_test = np.unique(Y_test, return_counts=True)[1]

In [3]:
classes_names = [
    "Avoine d'hiver",
    "Avoine de printemps",
    "Blé tendre d'hiver",
    "Blé tendre de printemps",
    "Colza d'hiver",
    "Luzerne déshydratée",
    "Luzerne",
    "Mélange de légumineuses et graminées",
    "Maïs",
    "Maïs ensilage",
    "Orge d'hiver",
    "Orge de printemps",
    "Prairie permanente - herbe",
    "Prairie permanente - ressources fourragères ligneuses",
    "Prairie en rotation longue",
    "Pomme de terre de consommation",
    "Prairie temporaire",
    "Soja",
    "Tournesol",
    "Triticale d'hiver"
]
months = ['Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov']

print(f'Each {X_train.shape[0]} training and {X_test.shape[0]} test samples have :\n'
      f'\t{X_train.shape[1]} images (one per month from Feb to Nov)\n'
      f'\t{X_train.shape[2]} channels (B08 Near Infrared, B04 Red, B03 Green)\n'
      f'\t{X_train.shape[3]} x {X_train.shape[4]} pixels')
print(f'X_train shape: {X_train.shape}, Y_train shape: {Y_train.shape}')
print(f'X_test shape: {X_test.shape}, Y_test shape: {Y_test.shape}')

classes_df = pd.DataFrame({
    'Class Name': classes_names,
    'N Samples in Train': counts,
    'N Samples in Test': counts_test,
    'Frequency in Train': counts / counts.sum(),
    'Frequency in Test': counts_test / counts_test.sum()
},index=classes)


Each 10000 training and 2500 test samples have :
	10 images (one per month from Feb to Nov)
	3 channels (B08 Near Infrared, B04 Red, B03 Green)
	32 x 32 pixels
X_train shape: (10000, 10, 3, 32, 32), Y_train shape: (10000,)
X_test shape: (2500, 10, 3, 32, 32), Y_test shape: (2500,)


In [4]:
import pandas as pd
import numpy as np
from scipy.ndimage import gaussian_filter

def create_simplified_dataset(X_data):

    simplified_data = []
    for sample_idx in range(X_data.shape[0]):
        for month_idx in range(X_data.shape[1]):
            image = X_data[sample_idx, month_idx, :, :, :]
            filtered_image = gaussian_filter(image, sigma=1)
            avg_rgb = np.mean(filtered_image, axis=(1, 2))
            simplified_data.append({
              'sample_id':sample_idx,
              'month':month_idx,
              'R': avg_rgb[0],
              'G': avg_rgb[1],
              'B': avg_rgb[2]
            })

    return pd.DataFrame(simplified_data)


simplified_train_df = create_simplified_dataset(X_train)
print(simplified_train_df.head())

simplified_test_df = create_simplified_dataset(X_test)
print(simplified_test_df.head())

   sample_id  month            R            G            B
0          0      0  2815.662109  2280.853271  1920.013916
1          0      1  3364.649414  2543.026611  2010.343506
2          0      2  3690.846680  2437.498291  1626.443115
3          0      3  4328.185059  2770.220215  1770.037842
4          0      4  3982.349365  3261.173828  2689.330078
   sample_id  month            R            G            B
0          0      0  2967.741211  2450.070312  2069.227295
1          0      1  4040.710205  3004.723145  2310.290527
2          0      2  4199.181641  2742.998047  1798.129150
3          0      3  4001.596191  2627.909180  1729.434082
4          0      4  3500.541504  2884.736816  2385.992920


In [5]:
import tensorflow as tf


model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(30,)),
  tf.keras.layers.Dense(25, activation='leaky_relu'),
  tf.keras.layers.Dense(20, activation='softmax')
])


model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


model.summary()

2025-01-12 20:27:32.703133: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-12 20:27:32.796281: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-12 20:27:32.874573: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1736710052.942121  691367 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1736710052.963271  691367 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-12 20:27:33.643556: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [6]:
X_train_nn = simplified_train_df.drop(columns=['sample_id', 'month']).values.reshape(-1, 10, 3)
X_test_nn = simplified_test_df.drop(columns=['sample_id', 'month']).values.reshape(-1, 10, 3)

X_train_nn = X_train_nn.reshape(X_train_nn.shape[0], -1)  # Résultat : (10000, 30)
X_test_nn = X_test_nn.reshape(X_test_nn.shape[0], -1)

print(f'X_train_nn shape: {X_train_nn.shape}, Y_train shape: {Y_train.shape}')

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train_nn, Y_train, epochs=10)

loss, accuracy = model.evaluate(X_test_nn, Y_test, verbose=0)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


Y_pred = np.argmax(model.predict(X_test_nn), axis=1)
print(classification_report(Y_test, Y_pred))

X_train_nn shape: (10000, 30), Y_train shape: (10000,)
Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 696us/step - accuracy: 0.1930 - loss: 973.2152  
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 687us/step - accuracy: 0.4565 - loss: 78.2545
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 726us/step - accuracy: 0.5232 - loss: 51.4224
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 749us/step - accuracy: 0.5528 - loss: 39.0151
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 710us/step - accuracy: 0.5913 - loss: 29.5236
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 679us/step - accuracy: 0.5891 - loss: 27.7729
Epoch 7/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 666us/step - accuracy: 0.6199 - loss: 21.2440
Epoch 8/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 749u

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
