<a href="https://colab.research.google.com/github/erwanBellon/2025_ML_EES/blob/main/project/code/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Part I: Setup



In [2]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Is this notebook running on Colab or Kaggle?
IS_COLAB = "google.colab" in sys.modules
IS_KAGGLE = "kaggle_secrets" in sys.modules

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"

if not tf.config.list_physical_devices('GPU'):
    print("No GPU was detected.  CNNs can be very slow without a GPU.")
    if IS_COLAB:
        print("Go to Runtime > Change runtime and select a GPU hardware accelerator.")
    if IS_KAGGLE:
        print("Go to Settings > Accelerator and select GPU.")

# Common imports
import numpy as np
import os
from pathlib import Path

# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save figures or outputs
PROJECT_ROOT_DIR = Path.cwd()                  # current notebook directory
OUTPUTS_PATH = PROJECT_ROOT_DIR / "outputs"    # create "outputs" folder here
OUTPUTS_PATH.mkdir(parents=True, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = OUTPUTS_PATH / f"{fig_id}.{fig_extension}"
    print("Saving figure:", path)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)


# Loading Tensorboard
%load_ext tensorboard

No GPU was detected.  CNNs can be very slow without a GPU.
Go to Runtime > Change runtime and select a GPU hardware accelerator.


# Part 2: Load files
## 2.1: Load the presences and absences landcover crops

In [11]:
print(os.getcwd())


/content


In [6]:
from pathlib import Path
import rasterio
import numpy as np
from sklearn.model_selection import train_test_split

# Define folder paths
presences_path = Path("project/data/cropped_landcover/presences")
absences_path = Path("project/data/cropped_landcover/absences")

# Function to load images from a folder
def load_images_from_folder(folder):
    tif_files = list(folder.glob("*.tif"))
    images = []
    for tif in tif_files:
        with rasterio.open(tif) as src:
            img = src.read()  # shape: (channels, height, width)
            img = np.transpose(img, (1, 2, 0))  # (height, width, channels)
            images.append(img)
    return np.array(images)

# Load presence and absence images
images_pres = load_images_from_folder(presences_path)
images_abs = load_images_from_folder(absences_path)
print(f"Presences: {images_pres.shape}, Absences: {images_abs.shape}")

# Merge datasets
X = np.concatenate([images_pres, images_abs], axis=0)
y = np.concatenate([np.ones(len(images_pres)), np.zeros(len(images_abs))], axis=0)

# Shuffle and split into train, validation, test
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print("Train shape:", X_train.shape, y_train.shape)
print("Validation shape:", X_valid.shape, y_valid.shape)
print("Test shape:", X_test.shape, y_test.shape)


Presences: (0,), Absences: (0,)


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.