# **Deep learning for dynamic network analysis (DLDNA)** <br> Final project

**Dolphins:** R. ARNAUD M. DELPLANQUE A. KARILA-COHEN A. RAMPOLDI

Comprehensive soil classification dataset: https://www.kaggle.com/datasets/ai4a-lab/comprehensive-soil-classification-datasets/code

### **1. Preliminary tasks**

**Import of the libraries**

In [8]:
import warnings
warnings.filterwarnings('ignore')  # Suppress warnings to keep notebook clean

import random
import numpy as np
import matplotlib.pyplot as plt
import math
from pathlib import Path

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

import sys
import pandas as pd
import os
import cv2
from PIL import Image
import matplotlib.pyplot as plt

**Path configuration**

In [9]:
# Project layout, resolved from the notebook's working directory: the parent
# of the current working directory is taken as the project root.
PROJECT_ROOT = Path.cwd().parent.resolve()
DATA_DIR = PROJECT_ROOT / "data"
PARAM_FILE = PROJECT_ROOT / "txt" / "parameters.txt"

# Folder added to sys.path so the project modules imported below
# (utils, visualization) can be found.
UTILS_DIR = PROJECT_ROOT / "src"
# Guard the append so that re-running this cell does not pile up duplicate
# sys.path entries.
if str(UTILS_DIR) not in sys.path:
    sys.path.append(str(UTILS_DIR))
from utils import load_parameters, load_images
from visualization import show_soil_grid

**Choose the appropriate torch device**

In [10]:
# Pick the compute backend in priority order: CUDA first, then XPU (only when
# this torch build exposes a `torch.xpu` module), otherwise fall back to CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
elif hasattr(torch, "xpu") and torch.xpu.is_available():
    DEVICE = 'xpu'
else:
    DEVICE = 'cpu'
print(f"Params loaded. Device: {DEVICE}")

Params loaded. Device: cpu


**General parameters**

In [11]:
# Load parameters from external file
params = load_parameters(PARAM_FILE)
# Expose every loaded entry as a notebook-level global so that later cells can
# read the values by bare name (e.g. SEED) instead of going through `params`.
globals().update(params)
# SOIL_TYPES is a single comma-separated string; split it into a list of names.
soil_types = params["SOIL_TYPES"].split(",")


Loaded parameters:
  TRAIN_RATIO = 0.7
  VAL_RATIO = 0.1
  TEST_RATIO = 0.2
  BATCH_SIZE = 32
  EPOCHS = 100
  LEARNING_RATE = 0.01
  SEED = 42
  SOIL_TYPES = Alluvial_Soil,Arid_Soil,Black_Soil,Laterite_Soil,Mountain_Soil,Red_Soil,Yellow_Soil


**Seeding to ensure reproducibility**

In [12]:
# Seed Python's `random`, NumPy and PyTorch with the same SEED value.
# SEED is not assigned in this cell; presumably it is one of the loaded
# parameters injected into the notebook globals — verify against the
# parameter-loading cell.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x2bda4e62750>

**Load the dataset**

In [13]:
# Build {soil type name: result of load_images for that type's folder},
# reporting the number of loaded items per type as we go.
images_dict = {}
dataset_root = DATA_DIR / "Orignal-Dataset"  # one sub-folder per soil type

for soil in soil_types:
    folder = dataset_root / soil
    images_dict[soil] = load_images(folder)
    print(f"{soil}: {len(images_dict[soil])} images loaded")


Alluvial_Soil: 51 images loaded
Arid_Soil: 284 images loaded
Black_Soil: 255 images loaded
Laterite_Soil: 219 images loaded
Mountain_Soil: 201 images loaded
Red_Soil: 109 images loaded
Yellow_Soil: 69 images loaded


**Display the first pictures of each type of soil**

In [14]:
# Draw a preview grid with the project's show_soil_grid helper, asking for
# 5 images per soil type. tile_size and pad are forwarded as-is
# (presumably pixel sizes — confirm in visualization.py).
# Expected layout: images_dict = {soil_type: [(img_bgr, filename), ...], ...}
show_soil_grid(images_dict, n_per_type=5, tile_size=(240, 240), pad=12)

# **1. Convolutional Neural Network (CNN)**

Penser à convertir les X et y en tenseur torch avant de procéder au datasplit via ``sklearn.model_selection.train_test_split`` <br>
```X = torch.FloatTensor(X) ``` et ```y = torch.FloatTensor(y) ``` <br>

> Basic convolutional neural network

In [None]:
import torch
import torch.nn as nn

class SimpleCNN(nn.Module):
    """
    Simple CNN for RGB image classification, designed around 128x128 inputs.

    Three Conv -> BatchNorm -> ReLU -> MaxPool blocks (3 -> 32 -> 64 -> 128
    channels, each pooling step halving height and width) are followed by a
    global average pooling layer. Because of that pooling, the classifier
    always receives a 128-dimensional vector, so the network also works when
    the input size changes.

    Args:
        num_classes: Number of output logits (one per class).
        dropout: Dropout probability applied before each linear layer.
    """
    def __init__(self, num_classes=7, dropout=0.3):
        super().__init__()

        # Convolutions omit their bias because each one is immediately
        # followed by a BatchNorm layer.
        self.features = nn.Sequential(
            # Block 1: 128 -> 64
            nn.Conv2d(3, 32, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            # Block 2: 64 -> 32
            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            # Block 3: 32 -> 16
            nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )

        # Global average pooling: (B,128,H,W) -> (B,128,1,1).
        # forward() relies on this attribute; it was previously never created,
        # so every forward pass raised AttributeError.
        self.gap = nn.AdaptiveAvgPool2d(1)

        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(128, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return raw class scores of shape (B, num_classes) for a batch of images."""
        x = self.features(x)          # (B,128,16,16) if input=128x128
        x = self.gap(x).flatten(1)    # (B,128)
        x = self.classifier(x)        # (B,num_classes)
        return x


**Test model**

In [16]:
def test_model(model, test_loader, loss_fn, device):
    """Perform final testing on the model using the held-out test set.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        test_loader: Iterable yielding ``(inputs, labels)`` batches. It does
            not need to support ``len()``.
        loss_fn: Callable ``loss_fn(preds, labels)`` returning a scalar tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(test_loss, all_preds, all_actuals)``: the mean of the
        per-batch losses as a float, the model outputs of all batches
        concatenated along dim 0, and the labels concatenated the same way.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()  # Set to evaluation mode
    total_loss = 0.0  # Sum of per-batch losses
    num_batches = 0  # Counted while iterating so loaders without len() work
    all_preds = []  # Store all predictions
    all_actuals = []  # Store all actual values

    with torch.no_grad():  # Disable gradient computation
        for xb, yb in test_loader:
            xb = xb.to(device)  # Move batch to device
            yb = yb.to(device)  # Move labels to device
            preds = model(xb)  # Forward pass
            total_loss += loss_fn(preds, yb).item()  # Accumulate loss
            all_preds.append(preds)  # Store predictions
            all_actuals.append(yb)  # Store actuals
            num_batches += 1

    # Fail with an explicit message instead of a ZeroDivisionError below.
    if num_batches == 0:
        raise ValueError("test_loader yielded no batches; cannot evaluate the model")

    test_loss = total_loss / num_batches  # Average loss over batches
    all_preds = torch.cat(all_preds)  # Concatenate all predictions
    all_actuals = torch.cat(all_actuals)  # Concatenate all actuals

    return test_loss, all_preds, all_actuals


# The name matches pytest's ``test_*`` pattern; opt out of collection so that
# importing this helper into a test module does not make pytest run it as a test.
test_model.__test__ = False

**Hyperparameter tuning**

> Grid search to find the best hyperparameters (```learning_rates, hidden_sizes```)

In [17]:
# learning_rates = [1e-1, 1e-2, 1e-3, 1e-4, 5e-4] 
# hidden_sizes_options = [(32,16),(64,32), (128,64)]

# def grid_search_hyperparameters(
#     train_loader,
#     val_loader,
#     learning_rates,
#     hidden_sizes_options,
#     device,
#     epochs=10,
#     base_model=None,
#     model_fn=None,
#     save_path="best_model.pth",
# ):
#     """
#     Grid search over learning rates and hidden layer sizes.
#     - model_fn: callable taking hidden_sizes (e.g., (h1, h2)) and returning an nn.Module (on CPU).
#     - hidden_sizes_options: list of tuples like [(64,32), (128,64), ...]
#     Saves the globally best model (by validation accuracy) to 'save_path'.
#     Returns: (results_list, best_cfg_dict, best_model_loaded)
#     """
#     assert model_fn is not None, "Provide model_fn(hidden_sizes) -> nn.Module"

#     results = []  # Store all results
#     best_val_acc = -1.0  # Track best validation accuracy
#     best_cfg = None  # Track best configuration
#     best_state = None  # Track best model state

#     for lr in learning_rates:  # Loop over learning rates
#         for hidden_sizes in hidden_sizes_options:  # Loop over architectures
#             print(f"Testing: lr={lr}, hidden_sizes={hidden_sizes}")

#             model = model_fn(hidden_sizes).to(device)  # Create model
#             optimizer = torch.optim.Adam(model.parameters(), lr=lr)  # Create optimizer
#             loss_fn = nn.CrossEntropyLoss()  # Define loss function

#             # Train model
#             _, _, _, train_accuracies, val_accuracies = train_with_validation(
#                 model=model,  # Model to train
#                 train_loader=train_loader,  # Training data
#                 val_loader=val_loader,  # Validation data
#                 optimizer=optimizer,  # Optimizer
#                 loss_fn=loss_fn,  # Loss function
#                 device=device,  # Device
#                 epochs=epochs,  # Number of epochs
#                 task_type='classification'  # Task type
#             )

#             cur_best_val = max(val_accuracies)  # Get best validation accuracy

#             # Store results
#             results.append({
#                 'lr': lr,  # Learning rate
#                 'hidden_sizes': hidden_sizes,  # Architecture
#                 'best_val_acc': cur_best_val,  # Best validation accuracy
#                 'final_train_acc': train_accuracies[-1],  # Final training accuracy
#                 'final_val_acc': val_accuracies[-1]  # Final validation accuracy
#             })

#             print(f"Best validation accuracy: {cur_best_val:.2f}%")

#             # Update best model if this is better
#             if cur_best_val > best_val_acc:
#                 best_val_acc = cur_best_val  # Update best accuracy
#                 best_cfg = {'lr': lr, 'hidden_sizes': hidden_sizes}  # Update best config
#                 best_state = {k: v.cpu() for k, v in model.state_dict().items()}  # Save state to CPU
#                 if save_path is not None:
#                     torch.save(best_state, save_path)  # Save to disk
#                     print(f"Saved new best model to: {save_path}")

#             del model  # Free memory
#             if torch.cuda.is_available():
#                 torch.cuda.empty_cache()  # Clear CUDA cache

#     # Rebuild best model
#     best_model = None
#     if best_state is not None:
#         best_model = model_fn(best_cfg['hidden_sizes']).to(device)  # Create model
#         best_model.load_state_dict(best_state)  # Load best weights


#     return results, best_cfg, best_model
