In [None]:
# Imports
import os
import json
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import re


# For deep learning
import torch
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.ops import box_iou

from torchmetrics.detection.mean_ap import MeanAveragePrecision
from sklearn.metrics import precision_recall_fscore_support
import numpy as np


# For augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Own package imports
os.chdir('/home/naro/projects/Rumex')

from config.paths_config import *
from config.config import *
from data.data_inspection import *
from data.augmentation import *
from data.dataset import *
from utils.viz_utils import *
from utils.data_utils import *
from models.model_factory import *
from scripts.evaluate import *
from scripts.train import *
from scripts.inference import predict_and_visualize_image, load_best_model
from utils.fiftyone_utils import *
from tuning.hyperparameters_tuning import *

# Toggle for the optional visualization cell: the sample and augmentation plots
# are only produced when this flag is True (see the `if VIZ:` cell).
VIZ = False

# Data Verification

In [None]:
# --- Paths ---------------------------------------------------------------
# Keyword arguments for PathsConfig. Kept as a plain dict because a later cell
# reads individual entries from it (the dataset name).
pathconfig = dict(
    dataset_name="haldennord09",
    darwin_root="/home/naro/.darwin/datasets/digital-production",
    dataset_version="latest",
    extension='png',
    models_dir='/home/naro/projects/Rumex/artifacts/models',
)
paths_config = PathsConfig(**pathconfig)

# --- Split verification --------------------------------------------------
# The verifier receives the image/annotation directories, the three split
# files and the image extension, all taken from paths_config.
data_verifier = DataVerifier(
    **{
        field: getattr(paths_config, field)
        for field in (
            "img_dir",
            "annotations_dir",
            "train_split_file",
            "test_split_file",
            "val_split_file",
            "extension",
        )
    }
)

# Note the return order: train, test, val.
train_annotations, test_annotations, val_annotations = (
    data_verifier.check_directory_contents()
)

# --- Image / class inspection --------------------------------------------
image_processor = ImagesClassesInspector(
    annotations_dir=paths_config.annotations_dir,
    img_dir=paths_config.img_dir,
)

# Image file lists for each split, derived from the split's annotation list.
train_images, val_images, test_images = (
    data_verifier.get_image_files(split_annotations)
    for split_annotations in (train_annotations, val_annotations, test_annotations)
)

# Sizes are collected over every entry in the image directory, not only the
# training split (the variable name is historical).
image_files = os.listdir(paths_config.img_dir)
train_sizes = image_processor.get_image_sizes(image_files)

# Classes found across all three splits.
annotation_files = train_annotations + test_annotations + val_annotations
classes = image_processor.get_classes(annotation_files)
print("\nClasses in the dataset:", classes, sep="\n")

# Integer ids start at 1 (presumably leaving 0 for the detector's background
# class - confirm against the dataset/model code).
class_map = {name: label for label, name in enumerate(classes, start=1)}
print("\nThe created class map:", class_map, sep="\n")

# --- Image size statistics -----------------------------------------------
min_size, max_size = image_processor.get_image_size_stats(image_files)
print(
    f"Smallest image size: {min_size}",
    f"Largest image size: {max_size}",
    sep="\n",
)

# min_size is unpacked as (width, height); assumes get_image_size_stats
# reports sizes in that order - TODO confirm.
w_min, h_min = min_size
print(
    f"Width of smallest image: {w_min}",
    f"Height of smallest image: {h_min}",
    sep="\n",
)


# Configure the augmentations

In [None]:
# Initialize AugmentationConfig
# Build the augmentation pipelines, sized to the smallest image dimensions
# computed in the data verification cell.
augmentation_config = AugmentationConfig(width=w_min, height=h_min)

train_transform = augmentation_config.get_train_transform()
valid_transform = augmentation_config.get_valid_transform()

# Echo both pipelines so the applied transforms are visible in the output.
for _title, _pipeline in (
    ("Training transforms:", train_transform),
    ("\nValidation transforms:", valid_transform),
):
    print(_title)
    print(_pipeline)


# Creating datasets and dataloaders

In [None]:
# Everything create_data_loaders needs: data locations, the per-split image
# and annotation lists, the transforms, and the label mapping.
_loader_kwargs = dict(
    img_dir=paths_config.img_dir,
    annotation_dir=paths_config.annotations_dir,
    train_images=train_images,
    train_annotations=train_annotations,
    val_images=val_images,
    val_annotations=val_annotations,
    test_images=test_images,
    test_annotations=test_annotations,
    train_transform=train_transform,
    valid_transform=valid_transform,
    class_map=class_map,
)
train_loader, val_loader, test_loader = create_data_loaders(**_loader_kwargs)

# Report how many samples ended up in each split.
for _split, _loader in (
    ("training", train_loader),
    ("validation", val_loader),
    ("test", test_loader),
):
    print(f"Number of samples in {_split} dataset: {len(_loader.dataset)}")


In [None]:
# Display the class-name -> integer-id mapping built in the data verification cell.
class_map

# Data Visualization

In [None]:

# Optional visual sanity checks; skipped entirely unless VIZ is True.
if VIZ:
    # A few training samples drawn through the training dataset.
    # idx=None presumably lets visualize_sample pick the sample itself - confirm.
    print("Visualizing samples:")
    for i in range(3):
        print(f"\nSample {i+1}:")
        visualize_sample(train_loader.dataset, class_map, idx=None, figsize=(5, 5))

    # Build the two dataset views once instead of on every loop iteration:
    # they differ only in the transform (none vs. the training augmentations).
    _dataset_kwargs = dict(
        img_dir=paths_config.img_dir,
        annotation_dir=paths_config.annotations_dir,
        images_list=train_images,
        annotations_list=train_annotations,
        class_map=class_map,
    )
    _plain_dataset = RumexDataSet(transform=None, **_dataset_kwargs)
    _augmented_dataset = RumexDataSet(transform=train_transform, **_dataset_kwargs)

    # Visualize augmentations: five rounds, each showing 5 augmented variants.
    print("Visualizing original image with augmentations:")
    for _ in range(5):
        visualize_augmentations(
            dataset_without_augmentation=_plain_dataset,
            dataset_with_augmentation=_augmented_dataset,
            classes=classes,
            num_augmented=5
        )


# Model creation

Currently implemented combinations:

| Model Name | Model Backbone | Weights |
|------------|----------------|---------|
| fasterrcnn | resnet50 | COCO_V1 |
| fasterrcnn | mobilenet_v3_large_320 | COCO_V1 |
| fasterrcnn | mobilenet_v3_large | COCO_V1 |
| fasterrcnnV2 | resnet50 | COCO_V1 |
| retinanet | resnet50 | COCO_V1 |

In [None]:

# Pick the GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Number of model outputs: one per entry in class_map plus the background class.
# class_map assigns ids 1..N, so the detector needs N + 1 outputs; deriving the
# value keeps the model in sync with the dataset instead of hard-coding 2.
num_classes = len(class_map) + 1

# Model selection; see the table of implemented combinations in the markdown above.
model_config = {
    'model_name': 'fasterrcnn',
    'backbone_name': 'resnet50',
    'num_classes': num_classes,
    'device': device,
    'weights': 'COCO_V1',
    'train_backbone': False
}

model = init_model(**model_config)

# Model summary. The description is built from model_config so it cannot go
# stale when another model/backbone combination is selected.
total_params = 0
trainable_params = 0
for p in model.parameters():
    total_params += p.numel()
    if p.requires_grad:
        trainable_params += p.numel()

print("\nModel Overview:")
print(f"Model type: {model_config['model_name']} with {model_config['backbone_name']} backbone")
print(f"Number of classes (incl. background): {num_classes}")
print(f"Number of parameters: {total_params}")
print(f"Number of trainable parameters: {trainable_params}")


# Model training with default hyperparameters

In [None]:
# Disabled evaluation call, kept for reference; uncomment to run it on the
# validation loader with the current model.
#evaluate_map50(model, val_loader, device, iou_threshold=0.5, conf_threshold=0.5)

There are two ways to run MLflow and work with it: with or without a tracking server.

- Either I log everything to the generic tracking server. In this case, I have to:

1- Start the server from a terminal: `mlflow server --host 127.0.0.1 --port 5000`

2- Set the tracking URI to the same port that was assigned to the server: `mlflow.set_tracking_uri("http://localhost:5000")`

- Or, I do not start any tracking server. In this case, the experiments default to the folder from which the
code is executed.

1- Do not set port 5000 as the tracking URI.

2- Browse the runs with: `mlflow ui --backend-store-uri /path/to/mlruns/`

In [None]:
# Experiment-tracking imports used by this cell and the training cells below.
from datetime import datetime

import mlflow
from mlflow.models import infer_signature
from torchinfo import summary

# An MLflow tracking server must already be listening on this address
# (start it from a terminal before running this cell).
mlflow.set_tracking_uri("http://localhost:5000")

# Experiment name layout:
#   <dataset>_V<version>_<model>_<backbone>_<YYYYmmdd_HHMMSS>
version = get_dataset_version_from_darwin(pathconfig['dataset_name'], 'digital-production')
experiment_name = "_".join([
    pathconfig['dataset_name'] + '_V' + version,
    model_config['model_name'],
    model_config['backbone_name'],
    datetime.now().strftime("%Y%m%d_%H%M%S"),
])

# Create/select the experiment; later runs are recorded under it.
mlflow.set_experiment(experiment_name=experiment_name)

In [None]:

# Load the training configuration used by train_model and by the manual
# training loop in the next cell.
config_path = '/home/naro/projects/Rumex/config/model_config.json'
config = load_config(config_path)

with mlflow.start_run():
    params = config
    # Log training parameters.
    mlflow.log_params(params)

    # Log the model summary as a run artifact. The file only exists to hand the
    # text to MLflow, so it is removed afterwards (same pattern as the PR-curve
    # images in the training loop). UTF-8 is requested explicitly because
    # torchinfo summaries contain non-ASCII box-drawing characters.
    summary_path = "model_summary.txt"
    try:
        with open(summary_path, "w", encoding="utf-8") as f:
            f.write(str(summary(model)))
        mlflow.log_artifact(summary_path)
    finally:
        if os.path.exists(summary_path):
            os.remove(summary_path)

    # Train with the project's training routine (defined in scripts.train).
    train_model(model, train_loader, val_loader, config, device)

    # Save the trained model to MLflow.
    mlflow.pytorch.log_model(model, "model")

In [None]:
# Manual training run tracked in MLflow.
#
# Reads from earlier cells: `model`, `config`, `train_loader`, `val_loader`, and
# the helpers `evaluate`, `print_metrics_report` and `visualize_pr_curves`.
# Each epoch trains on `train_loader`, evaluates on `val_loader`, logs losses,
# metrics and a PR-curve image to MLflow, and writes a checkpoint whenever the
# validation mAP@50 improves. After the loop the best checkpoint (if one was
# written) is loaded back into `model`.
run_name = 'Saturday'
with mlflow.start_run(run_name=run_name) as run:  # This will be the detailed run name (a child inside an experiment)
    # Log parameters
    mlflow.log_params({
        "architecture": config['architecture'],
        "optimizer": config.get('optimizer', 'sgd'),
        "learning_rate": config['learning_rate'],
        "batch_size": train_loader.batch_size if hasattr(train_loader, 'batch_size') else config.get('batch_size', 8),
        "num_epochs": config['epochs'],
    })

    # Get run ID for artifact paths
    run_id = run.info.run_id
    print(f"MLflow Run ID: {run_id}")
    print(f"Tracking URI: {mlflow.get_tracking_uri()}")

    # Only parameters that require gradients are optimized (frozen layers are skipped).
    params = [p for p in model.parameters() if p.requires_grad]

    # Optimizer: Adam when requested in the config, SGD with momentum otherwise.
    if config.get('optimizer') == 'adam':
        optimizer = torch.optim.Adam(params, lr=config['learning_rate'], weight_decay=0.0005)
    else:
        optimizer = torch.optim.SGD(params, lr=config['learning_rate'], momentum=0.9, weight_decay=0.0005)

    # Multiply the learning rate by 0.1 every 3 epochs.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    # Class-id -> display-name mapping used by the reports and the PR plots.
    class_names = config.get('class_names', {1: "Rumex"})

    # Track best model
    best_map_50 = 0
    best_checkpoint_saved = False  # becomes True once a checkpoint file has been written
    best_model_path = os.path.join(config.get('models_dir', './models'), f'best_model_{run_id}.pth')

    # Ensure models directory exists
    os.makedirs(os.path.dirname(best_model_path), exist_ok=True)

    # Use whatever device the model currently lives on.
    device = next(model.parameters()).device

    # Training loop
    for epoch in range(config['epochs']):
        # Training
        model.train()
        train_loss = 0
        batch_losses = {}

        for batch_idx, (images, targets) in enumerate(train_loader):
            # Move to device
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass: in training mode the model returns a dict of loss terms.
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            # Track individual loss components
            for k, v in loss_dict.items():
                if k not in batch_losses:
                    batch_losses[k] = 0
                batch_losses[k] += v.item()

            # Backward pass
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            train_loss += losses.item()

            # Print progress
            if batch_idx % 10 == 0:
                print(f'Epoch: {epoch}, Batch: {batch_idx}, Loss: {losses.item():.4f}')

        # Average training loss
        avg_train_loss = train_loss / len(train_loader)
        avg_batch_losses = {k: v / len(train_loader) for k, v in batch_losses.items()}

        # Validation with enhanced metrics.
        # NOTE(review): evaluate() presumably switches the model to eval mode;
        # model.train() is re-applied at the top of every epoch - confirm.
        val_metrics = evaluate(model, val_loader, device, iou_thresholds=[0.5, 0.75], conf_threshold=0.5)

        # Print detailed metrics report
        print(f"\nEpoch {epoch + 1}/{config['epochs']} Evaluation:")
        print_metrics_report(val_metrics, class_names=class_names)

        # Capture the learning rate that was actually used during this epoch
        # BEFORE stepping the scheduler; reading it afterwards would log the
        # next epoch's value.
        epoch_lr = optimizer.param_groups[0]['lr']

        # Update learning rate
        lr_scheduler.step()

        # Log metrics to MLflow
        # Main metrics
        mlflow.log_metric("epoch", epoch, step=epoch)
        mlflow.log_metric("learning_rate", epoch_lr, step=epoch)
        mlflow.log_metric("train_loss", avg_train_loss, step=epoch)

        # Log individual loss components
        for loss_name, loss_value in avg_batch_losses.items():
            mlflow.log_metric(f"train_{loss_name}", loss_value, step=epoch)

        # mAP metrics
        mlflow.log_metric("mAP", val_metrics['mAP'], step=epoch)
        mlflow.log_metric("mAP_50", val_metrics['mAP_50'], step=epoch)
        mlflow.log_metric("mAP_75", val_metrics['mAP_75'], step=epoch)

        # Overall metrics
        mlflow.log_metric("micro_precision", val_metrics['micro_precision'], step=epoch)
        mlflow.log_metric("micro_recall", val_metrics['micro_recall'], step=epoch)
        mlflow.log_metric("micro_f1", val_metrics['micro_f1'], step=epoch)
        mlflow.log_metric("macro_f1", val_metrics['macro_f1'], step=epoch)

        # Log per-class metrics (only the scalar precision/recall/f1 entries)
        for class_id, metrics in val_metrics['class_metrics'].items():
            for metric_name, value in metrics.items():
                if metric_name in ['precision', 'recall', 'f1']:
                    mlflow.log_metric(f"{class_id}_{metric_name}", value, step=epoch)

        # Confusion matrix elements
        mlflow.log_metric("total_TP", val_metrics['total_TP'], step=epoch)
        mlflow.log_metric("total_FP", val_metrics['total_FP'], step=epoch)
        mlflow.log_metric("total_FN", val_metrics['total_FN'], step=epoch)

        # Visualization of PR curves
        fig = visualize_pr_curves(val_metrics, class_names=class_names)

        # Save figure to temp file and log as artifact
        pr_curve_path = f"pr_curve_epoch_{epoch}.png"
        fig.savefig(pr_curve_path)
        mlflow.log_artifact(pr_curve_path)
        plt.close(fig)
        os.remove(pr_curve_path)  # Clean up temp file

        # Save best model (strict improvement of validation mAP@50)
        if val_metrics['mAP_50'] > best_map_50:
            best_map_50 = val_metrics['mAP_50']
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_metrics': val_metrics,
                'best_map_50': best_map_50,
            }, best_model_path)
            best_checkpoint_saved = True

            # Log best model metrics
            mlflow.log_metric("best_map_50", best_map_50, step=epoch)
            mlflow.log_metric("best_epoch", epoch, step=epoch)

            # Log model as artifact
            mlflow.pytorch.log_model(model, "best_model")

            print(f"Saved new best model with mAP@50: {best_map_50:.4f}")

    # Log final best model path
    mlflow.log_param("best_model_path", best_model_path)

    # Final validation metrics (computed with the last-epoch weights)
    final_metrics = evaluate(model, val_loader, device, iou_thresholds=[0.5, 0.75], conf_threshold=0.5)
    print("\nFinal Evaluation:")
    print_metrics_report(final_metrics, class_names=class_names)

    # Restore the best weights. A checkpoint only exists if some epoch reached
    # mAP@50 > 0; without this guard torch.load would raise FileNotFoundError.
    if best_checkpoint_saved:
        # map_location keeps the load working when the checkpoint was written
        # from a different device than the one in use now.
        # NOTE(review): the checkpoint also stores `val_metrics`; on PyTorch
        # versions where torch.load defaults to weights_only=True this may need
        # weights_only=False - confirm against the installed version.
        checkpoint = torch.load(best_model_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
    else:
        print("No checkpoint was saved (validation mAP@50 never exceeded 0); keeping the last-epoch weights.")

# Standalone Evaluation

# Sample inference

In [None]:
# Load the stored best weights into the model.
# NOTE(review): the manual training cell writes `best_model_<run_id>.pth` under
# config['models_dir']; this cell expects `best_model.pth` under
# paths_config.models_dir - confirm which step produces that file.
best_model_path = os.path.join(paths_config.models_dir, 'best_model.pth')
model = load_best_model(model, best_model_path)

# Number of test samples to run through the model and plot.
NUM_TEST_SAMPLES = 20

# Make predictions on multiple test samples
print("Visualizing predictions on test samples:")
for i in range(NUM_TEST_SAMPLES):
    print(f"\nTest Sample {i + 1}:")
    # Reuse the notebook-wide `device` instead of forcing CUDA, so the cell
    # also runs on machines without a GPU.
    predict_and_visualize_image(model, test_loader.dataset, device=device, confidence_threshold=0.5)


# Inference on a big image

# Inference and Georeferencing for a whole flight

# Mirror to fiftyone

- Create the dataset in fiftyone, including training, testing and validation set.
- For a specific model, create a new fiftyone dataset with the inference results.
- Save the inferences somewhere (for later dispatching).

In [None]:
# Peek at the first annotation file: print the bounding box and class name of
# every annotation it contains.
annotation_path = os.path.join(paths_config.annotations_dir, annotation_files[0])

# Context manager so the file handle is closed deterministically.
with open(annotation_path, encoding="utf-8") as annotation_fh:
    annotation_doc = json.load(annotation_fh)

# A distinct loop variable keeps the loaded document intact (the loop no longer
# overwrites it).
for annotation in annotation_doc['annotations']:
    print(annotation['bounding_box'])
    print(annotation['name'])

# Hyperparameters tuning

In [None]:
# Run the hyperparameter search with its default arguments.
# NOTE(review): presumably provided by the star import from
# tuning.hyperparameters_tuning - confirm, and check what inputs it reads.
hyperparameter_tuning()