In [None]:
# Imports
import os
import json
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import re
import yaml


# For deep learning
import torch
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.ops import box_iou

from torchmetrics.detection.mean_ap import MeanAveragePrecision
from sklearn.metrics import precision_recall_fscore_support
import numpy as np


# For augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Project-local imports.
# The working directory is switched to the project root first — presumably so
# that the `src` and `utils` packages below resolve (TODO confirm).
# NOTE(review): this absolute path is machine-specific.
os.chdir('/home/naro/projects/Rumex')

# NOTE(review): the wildcard imports hide where names used later in the
# notebook (e.g. read_yaml, DataVerifier, init_model, train_model) come from;
# explicit imports would make the dependencies visible.
from src.augmentation import *
from src.dataset import *
from src.model_factory import *

from src.evaluate import *
from src.train import *
from src.inference import *
from utils.fiftyone_utils import *
from utils.data_inspection import *
from utils.viz_utils import *
from utils.data_utils import *
from utils.generic import *

# Toggle for the sample/augmentation visualization cell further down.
VIZ = False

# Verify that all paths for the dataset are working correctly

In [None]:
# Load the YAML configuration and pick out the dataset-related settings.
config_file = "/home/naro/projects/Rumex/config/configs.yaml"
config = read_yaml(config_file)

dataset_cfg = config['dataset']
dataset_name = dataset_cfg['dataset_name']
darwin_root = dataset_cfg['darwin_root']
dataset_version = dataset_cfg['dataset_version']
images_extension = dataset_cfg['extension']

# Resolve the image/annotation directories and the three split files.
(
    img_dir,
    annotations_dir,
    train_split_file,
    test_split_file,
    val_split_file,
) = format_darwin_related_pathes(dataset_name, darwin_root, dataset_version)

# Verify the dataset on disk; note the return order is train, test, val.
data_verifier = DataVerifier(
    img_dir=img_dir,
    annotations_dir=annotations_dir,
    train_split_file=train_split_file,
    test_split_file=test_split_file,
    val_split_file=val_split_file,
    extension=images_extension,
)
train_annotations, test_annotations, val_annotations = data_verifier.check_directory_contents()

# Inspector used below for image sizes and class names.
image_processor = ImagesClassesInspector(img_dir=img_dir, annotations_dir=annotations_dir)

# get_image_files is called once per split, in the order train, val, test.
train_images, val_images, test_images = (
    data_verifier.get_image_files(split_annotations)
    for split_annotations in (train_annotations, val_annotations, test_annotations)
)

# Sizes are collected for every file in img_dir, not only the training split.
image_files = os.listdir(img_dir)
train_sizes = image_processor.get_image_sizes(image_files)

# Class names are gathered across all three splits.
annotation_files = train_annotations + test_annotations + val_annotations
classes = image_processor.get_classes(annotation_files)
print("\nClasses in the dataset:", classes, sep="\n")

# Class ids start at 1 (presumably leaving 0 for the background — TODO confirm).
class_map = {name: idx for idx, name in enumerate(classes, start=1)}
print("\nThe created class map:", class_map, sep="\n")

# Extremes of the image sizes found in img_dir.
min_size, max_size = image_processor.get_image_size_stats(image_files)
print(f"Smallest image size: {min_size}")
print(f"Largest image size: {max_size}")

# min_size is unpacked as (width, height); both feed the augmentation config later.
w_min, h_min = min_size
print(f"Width of smallest image: {w_min}")
print(f"Height of smallest image: {h_min}")


# Configure the augmentations

In [None]:
# Build the augmentation pipelines, sized to the smallest image in the dataset.
augmentation_config = AugmentationConfig(height=h_min, width=w_min)

# One pipeline for training, one for validation.
train_transform = augmentation_config.get_train_transform()
valid_transform = augmentation_config.get_valid_transform()

# Show both pipelines so the configured transforms can be checked by eye.
print("Training transforms:", train_transform, sep="\n")
print("\nValidation transforms:", valid_transform, sep="\n")


# Create Dataset Loaders

In [None]:

# Loader settings kept in one place so they are easy to tune.
BATCH_SIZE = 8
NUM_WORKERS = 2

# Build the train/val/test loaders; the training split gets the training
# transform, and the single validation transform is passed alongside it.
train_loader, val_loader, test_loader = create_data_loaders(
    img_dir=img_dir,
    annotation_dir=annotations_dir,
    train_images=train_images,
    train_annotations=train_annotations,
    val_images=val_images,
    val_annotations=val_annotations,
    test_images=test_images,
    test_annotations=test_annotations,
    train_transform=train_transform,
    valid_transform=valid_transform,
    class_map=class_map,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)

# Report how many samples each split contains.
for split_label, split_loader in (
    ("training", train_loader),
    ("validation", val_loader),
    ("test", test_loader),
):
    print(f"Number of samples in {split_label} dataset: {len(split_loader.dataset)}")


# Visualize some samples

In [None]:
if VIZ:
    # Show a few training samples. idx=None is forwarded unchanged —
    # presumably visualize_sample then picks the sample itself (TODO confirm).
    print("Visualizing samples:")
    for i in range(3):
        print(f"\nSample {i+1}:")
        visualize_sample(train_loader.dataset, class_map, idx=None, figsize=(5, 5))

    # Visualize augmentations
    print("Visualizing original image with augmentations:")

    # The two datasets share every argument except `transform`, and none of
    # those arguments change between iterations, so build them once instead of
    # re-creating both on every pass through the loop.
    shared_dataset_kwargs = dict(
        img_dir=img_dir,
        annotation_dir=annotations_dir,
        images_list=train_images,
        annotations_list=train_annotations,
        class_map=class_map,
    )
    dataset_plain = RumexDataSet(transform=None, **shared_dataset_kwargs)
    dataset_augmented = RumexDataSet(transform=train_transform, **shared_dataset_kwargs)

    # Five comparison figures, each with five augmented variants.
    for _ in range(5):
        visualize_augmentations(
            dataset_without_augmentation=dataset_plain,
            dataset_with_augmentation=dataset_augmented,
            classes=classes,
            num_augmented=5
        )


# Model Creation

In [None]:
# Display the loaded configuration (as the cell's last expression) before the model is built from it.
config

In [None]:

# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Architecture, backbone, class count, weights and backbone-training flag all
# come from the 'model' section of the config.
model_config = config['model']
model = init_model(model_name=model_config['model_name'],
                   backbone_name=model_config['backbone'],
                   num_classes=model_config['num_classes'],
                   device=device,
                   weights=model_config['weights'],
                   train_backbone=model_config['train_backbone'])

# Print model summary.
# The model description is taken from the config rather than hard-coded, so
# the printed text always matches the model that was actually requested.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print("\nModel Overview:")
print(f"Model type: {model_config['model_name']} with {model_config['backbone']} backbone")
print(f"Number of parameters: {total_params}")
print(f"Number of trainable parameters: {trainable_params}")


# Experiment Setup

There are two ways to run MLflow (with or without a tracking server) and work with it.

- Either I log everything to the generic server available. In this case, I have to:

1- Start the server from a terminal: `mlflow server --host 127.0.0.1 --port 5000`

2- Set the tracking URI to the same host and port that the server was started on: `mlflow.set_tracking_uri("http://localhost:5000")`

- Or I do not start a tracking server at all. In this case the experiments are stored, by default, in the folder from which the
code is executed.

1- Do not set `http://localhost:5000` as the tracking URI.

2- Browse the runs with: `mlflow ui --backend-store-uri /path/to/mlruns/`

In [None]:
from torchinfo import summary
from datetime import datetime
import mlflow
from mlflow.models import infer_signature
 

# The MLflow tracking server must already be running before this cell is
# executed, otherwise there is nothing listening on the URI below. Start it
# from a terminal with: mlflow server --host 127.0.0.1 --port 5000
mlflow.set_tracking_uri("http://localhost:5000")

# NOTE(review): 'digital-production' is hard-coded here — presumably the Darwin
# team slug; confirm and consider moving it into the config file.
version = get_dataset_version_from_darwin(dataset_name, 'digital-production')

# One experiment per dataset version / model / backbone / launch time.
# An f-string is used so a non-string `version` does not break the name.
launch_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
experiment_name = (
    f"{dataset_name}_V{version}"
    f"_{config['model']['model_name']}"
    f"_{config['model']['backbone']}"
    f"_{launch_stamp}"
)
# Printed explicitly: a bare expression in the middle of a cell is not displayed.
print(f"MLflow experiment: {experiment_name}")

mlflow.set_experiment(experiment_name = experiment_name)

# Model Training

In [None]:
# Hyperparameters for this run: the 'model' section of the config
# (the same dict object, not a copy). Displayed for a quick visual check.
params = config['model'] 
params

In [None]:
# Training is opt-in: flip TRAIN to True to launch a tracked MLflow run.
TRAIN = False
if TRAIN:
    summary_path = "model_summary.txt"
    with mlflow.start_run():
        # Record the hyperparameters of this run.
        mlflow.log_params(params)

        # Write the torchinfo summary to a text file and attach it to the run.
        with open(summary_path, "w") as summary_file:
            summary_file.write(str(summary(model)))
        mlflow.log_artifact(summary_path)

        # `params` is the same object as config['model'].
        train_model(model, train_loader, val_loader, params, device)

        # Store the model in MLflow under the artifact path "model".
        mlflow.pytorch.log_model(model, "model")

# Inference on the test set

In [None]:
# Location of the checkpoint handed to load_best_model.
# NOTE(review): absolute, machine-specific path.
best_model_path = "/home/naro/projects/Rumex/artifacts/models/best_model.pth"

# Rebind `model` to whatever load_best_model returns for that checkpoint.
model = load_best_model(model=model, best_model_path=best_model_path)

In [None]:
# Run inference on one test item (idx=1) and plot detections with score >= 0.5.
# Use the `device` selected when the model was created (CUDA if available,
# else CPU) instead of hard-coding CUDA, which fails on CPU-only machines.
predict_and_visualize_image_from_dataloader(
    model,
    test_loader,
    idx=1,
    device=device,
    confidence_threshold=0.5,
    figsize=(12, 12),
)
