### GPU check

In [None]:
# NVIDIA GPU check: list the GPU attached to this runtime together with the
# driver and CUDA versions reported by nvidia-smi
!nvidia-smi

Sat Feb  3 22:02:12 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   53C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

### Setup (install and imports)

In [None]:
from IPython.display import clear_output

# LIBRARIES TO INSTALL
!pip install ultralytics
!pip install clearml
clear_output()

In [None]:
# IMPORTS
import os
import shutil
from ultralytics import YOLO
import random
import locale
from google.colab import files

from clearml import Task
import tensorboard

In [None]:
# Available dataset variants:
# v0 | leefilter | nlmeansfilter | waveletfilter

###################################
# REMEMBER TO SET TRAIN PARTITION #
###################################
ds_name = "waveletfilter"

# All paths are resolved from the directory the notebook is running in.
cwd = os.getcwd()
# YAML dataset description that matches the selected variant.
configuration_file = os.path.join(cwd, "SAR-Ship-Dataset", f"data_{ds_name}.yaml")
# Name under which the experiment is registered.
task_name = f"{ds_name.capitalize()} 10k"

### Setup (clearml)

In [None]:
# Import ClearML and log this notebook session in through its browser login helper
import clearml
clearml.browser_login()

<IPython.core.display.Javascript object>


🤖 ClearML connected successfully - let's build something! 🚀


In [None]:
# Create the ClearML task that tracks this experiment.
task = Task.init(
    project_name = "test",
    task_name = task_name
)

model_variant = "yolov8n"
# Log the chosen architecture under a correctly spelled key (it used to be
# logged as "modal_variant").
task.set_parameter("model_variant", model_variant)

# Number of images picked to perform the training (they will be split
# into 70% for training and 30% for validation).
# NOTE(review): in this cell the value is only logged to ClearML; nothing here
# applies it to the dataset — confirm it matches the image lists in use.
nmr_images = 10000
task.set_parameter("nmr_images", nmr_images)

# Load the YOLOv8 model
model = YOLO(f"{model_variant}.pt") # load a pretrained model

# Training arguments, later connected to the task and passed to model.train().
args = dict(data    = configuration_file,
            epochs  = 100,
            imgsz   = 256,
            device  = 0,
            batch   = 256)

<IPython.core.display.Javascript object>


🤖 ClearML connected successfully - let's build something! 🚀
ClearML Task: created new task id=491ae3a75feb4bad851962148aa942f7
2024-02-03 18:39:14,089 - clearml.Task - INFO - No repository found, storing script code instead
ClearML results page: https://app.clear.ml/projects/20fd35ca22f44c058a4937b57d40b24e/experiments/491ae3a75feb4bad851962148aa942f7/output/log
Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n.pt to 'yolov8n.pt'...
ClearML Monitor: GPU monitoring failed getting GPU reading, switching off GPU monitoring


100%|██████████| 6.23M/6.23M [00:00<00:00, 104MB/s]


2024-02-03 18:39:24,039 - clearml.model - INFO - Selected model id: 8f334a97307d474fa91242f6c43022bb


### Setup (folder training and validation)

In [None]:
clone_repo = "https://github.com/eboss-dev/SAR-Ship-Dataset"

# TODO: make this interactive in order to select only the dataset that we want
# and avoid unzipping unnecessary files

!git clone {clone_repo}

Cloning into 'SAR-Ship-Dataset'...
remote: Enumerating objects: 91, done.[K
remote: Counting objects: 100% (91/91), done.[K
remote: Compressing objects: 100% (72/72), done.[K
remote: Total 91 (delta 38), reused 58 (delta 17), pack-reused 0[K
Receiving objects: 100% (91/91), 143.00 KiB | 2.60 MiB/s, done.
Resolving deltas: 100% (38/38), done.
Filtering content: 100% (6/6), 1.87 GiB | 56.58 MiB/s, done.


In [None]:
!unzip ./SAR-Ship-Dataset/ship_dataset_{ds_name}.zip -d ./SAR-Ship-Dataset/
# Once extracted remove the zip file downloaded from the github repo
!rm ./SAR-Ship-Dataset/ship_dataset{ds_name}.zip

clear_output()

In [None]:
# Root of the cloned dataset repository.
DATASET_PATH = os.path.join(cwd, "SAR-Ship-Dataset")
# Sub-folder holding the extracted files of the selected dataset variant.
folder = f"ship_dataset_{ds_name}"


def create_folders(image_path, label_path):
  """Create the image and label directories of a split.

  Args:
    image_path: Directory to create for the images.
    label_path: Directory to create for the labels.

  Returns:
    1 when both directories were created, 0 when creating either of them
    raised (for instance because it already exists); the error is printed.
  """
  try:
    # Images first, then labels; intermediate directories are created as needed.
    for target in (image_path, label_path):
      os.makedirs(target)
  except Exception as e:
    print(f"Some error occured: {e}")
    return 0
  else:
    return 1

# create train, test, valid and add the images and labels
def copy_in_new_folder(split, list_img):
  """Move the listed images and their label files into the folders of a split.

  Images go to ``<dataset>/<split>/images`` and the matching ``.txt`` label
  files to ``<dataset>/<split>/labels``.

  Args:
    split: Name of the split sub-folder (e.g. "train", "validation", "test").
    list_img: Iterable of image file names located directly in the dataset
      folder. Surrounding whitespace/newlines are stripped and blank entries
      are ignored.

  Nothing is moved when ``create_folders`` reports a failure, e.g. because
  the split folders already exist.
  """
  IMAGE_SPLIT_PATH = os.path.join(DATASET_PATH, folder, split, 'images')
  LABEL_SPLIT_PATH = os.path.join(DATASET_PATH, folder, split, 'labels')
  source_dir = os.path.join(DATASET_PATH, folder)

  if(create_folders(IMAGE_SPLIT_PATH, LABEL_SPLIT_PATH)):
    for img in list_img:
      img = img.strip()
      if not img:
        # A blank line would resolve to the dataset folder itself and make
        # shutil.move try to relocate the whole directory.
        continue
      # Derive the label name from the actual extension instead of assuming
      # the extension is exactly four characters long (".jpg").
      label = os.path.splitext(img)[0] + '.txt'
      shutil.move(os.path.join(source_dir, img),      # FROM SOURCE
                  os.path.join(IMAGE_SPLIT_PATH, img))  # TO SPLIT PATH
      shutil.move(os.path.join(source_dir, label),
                  os.path.join(LABEL_SPLIT_PATH, label))

def delete_unnecessary(folder):
  """Delete the ``.txt`` and ``.jpg`` files located directly inside ``folder``.

  Sub-directories and their content are left untouched, as is any file with
  another extension. A folder that does not exist is treated as empty.

  Args:
    folder: Path of the directory to clean up.
  """
  # Only the first os.walk entry (the top level) is used; the default covers
  # a missing folder, for which os.walk yields nothing.
  _, _, files_list = next(os.walk(folder), (folder, [], []))

  for file in files_list:
    if file.endswith((".txt", ".jpg")):
      # Remove from the same directory that was listed, whatever form
      # (absolute or relative) `folder` was given in.
      os.remove(os.path.join(folder, file))

def prepare_data_for_training():
  """Distribute the extracted dataset into train / validation / test folders.

  For every split, the image names are read from
  ``./SAR-Ship-Dataset/<split>_images.txt`` and handed to
  ``copy_in_new_folder``. Afterwards the ``.txt``/``.jpg`` files still lying
  at the top level of the dataset folder are deleted and a confirmation
  message is printed.
  """
  # Splits are processed in this fixed order: train, validation, test.
  for split in ("train", "validation", "test"):
    with open(f'./SAR-Ship-Dataset/{split}_images.txt', 'r') as listing:
      # One image file name per line
      names = listing.readlines()
    copy_in_new_folder(split, names)

  # Remove the files that were not moved into any split
  delete_unnecessary(os.path.join(DATASET_PATH, folder))

  print(f'✅ Dataset {folder} ready for training, validation and test!')

In [None]:
# Files are moved (not copied): on a re-run the split folders already exist,
# folder creation reports an error and nothing is moved again.
prepare_data_for_training()

✅ Dataset ship_dataset_waveletfilter ready for training, validation and test!


### Dataset training

In [None]:
# TODO: epochs (here and in the following cells) can be parametrized using ClearML;
# let's see how to do it

# Register the training arguments on the ClearML task before training starts
task.connect(args)
# data.yaml is downloaded from the github repo
data_train = model.train(**args) #train the model
# NOTE(review): this line is not reached when model.train() raises, so the
# task is then left unclosed — confirm how failed runs should be handled.
task.close()

Ultralytics YOLOv8.1.9 🚀 Python-3.10.12 torch-2.1.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/content/SAR-Ship-Dataset/data_v0.yaml, epochs=100, time=None, patience=50, batch=256, imgsz=256, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True,

RuntimeError: Dataset '/content/SAR-Ship-Dataset/data_v0.yaml' error ❌ /content/SAR-Ship-Dataset/data_v0.yaml 'val:' key missing ❌.
'train' and 'val' are required in all data YAMLs.

### Validation process

In [None]:
# Load the best model from ClearML: look up the completed task that carries
# this experiment's name
preprocess_task = Task.get_task(project_name='test', task_name = task_name,
                                task_filter={'status': ['completed']})

# Download the last output model registered on that task
# NOTE(review): presumably the last entry is best.pt — confirm the ordering
model_path = preprocess_task.models['output'][-1].get_local_copy()


model = YOLO(model_path) # load best pretrained model

# PERFORM VALIDATION OF THE MODEL
# (evaluated on the "val" split; save_json also writes the predictions as JSON)

data_validation = model.val(save_json = True, split="val", batch=256)

2024-02-03 22:13:22,187 - clearml.storage - INFO - Downloading: 5.00MB / 5.92MB @ 11.59MBs from https://files.clear.ml/test/waveletfilter%2010k.c467a57107ae4d4ca6f334daffdff2d5/models/best.pt
2024-02-03 22:13:22,192 - clearml.storage - INFO - Downloaded 5.92 MB successfully from https://files.clear.ml/test/waveletfilter%2010k.c467a57107ae4d4ca6f334daffdff2d5/models/best.pt , saved to /root/.clearml/cache/storage_manager/global/eb9382a59c97d952efc878503951cf89.best.pt
Ultralytics YOLOv8.1.9 🚀 Python-3.10.12 torch-2.1.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
Model summary (fused): 168 layers, 3005843 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /content/SAR-Ship-Dataset/ship_dataset_waveletfilter/validation/labels... 3000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 3000/3000 [00:01<00:00, 2102.04it/s]


[34m[1mval: [0mNew cache created: /content/SAR-Ship-Dataset/ship_dataset_waveletfilter/validation/labels.cache


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:17<00:00,  1.42s/it]


                   all       3000       3857      0.911      0.888      0.939      0.565
Speed: 0.5ms preprocess, 0.7ms inference, 0.0ms loss, 1.2ms postprocess per image
Saving runs/detect/val3/predictions.json...
Results saved to [1mruns/detect/val3[0m


In [None]:
# Same evaluation, this time on the "test" split.
# NOTE: this overwrites `data_validation` produced by the previous cell.
data_validation = model.val(save_json = True, split="test", batch=256)

Ultralytics YOLOv8.1.9 🚀 Python-3.10.12 torch-2.1.0+cu121 CUDA:0 (Tesla T4, 15102MiB)


[34m[1mval: [0mScanning /content/SAR-Ship-Dataset/ship_dataset_waveletfilter/test/labels... 3000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 3000/3000 [00:01<00:00, 2145.38it/s]


[34m[1mval: [0mNew cache created: /content/SAR-Ship-Dataset/ship_dataset_waveletfilter/test/labels.cache


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:16<00:00,  1.39s/it]


                   all       3000       3787      0.915      0.892      0.942      0.573
Speed: 0.4ms preprocess, 0.7ms inference, 0.0ms loss, 1.2ms postprocess per image
Saving runs/detect/val4/predictions.json...
Results saved to [1mruns/detect/val4[0m


### Predict new data

In [None]:
# Optional step: run a prediction on new data.
# It is not mandatory, but it is useful to test inference on unseen images.
# The weights could also be retrieved from the folder where the model saved
# them, or ClearML could be used to run the inference.

model = YOLO(model_path)
results = model.predict(
    source    = f"{cwd}/SAR-Ship-Dataset/ship_dataset_{ds_name}/test/images/",
    imgsz     = 256,
    conf      = 0.5,  # keep only detections with confidence > 50%
    save      = True, # saves the annotated images in /runs/detect/predict/file_name.xyz
    save_txt  = True) # saves the predicted labels in /runs/detect/predict/labels/<original_file_name>.txt





errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

image 1/3000 /content/SAR-Ship-Dataset/ship_dataset_waveletfilter/test/images/Gao_ship_hh_0201608254401010040.jpg: 256x256 1 Ship, 6.2ms
image 2/3000 /content/SAR-Ship-Dataset/ship_dataset_waveletfilter/test/images/Gao_ship_hh_0201608254401010068.jpg: 256x256 2 Ships, 9.0ms
image 3/3000 /content/SAR-Ship-Dataset/ship_dataset_waveletfilter/test/images/Gao_ship_hh_0201608254401010073.jpg: 256x256 1 Ship, 9.0ms
image 4/3000 /content/SAR-Ship-Dataset/ship_dataset_waveletfilter/test/images/Gao_ship_hh_0201608254401012056.jpg: 256x256 1 Ship, 7.5ms
image 5/3000 /content/S

### Save locally the result of validation - test - predict
No need to run this if we connect with ClearML

In [None]:
def save_all_runs_locally():
  def getpreferredencoding(do_setlocale = True):
      return "UTF-8"
  locale.getpreferredencoding = getpreferredencoding
  !cd {cwd} && zip -r {cwd}/{ds_name}.zip {cwd}/runs/

  files.download(os.path.join(cwd, ds_name+'.zip'))

In [None]:
# Optional: only needed when the results are not already tracked through ClearML
save_all_runs_locally()

[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
  adding: content/runs/detect/predict2/Gao_ship_hh_020171021490106013.jpg (deflated 1%)
  adding: content/runs/detect/predict2/Sen_ship_hh_0201704130102015.jpg (deflated 2%)
  adding: content/runs/detect/predict2/Sen_ship_hh_0201608220201018.jpg (deflated 1%)
  adding: content/runs/detect/predict2/Gao_ship_vh_020170107170103011.jpg (deflated 10%)
  adding: content/runs/detect/predict2/Sen_ship_hv_0201709160102805.jpg (deflated 20%)
  adding: content/runs/detect/predict2/Gao_ship_hh_0201801212902023028.jpg (deflated 1%)
  adding: content/runs/detect/predict2/Sen_ship_hh_02017030301035039.jpg (deflated 1%)
  adding: content/runs/detect/predict2/Sen_ship_hv_02017032001041038.jpg (deflated 15%)
  adding: content/runs/detect/predict2/Gao_ship_hh_0201608254401012064.jpg (deflated 25%)
  adding: content/runs/detect/predict2/Sen_ship_hh_02018012601024029.jpg (deflated 3%)
  adding: content/runs/detect/predict2/Gao_ship_hh_02017090

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Future improvements

In [None]:
# FUTURE: benchmark the model to see how it behaves on edge devices?
# FUTURE: hyperparameter tuning?