In [2]:
# install ultralytics
!pip install ultralytics -t /nvmescratch/ceib/python310 --ignore-installed

Collecting ultralytics
  Using cached ultralytics-8.1.18-py3-none-any.whl.metadata (40 kB)
Collecting matplotlib>=3.3.0 (from ultralytics)
  Using cached matplotlib-3.8.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)
Collecting opencv-python>=4.6.0 (from ultralytics)
  Using cached opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting pillow>=7.1.2 (from ultralytics)
  Using cached pillow-10.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting pyyaml>=5.3.1 (from ultralytics)
  Using cached PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)
Collecting requests>=2.23.0 (from ultralytics)
  Using cached requests-2.31.0-py3-none-any.whl.metadata (4.6 kB)
Collecting scipy>=1.4.1 (from ultralytics)
  Using cached scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting torch>=1.8.0 (from 

## Train-Test Split

In [19]:
import os
import pandas as pd

In [37]:
images_list = os.listdir('Prostate_Cancer_TFM/Bounding_Boxes/Prostate_Data/images/train')
labels_list = os.listdir('Prostate_Cancer_TFM/Bounding_Boxes/Prostate_Data/labels/train')

In [38]:
images_df=[]
for image in images_list:
    if image.replace('.jpeg', '.txt') in labels_list:
        images_df.append({
            'subject': image.split('_')[1],
            'image': image,
            'has_bb': 1
        })
    else:
        images_df.append({
            'subject': image.split('_')[1],
            'image': image,
            'has_bb': 0
        })

images_df = pd.DataFrame(images_df)

In [39]:
images_df

Unnamed: 0,subject,image,has_bb
0,3930,image_3930_7.jpeg,0
1,5736,image_5736_5.jpeg,1
2,5544,image_5544_3.jpeg,0
3,5593,image_5593_18.jpeg,1
4,6048,image_6048_9.jpeg,1
...,...,...,...
2495,5915,image_5915_10.jpeg,0
2496,3034,image_3034_8.jpeg,0
2497,5690,image_5690_11.jpeg,0
2498,5680,image_5680_9.jpeg,0


In [49]:
from sklearn.model_selection import StratifiedGroupKFold

stratified_group_k_fold = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=42)

X = images_df['image']
y = images_df['has_bb']
groups = images_df['subject']

for i, (train_index, test_index) in enumerate(stratified_group_k_fold.split(X, y, groups)):
    print(f"Fold {i}:")
    print(f"  Train: index={train_index}")
    print(f"         group={groups[train_index]}")
    print(f"  Test:  index={test_index}")
    print(f"         group={groups[test_index]}")
    break

Fold 0:
  Train: index=[   0    1    2 ... 2497 2498 2499]
         group=0       3930
1       5736
2       5544
3       5593
5       5700
        ... 
2495    5915
2496    3034
2497    5690
2498    5680
2499    2413
Name: subject, Length: 1982, dtype: object
  Test:  index=[   4   11   16   18   19   21   27   33   41   43   46   47   50   60   64   68   72   77   86   87   97  101  102  108  109  112  114  122  124  126  132  134  156  157  160  161  165  168  181  182  186  192  193  199  203  209  211  218  220  224  243  244  245  250  252  260  262  279  284  288  289  290  296
  301  304  307  308  310  318  327  341  342  343  344  345  349  350  351  356  361  371  376  378  384  393  394  395  399  400  401  410  411  413  416  424  426  427  436  441  442  447  459  461  468  476  480  488  493  495  506  513  530  532  535  544  557  559  560  561  562  571  572  576  577  580  581
  583  584  592  593  596  598  603  605  612  627  635  655  658  666  668  671  672  673  6

In [50]:
train_index

array([   0,    1,    2, ..., 2497, 2498, 2499])

In [51]:
test_index

array([   4,   11,   16,   18,   19,   21,   27,   33,   41,   43,   46,   47,   50,   60,   64,   68,   72,   77,   86,   87,   97,  101,  102,  108,  109,  112,  114,  122,  124,  126,  132,  134,  156,  157,  160,  161,  165,  168,  181,  182,  186,  192,  193,  199,  203,  209,  211,  218,  220,  224,  243,  244,
        245,  250,  252,  260,  262,  279,  284,  288,  289,  290,  296,  301,  304,  307,  308,  310,  318,  327,  341,  342,  343,  344,  345,  349,  350,  351,  356,  361,  371,  376,  378,  384,  393,  394,  395,  399,  400,  401,  410,  411,  413,  416,  424,  426,  427,  436,  441,  442,  447,  459,  461,  468,
        476,  480,  488,  493,  495,  506,  513,  530,  532,  535,  544,  557,  559,  560,  561,  562,  571,  572,  576,  577,  580,  581,  583,  584,  592,  593,  596,  598,  603,  605,  612,  627,  635,  655,  658,  666,  668,  671,  672,  673,  677,  683,  689,  702,  703,  704,  708,  712,  718,  726,  728,  736,
        743,  745,  749,  753,  762,  763, 

In [52]:
images_df['partition'] = None
images_df.loc[train_index, 'partition'] = 'train'
images_df.loc[test_index, 'partition'] = 'val'

In [53]:
images_df

Unnamed: 0,subject,image,has_bb,partition
0,3930,image_3930_7.jpeg,0,train
1,5736,image_5736_5.jpeg,1,train
2,5544,image_5544_3.jpeg,0,train
3,5593,image_5593_18.jpeg,1,train
4,6048,image_6048_9.jpeg,1,val
...,...,...,...,...
2495,5915,image_5915_10.jpeg,0,train
2496,3034,image_3034_8.jpeg,0,train
2497,5690,image_5690_11.jpeg,0,train
2498,5680,image_5680_9.jpeg,0,train


In [55]:
images_df[images_df.has_bb == 0]['partition'].value_counts()

partition
train    1602
val       403
Name: count, dtype: int64

In [58]:
# Moving the images to the corresponding folders
import shutil

val_images = images_df[images_df['partition'] == 'val']

#Creating the folders
os.makedirs('Prostate_Cancer_TFM/Bounding_Boxes/Prostate_Data/images/val', exist_ok=True)
os.makedirs('Prostate_Cancer_TFM/Bounding_Boxes/Prostate_Data/labels/val', exist_ok=True)

for i,row in val_images.iterrows():
    # Moving the images
    image = row['image']
    shutil.move(f'Prostate_Cancer_TFM/Bounding_Boxes/Prostate_Data/images/train/{image}', f'Prostate_Cancer_TFM/Bounding_Boxes/Prostate_Data/images/val/{image}')
    
    if row['has_bb'] == 1:
        # Moving the labels
        shutil.move(f'Prostate_Cancer_TFM/Bounding_Boxes/Prostate_Data/labels/train/{image.replace(".jpeg", ".txt")}', f'Prostate_Cancer_TFM/Bounding_Boxes/Prostate_Data/labels/val/{image.replace(".jpeg", ".txt")}')
    

## Training

In [1]:
from ultralytics import YOLO

In [2]:
model = YOLO('yolov8n.pt')

In [3]:
# Train the model

results = model.train(data='Prostate_Cancer_TFM/Bounding_Boxes/prostate.yaml', epochs=100, imgsz=640)

  return torch._C._cuda_getDeviceCount() > 0


Ultralytics YOLOv8.1.18 🚀 Python-3.10.13 torch-2.2.0+cu121 CPU (Intel Xeon Gold 6230 2.10GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=Prostate_Cancer_TFM/Bounding_Boxes/prostate.yaml, epochs=100, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train8, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=Fa

100%|██████████| 755k/755k [00:00<00:00, 32.3MB/s]


Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics

[34m[1mtrain: [0mScanning /clinicfs/userhomes/jaalzate/Prostate_Cancer_TFM/Bounding_Boxes/datasets/Prostate_Data/labels/train... 380 images, 1602 backgrounds, 0 corrupt: 100%|██████████| 1982/1982 [00:01<00:00, 1069.66it/s]


[34m[1mtrain: [0mNew cache created: /clinicfs/userhomes/jaalzate/Prostate_Cancer_TFM/Bounding_Boxes/datasets/Prostate_Data/labels/train.cache


[34m[1mval: [0mScanning /clinicfs/userhomes/jaalzate/Prostate_Cancer_TFM/Bounding_Boxes/datasets/Prostate_Data/labels/val... 115 images, 403 backgrounds, 0 corrupt: 100%|██████████| 518/518 [00:00<00:00, 1090.52it/s]


[34m[1mval: [0mNew cache created: /clinicfs/userhomes/jaalzate/Prostate_Cancer_TFM/Bounding_Boxes/datasets/Prostate_Data/labels/val.cache
Plotting labels to runs/detect/train8/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)


2024/02/26 12:31:45 INFO mlflow.tracking.fluent: Experiment with name '/Shared/YOLOv8' does not exist. Creating a new experiment.


[34m[1mMLflow: [0mlogging run_id(05233b09d24a40c18c191b15c6c13a04) to runs/mlflow
[34m[1mMLflow: [0mview at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri runs/mlflow'
[34m[1mMLflow: [0mdisable with 'yolo settings mlflow=False'
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train8[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100         0G      2.747      10.88      2.465          6        640: 100%|██████████| 124/124 [03:16<00:00,  1.59s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 17/17 [00:17<00:00,  1.01s/it]


                   all        518        121   0.000425      0.545   0.000367   0.000119

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      2/100         0G      2.863      7.024      2.501          9        640:  68%|██████▊   | 84/124 [02:12<01:02,  1.57s/it]


KeyboardInterrupt: 

In [1]:
import torch

print(torch.cuda.is_available())

False


  return torch._C._cuda_getDeviceCount() > 0


In [None]:
model.train( )