In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import sys
sys.path.append('d:/flagellar/code/core')
sys.path.append('/flagellar/code/core/')
sys.path.append('/kaggle/input/my-flagellar-library/')
import flg_support as fls
import flg_unet
import flg_numerics
import flg_model
import importlib
import numpy as np
import matplotlib.pyplot as plt
import time
import copy
import flg_yolo

# Set to True for a quick smoke test of the notebook.
fast_mode = False
# A submission run must never take the fast-mode shortcut.
fast_mode = False if fls.is_submission else fast_mode

In [2]:
# Build a reproducible train/validation split of the training tomograms.
all_data = fls.load_all_train_data()
np.random.default_rng(seed=0).shuffle(all_data)  # fixed seed -> same order on every run

# Hold out up to N tomograms with exactly one motor and up to N tomograms with
# no motor as the validation set; everything else is used for training.
N = 100
n_motors = np.array([len(d.labels) for d in all_data])
inds_zero = np.flatnonzero(n_motors == 0)[:N]
inds_one = np.flatnonzero(n_motors == 1)[:N]
inds_test = np.concatenate((inds_zero, inds_one))
inds_train = np.setdiff1d(np.arange(len(n_motors)), inds_test)

train_data = [all_data[i] for i in inds_train]
test_data = [all_data[i] for i in inds_test]
# Every tomogram must land in exactly one of the two sets.
assert len(train_data) + len(test_data) == len(all_data)

# Mix the zero-motor and one-motor tomograms, which were concatenated in blocks.
np.random.default_rng(seed=0).shuffle(test_data)

if fast_mode:
    # Small subsets for a quick end-to-end run.
    train_data = train_data[1:30]
    test_data = test_data[2:4]

# Persist the names of the tomograms assigned to each set.
fls.dill_save(fls.temp_dir + 'repr.pickle', ([d.name for d in train_data], [d.name for d in test_data]))

In [3]:
# Re-import flg_yolo so that edits to the module take effect without a kernel restart.
importlib.reload(flg_yolo)

model = flg_yolo.YOLOModel()
# NOTE(review): the saved trainer log below reports seed=0 although 42 is set
# here -- confirm that model.seed is actually forwarded to the trainer.
model.seed = 42
# A single epoch is enough for a fast-mode smoke test; otherwise train for 5.
model.n_epochs = 1 if fast_mode else 5
model.run_in_parallel = True

# The held-out split doubles as the validation set during training.
model.train(train_data, test_data)

# Keep a copy of the trained model, except when running on Kaggle.
if fls.env != 'kaggle':
    fls.dill_save(fls.model_dir+ "yolo_repr.pickle", model)

Will process approximately 3123 slices for training


Processing training motors:   0%|          | 0/347 [00:00<?, ?it/s]

Will process approximately 900 slices for validation


Processing validation motors:   0%|          | 0/100 [00:00<?, ?it/s]


Processing Summary:
- Train set: 258 tomograms, 347 motors, 3118 slices
- Validation set: 100 tomograms, 100 motors, 900 slices
- Total: 358 tomograms, 447 motors, 4018 slices

Preprocessing Complete:
- Training data: 258 tomograms, 347 motors, 3118 slices
- Validation data: 100 tomograms, 100 motors, 900 slices
- Dataset directory: /flagellar/temp//yolo_dataset/
- YAML configuration: /flagellar/temp//yolo_dataset/dataset.yaml

Ready for YOLO training!
Starting YOLO training process...
Directory status:
- Train images exists: True
- Val images exists: True
- Train labels exists: True
- Val labels exists: True
Found original dataset.yaml at /flagellar/temp//yolo_dataset/dataset.yaml
Fixing YAML paths in /flagellar/temp//yolo_dataset/dataset.yaml
Created fixed YAML at /flagellar/temp/fixed_dataset.yaml with path: /flagellar/temp//yolo_dataset/
Using YAML file: /flagellar/temp/fixed_dataset.yaml
YAML contents:
names:
  0: motor
path: /flagellar/temp//yolo_dataset/
train: images/train
val

100%|██████████| 49.7M/49.7M [00:01<00:00, 35.8MB/s]


Ultralytics 8.3.98 🚀 Python-3.11.10 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 4090, 23995MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=/flagellar/models//yolov8m.pt, data=/flagellar/temp/fixed_dataset.yaml, epochs=5, time=None, patience=10, batch=16, imgsz=640, save=True, save_period=5, cache=False, device=None, workers=4, project=/flagellar/temp//yolo_weights/, name=motor_detector, exist_ok=True, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=True, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=

[34m[1mtrain: [0mScanning /flagellar/temp/yolo_dataset/labels/train... 3118 images, 0 backgrounds, 0 corrupt: 100%|██████████| 3118/3118 [00:01<00:00, 2113.53it/s]


[34m[1mtrain: [0mNew cache created: /flagellar/temp/yolo_dataset/labels/train.cache


[34m[1mval: [0mScanning /flagellar/temp/yolo_dataset/labels/val... 900 images, 0 backgrounds, 0 corrupt: 100%|██████████| 900/900 [00:00<00:00, 2300.66it/s]


[34m[1mval: [0mNew cache created: /flagellar/temp/yolo_dataset/labels/val.cache
Plotting labels to /flagellar/temp/yolo_weights/motor_detector/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1m/flagellar/temp/yolo_weights/motor_detector[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/5      6.33G      3.239      5.545      1.254         20        640: 100%|██████████| 195/195 [00:28<00:00,  6.84it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 29/29 [00:04<00:00,  6.34it/s]

                   all        900        900     0.0248     0.0511    0.00468   0.000742






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/5      8.26G      2.508      2.048      1.035         29        640: 100%|██████████| 195/195 [00:24<00:00,  7.84it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 29/29 [00:04<00:00,  5.93it/s]

                   all        900        900      0.321      0.355       0.17     0.0338






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/5      8.26G      2.292      1.818     0.9935         22        640: 100%|██████████| 195/195 [00:23<00:00,  8.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 29/29 [00:03<00:00,  7.84it/s]


                   all        900        900      0.675      0.623      0.652      0.194

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/5      8.26G      2.075      1.564     0.9542         13        640: 100%|██████████| 195/195 [00:24<00:00,  8.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 29/29 [00:03<00:00,  7.76it/s]

                   all        900        900      0.774      0.684       0.72        0.2






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        5/5      8.26G      1.871      1.331     0.9139         26        640: 100%|██████████| 195/195 [00:24<00:00,  7.99it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 29/29 [00:03<00:00,  7.93it/s]

                   all        900        900      0.823      0.778      0.818      0.244






5 epochs completed in 0.042 hours.
Optimizer stripped from /flagellar/temp/yolo_weights/motor_detector/weights/last.pt, 52.0MB
Optimizer stripped from /flagellar/temp/yolo_weights/motor_detector/weights/best.pt, 52.0MB

Validating /flagellar/temp/yolo_weights/motor_detector/weights/best.pt...
Ultralytics 8.3.98 🚀 Python-3.11.10 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 4090, 23995MiB)
Model summary (fused): 92 layers, 25,840,339 parameters, 0 gradients, 78.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 29/29 [00:06<00:00,  4.35it/s]


                   all        900        900      0.679      0.757      0.772      0.279
Speed: 0.1ms preprocess, 4.3ms inference, 0.0ms loss, 0.6ms postprocess per image
Results saved to [1m/flagellar/temp/yolo_weights/motor_detector[0m

Training complete!


In [4]:
# Disabled local check: runs inference on a slice of the held-out set and prints
# the competition metric against the ground truth. Uncomment to use it.
# if not fls.is_submission:
#     importlib.reload(flg_yolo)
#     inferred_test_data = model.infer(test_data[1:10])
#     print(fls.score_competition_metric(inferred_test_data, test_data))

In [5]:
# Run the trained model on the test tomograms and write the submission file.
submission_inputs = fls.load_all_test_data()
inferred_test_data2 = model.infer(submission_inputs)
fls.write_submission_file(inferred_test_data2)

CUDA_VISIBLE_DEVICES= 0
Model summary (fused): 92 layers, 25,840,339 parameters, 0 gradients, 78.7 GFLOPs
       tomo_id  Motor axis 0  Motor axis 1  Motor axis 2
0  tomo_003acc            -1            -1            -1
1  tomo_00e047           174           547           602
2  tomo_01a877           143           639           285


In [6]:
# Display the inference results for manual inspection.
inferred_test_data2

[Data(is_train=False, name='tomo_003acc', labels=Empty DataFrame
 Columns: []
 Index: [], labels_unfiltered=Empty DataFrame
 Columns: []
 Index: [], loaded_state='unloaded', data=None, data_shape=(0, 0, 0), voxel_spacing=nan, mean_per_slice=array([], dtype=float64), std_per_slice=array([], dtype=float64), percentiles_per_slice=array([], shape=(101, 0), dtype=float64), resize_factor=1.0, target_size=None),
 Data(is_train=False, name='tomo_00e047', labels=     z    y    x
 0  174  547  602, labels_unfiltered=Empty DataFrame
 Columns: []
 Index: [], loaded_state='unloaded', data=None, data_shape=(0, 0, 0), voxel_spacing=nan, mean_per_slice=array([], dtype=float64), std_per_slice=array([], dtype=float64), percentiles_per_slice=array([], shape=(101, 0), dtype=float64), resize_factor=1.0, target_size=None),
 Data(is_train=False, name='tomo_01a877', labels=     z    y    x
 0  143  639  285, labels_unfiltered=Empty DataFrame
 Columns: []
 Index: [], loaded_state='unloaded', data=None, data_sh