# Devel
Develop code for the detection NFTs in WSIs using Ultralytics' implementation of YOLOv8 (https://docs.ultralytics.com/).

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Imports
import sys
sys.path.append('../..')

from ultralytics import YOLO
from pandas import read_csv
from os.path import join, isfile
from tqdm.notebook import tqdm
import cv2 as cv
from shutil import copyfile
import numpy as np

from neurotk import imread, imwrite, tile_roi_with_labels_wrapper
from neurotk.utils import create_dirs, get_filename, im_to_txt_path

### Train model

In [None]:
# Get YOLO model with pre-trained weights (from COCO dataset).
model = YOLO('yolov8m.pt')

In [None]:
# To train simply run the train method on the model parameter.
help(model.train)

In [None]:
# Specify parameters for training, including parameters used in NFT project.
kwargs = {
    'data': '/jcDataStore/Data/NeuroTK-Dash/nft-detection/datasets/' + \
            'nft-ai-project/dataset.yaml',
    'epochs': 3,
    'patience': 20,
    'batch': 8,
    'imgsz': 1280,
    'device': 0,  # for multiple GPUs you can pass the GPU ids in a list
    'project': '/jcDataStore/Data/NeuroTK-Dash/nft-detection/models/',
    'name': 'test',
    'exist_ok': True,
    'verbose': True,
    # hyps
    'box': 0.05,
    'cls': 0.3,
    'hsv_h': 0.015,
    'hsv_s': 0.2,
    'hsv_v': 0.4,
    'scale': 0.1,
    'flipud': 0.5,
    'fliplr': 0.5,
    'mosaic': 0.0
}

results = model.train(**kwargs)


### Create Test Dataset for Validation

In [4]:
# Read the test ROIs and downsample them 20X and save again.
rois_df = read_csv(
    '/jcDataStore/Data/nft-ai-project/datasets/test-datasets/rois.csv'
)
rois_df = rois_df[rois_df.roi_group == 'test-roi']
rois_df['sf'] = [1] * len(rois_df)

rois_df = rois_df.replace(
    '/workspace/data/',
    '/jcDataStore/Data/nft-ai-project/',
    regex=True
)

# Create location to save rois.
src_dir = '/jcDataStore/Data/NeuroTK-Dash/nft-detection'
roi_img_dir = join(src_dir, 'test-rois/images')
roi_label_dir = join(src_dir, 'test-rois/labels')
roi_bound_dir = join(src_dir, 'test-rois/boundaries')

create_dirs([roi_img_dir, roi_label_dir, roi_bound_dir])

for i, r in tqdm(rois_df.iterrows(), total=len(rois_df)):
    fn = get_filename(r.fp)
        
    new_img_fp = join(roi_img_dir, fn + '.png')
    
    # Read the image.
    img = imread(r.fp)
    
    # Convert the corners into a file.
    corners = r.roi_corners
    
    # Resize image.        
    img = cv.resize(img, None, fx=0.5, fy=0.5, interpolation=cv.INTER_AREA)
    
    h, w = img.shape[:2]
    
    imwrite(new_img_fp, img)
        
    # Save the corresponding label and boundary file.
    label_fp = im_to_txt_path(r.fp)
            
    if isfile(label_fp):
        # Convert the labels, they are in x1, y1, x2, y2 format.
        lines = ''
        
        with open(label_fp, 'r') as fh:
            for ln in fh.readlines():
                ln = ln.strip()
                if ln:
                    label, x1, y1, x2, y2 = [int(l) for l in ln.split(' ')]
                    
                    xc = (x1 + x2) / 2 / r.w
                    yc = (y1 + y2) / 2 / r.h
                    w = (x2 - x1) / r.w
                    h = (y2 - y1) / r.h
                    
                    lines += f'{label} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}\n'
                    
        with open(join(roi_label_dir, fn + '.txt'), 'w') as fh:
            fh.write(lines.strip())
                            
    corners = [int(c) for c in r.roi_corners.split(' ')]
    corners = np.array(corners).reshape((-1, 2)) - [r.x, r.y]
    corners = corners / [r.w, r.h]
    corners = corners.flatten().tolist()
    corners = ' '.join([str(c) for c in corners])
    
    with open(join(roi_bound_dir, f'{fn}.txt'), 'w') as fh:
        fh.write(corners)
            
    # Track the ROI metadata.
    rois_df.loc[i, 'mag'] = 20
    rois_df.loc[i, 'fp'] = new_img_fp
    rois_df.loc[i, 'h'] = h
    rois_df.loc[i, 'w'] = w
    rois_df.loc[i, 'sf'] = 0.5
    
rois_df = rois_df[[
    'wsi_name', 'case', 'wsi_id', 'Braak_stage', 'region', 'fp', 'mag', 'h', 
    'w', 'sf'
]]
rois_df.to_csv(join(src_dir, 'test-rois.csv'), index=False)
rois_df.head()

  0%|          | 0/28 [00:00<?, ?it/s]

  rois_df.loc[i, 'h'] = h
  rois_df.loc[i, 'w'] = w
  rois_df.loc[i, 'sf'] = 0.5


Unnamed: 0,wsi_name,case,wsi_id,Braak_stage,region,fp,mag,h,w,sf
0,E19-35_1_TAU.svs,E19-35,6381487a7f8a5e686a5e00d3,6,Left hippocampus,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,20.0,0.048022,0.021575,0.5
1,E19-70_1_TAU.svs,E19-70,638148817f8a5e686a5e610a,3,Left hippocampus,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,20.0,0.031669,0.025283,0.5
2,E19-139_6_tau.svs,E19-139,6381488b7f8a5e686a5ec193,1,Occipital cortex,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,20.0,2473.0,1351.0,0.5
3,E19-141_6_tau.svs,E19-141,638148957f8a5e686a5f2ca5,6,Occipital cortex,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,20.0,0.015016,0.016728,0.5
4,E08-145_2A_TAU.svs,E08-145,6381489f7f8a5e686a5f95da,0,Hippocampus,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,20.0,0.017768,0.011208,0.5


In [5]:
# Using multi-parallel processing when doing this.
tiles_csv_fp = join(src_dir, 'test-tiles.csv')

# Note: most recent version of shapely throws warnings when there is no 
# intersection between two geometris. This is not an issue but the warnings 
# are annoying. May put a catch later to avoid this.
tiles_df = tile_roi_with_labels_wrapper(
    rois_df.fp.tolist(), 
    join(src_dir, 'test-tiles'), 
    tile_size=640,
    stride=480,
    boundary_thr=0.2,
    nproc=10,
    box_thr=0.5,
    notebook=True
)

tiles_df.to_csv(tiles_csv_fp, index=False)
tiles_df.head()

  0%|          | 0/28 [00:00<?, ?it/s]

  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **kwargs)
  return lib.intersection(a, b, **

Unnamed: 0,fp,roi_fp,x,y,tile_size
0,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,0,0,640
1,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,0,480,640
2,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,0,960,640
3,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,0,1440,640
4,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,/jcDataStore/Data/NeuroTK-Dash/nft-detection/t...,480,0,640


In [6]:
# Create the test.txt file.
lines = '\n'.join(tiles_df.fp)

with open(join(src_dir, 'test-20X-640.txt'), 'w') as fh:
    fh.write(lines.strip())

### Validate Trained Model

In [57]:
# Load a base YOLO model and load pretrained weights.
model = YOLO(
    '/jcDataStore/Data/NeuroTK-Dash/models/nft-ai-project-20X/weights/best.pt'
)

In [58]:
metrics = model.val(
    data='/jcDataStore/Data/NeuroTK-Dash/nft-detection/20X-dataset.yaml',
    task='test',
    device='0,1',
    batch=16,
    imgsz=1280,
)

Ultralytics YOLOv8.0.188 🚀 Python-3.11.5 torch-2.0.1+cu117 CUDA:0 (NVIDIA RTX A4500, 20178MiB)
                                                            CUDA:1 (NVIDIA RTX A4500, 20178MiB)
Model summary (fused): 218 layers, 25840918 parameters, 0 gradients
[34m[1mval: [0mScanning /jcDataStore/Data/NeuroTK-Dash/nft-detection/background-tiles/labels... 86 images, 71 backgrounds, 0 corrupt: 100%|██████████| 157/157 [00:01<00:00, 91.76it/s]
[34m[1mval: [0mNew cache created: /jcDataStore/Data/NeuroTK-Dash/nft-detection/background-tiles/labels.cache
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:13<00:00,  1.40s/it]
                   all        157        942      0.589      0.584      0.557      0.323
               Pre-NFT        157         92      0.502      0.417      0.386      0.199
                  iNFT        157        850      0.676      0.752      0.729      0.447
Speed: 9.8ms preprocess, 26.8ms infer