In [1]:
import numpy as np
import cv2
import pickle
from pathlib import Path
from ultralytics import YOLO
from tqdm import tqdm
import random
import shutil
from pathlib import Path
import yaml

### Подготовка данных для обучения

In [21]:
def mask_to_yolo_seg(mask, class_id, image_width, image_height, min_area=50):
    if len(mask.shape) == 3:
        mask = mask[:, :, 0]
    
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    yolo_lines = []
    for contour in contours:
        if len(contour) < 3:
            continue
            
        area = cv2.contourArea(contour)
        if area < min_area:
            continue
        
        contour = contour.squeeze().astype(float)
        if len(contour.shape) == 1:
            continue
            
        contour[:, 0] /= image_width
        contour[:, 1] /= image_height
        
        contour = contour.flatten()
        
        line = f'{class_id} ' + ' '.join([f'{x:.6f}' for x in contour])
        yolo_lines.append(line)
    
    return yolo_lines

In [None]:
masks_info = pickle.load(open('../masks/masks_info.pkl', 'rb'))
output_dir = Path('../yolo_seg_labels')
output_dir.mkdir(exist_ok=True)
class_mapping = {
    1: 0,
    2: 1
}

for info in tqdm(masks_info):
    file_name = info['file_name']
    mask_name = info['mask_name']
    classes = info['classes']
    
    mask = cv2.imread(f'../masks/{mask_name}', cv2.IMREAD_GRAYSCALE)
    image = cv2.imread(f'../train/{file_name}')
    h, w = image.shape[:2]
    
    unique_classes = set(classes)
    all_lines = []
    
    for class_value in unique_classes:
        if class_value == 0:
            continue
            
        class_id = class_mapping[class_value]
        class_mask = np.zeros_like(mask)
        class_mask[mask == class_value] = 255
        lines = mask_to_yolo_seg(class_mask, class_id, w, h)
        all_lines.extend(lines)
    
    txt_path = output_dir / (Path(file_name).stem + '.txt')
    with open(txt_path, 'w') as f:
        f.write('\n'.join(all_lines))

100%|██████████| 994/994 [01:03<00:00, 15.69it/s]


___
### Сплит на трейн/тест

In [25]:
images_dir = Path('../train')
labels_dir = Path('../yolo_seg_labels')
output_dir = Path('../dataset')

train_ratio = 0.8
val_ratio = 0.2

random.seed(42)

image_files = list(images_dir.glob('*.jpg'))
random.shuffle(image_files)

train_count = int(len(image_files) * train_ratio)
train_files = image_files[:train_count]
val_files = image_files[train_count:]

for split in ['train', 'val']:
    (output_dir / split / 'images').mkdir(parents=True, exist_ok=True)
    (output_dir / split / 'labels').mkdir(parents=True, exist_ok=True)

for img_file in train_files:
    label_file = labels_dir / (img_file.stem + '.txt')
    if label_file.exists():
        shutil.copy(img_file, output_dir / 'train' / 'images' / img_file.name)
        shutil.copy(label_file, output_dir / 'train' / 'labels' / label_file.name)

for img_file in val_files:
    label_file = labels_dir / (img_file.stem + '.txt')
    if label_file.exists():
        shutil.copy(img_file, output_dir / 'val' / 'images' / img_file.name)
        shutil.copy(label_file, output_dir / 'val' / 'labels' / label_file.name)

data_yaml = {
    'path': str(output_dir.absolute()),
    'train': 'train/images',
    'val': 'val/images',
    'nc': 2,
    'names': ['tree', 'road']
}

with open(output_dir / 'data.yaml', 'w') as f:
    yaml.dump(data_yaml, f, default_flow_style=False)

print(f'Всего изображений: {len(image_files)}')
print(f'train: {len(train_files)}')
print(f'val: {len(val_files)}')
print(f'data.yaml создан в {output_dir / "data.yaml"}')

Всего изображений: 1000
train: 800
val: 200
data.yaml создан в ..\dataset\data.yaml
