In [9]:
from PIL import Image
import numpy as np
import random
import matplotlib.pyplot as plt

In [10]:
def main(num_img=1000, sample_dir='./dice_samples',
         output_dir='./output/original_set', seed=None):
    """Generate synthetic dice images together with bounding-box label files.

    For each die face 1..6 the sample ``{sample_dir}/sample_dice1_{face}.png``
    is loaded, resized to 700x700, and then ``num_img`` times:

    1. rotated by a random whole-degree angle (canvas expanded to fit),
    2. pasted at a random position on a black 2000x2000 RGB canvas,
    3. converted to grayscale, resized to 20x20 and then to 80x80,
    4. saved as ``{output_dir}/images/{k}.png``.

    A matching ``{output_dir}/labels/{k}.txt`` is written with a single line
    ``class x_center y_center width height`` where ``class`` is ``face - 1``
    and the box is the rotated image's extent divided by the canvas size.
    ``k`` is ``num_img * (face - 1) + i + 1`` for the i-th image of a face,
    so files are numbered 1..6*num_img.

    Args:
        num_img: Number of images to generate per die face.
        sample_dir: Directory containing the six ``sample_dice1_{face}.png``
            source images.
        output_dir: Dataset root; ``images/`` and ``labels/`` are created
            below it if they do not exist.
        seed: If given, a private ``random.Random(seed)`` drives rotation and
            placement so the run is reproducible. If ``None`` (default), the
            global ``random`` module state is used, as before.

    Raises:
        FileNotFoundError: If one of the sample images is missing.
    """
    # Local import so this function does not depend on another notebook cell
    # having imported os first.
    import os

    rng = random if seed is None else random.Random(seed)

    die_size = (700, 700)
    background_size = (2000, 2000)

    # Create the output folders up front; saving would otherwise fail on a
    # fresh checkout.
    images_dir = os.path.join(output_dir, 'images')
    labels_dir = os.path.join(output_dir, 'labels')
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    for idx_dice in range(1, 7):
        # Load and resize the source die image; the context manager closes
        # the underlying file once the pixel data has been copied.
        image_path = os.path.join(sample_dir, f'sample_dice1_{idx_dice}.png')
        with Image.open(image_path) as original_image:
            downscaled_image = original_image.convert('RGBA').resize(die_size)

        for idx_img in range(num_img):
            sample_id = num_img * (idx_dice - 1) + idx_img + 1

            # Rotate by a random angle. randrange(360) is used because 0 and
            # 360 degrees are the same pose and must not be drawn twice.
            random_angle = rng.randrange(360)
            rotated_image = downscaled_image.rotate(random_angle, expand=True)
            rotated_w, rotated_h = rotated_image.size

            # Largest top-left offset that keeps the rotated image inside the
            # canvas, clamped at 0 in case the image is larger than the canvas.
            max_x = max(background_size[0] - rotated_w, 0)
            max_y = max(background_size[1] - rotated_h, 0)
            pos_x = rng.randint(0, max_x)
            pos_y = rng.randint(0, max_y)

            # Fresh black canvas for every sample.
            canvas = Image.new("RGB", background_size, (0, 0, 0))

            # Use the image's own alpha band as the paste mask; without a mask
            # Pillow ignores alpha and copies transparent pixels' RGB values.
            canvas.paste(rotated_image, (pos_x, pos_y), mask=rotated_image)

            # Grayscale, then 20x20 followed by 80x80.
            # NOTE(review): the down-then-up resize presumably simulates a
            # low-resolution capture -- confirm the intent.
            low_res = canvas.convert("L").resize((20, 20)).resize((80, 80))
            low_res.save(os.path.join(images_dir, f'{sample_id}.png'))

            # Bounding box of the rotated image, normalised by canvas size.
            rel_center_x = (pos_x + rotated_w / 2) / background_size[0]
            rel_center_y = (pos_y + rotated_h / 2) / background_size[1]
            rel_width = rotated_w / background_size[0]
            rel_height = rotated_h / background_size[1]

            # One label line per image; class ids are zero-based (face - 1).
            annotation = f"{idx_dice-1} {rel_center_x} {rel_center_y} {rel_width} {rel_height}\n"
            annotation_path = os.path.join(labels_dir, f'{sample_id}.txt')
            with open(annotation_path, 'w') as file:
                file.write(annotation)

In [11]:
# Call main() with its default arguments, but only when this code is executed
# as the top-level module (not when it is imported).
if __name__ == "__main__":
    main()

In [12]:
# Automatic train / valid / test split
import glob
import os
import random
import shutil


def split_dataset(dataset_dir, train_ratio=0.8, val_ratio=0.1, seed=0):
    """Copy a labelled image dataset into train/valid/test sub-folders.

    Every ``{dataset_dir}/labels/*.txt`` file is paired with
    ``{dataset_dir}/images/<same stem>.png``. The pairs are shuffled and
    copied into ``{dataset_dir}/{train,valid,test}/{images,labels}``; the
    source files are left in place.

    Args:
        dataset_dir: Dataset root containing ``images/`` and ``labels/``.
        train_ratio: Fraction of samples for the training split.
        val_ratio: Fraction of samples for the validation split; whatever
            remains goes to the test split.
        seed: Seed for the shuffle. With a fixed seed and an unchanged
            dataset, re-running reproduces the same split, so repeated runs
            do not scatter one sample across several splits. Pass ``None``
            for a different shuffle on every call.

    Returns:
        dict mapping ``"train"``, ``"valid"`` and ``"test"`` to the list of
        label-file paths assigned to that split.

    Raises:
        FileNotFoundError: If a label file has no matching ``.png`` image.
            This is checked before anything is copied.

    Note:
        Files already present in the split folders are not removed. Clear
        them manually if the dataset contents or the seed have changed.
    """
    labels_dir = os.path.join(dataset_dir, "labels")
    images_dir = os.path.join(dataset_dir, "images")

    # glob order depends on the file system, so sort before shuffling to make
    # the seeded shuffle reproducible. escape() guards against glob
    # metacharacters in the directory name.
    label_paths = sorted(glob.glob(os.path.join(glob.escape(labels_dir), "*.txt")))
    random.Random(seed).shuffle(label_paths)

    # With the default ratios: 8:1:1; rounding remainders fall into "test".
    num_train = int(len(label_paths) * train_ratio)
    num_val = int(len(label_paths) * val_ratio)
    splits = {
        "train": label_paths[:num_train],
        "valid": label_paths[num_train:num_train + num_val],
        "test": label_paths[num_train + num_val:],
    }

    if not label_paths:
        print("split_dataset: no label files found in {}".format(labels_dir))
        return splits

    # Derive each image path from the label's file name only. A textual
    # replace of "labels" on the whole path would also rewrite parent folders.
    image_of = {
        txt_path: os.path.join(
            images_dir, os.path.splitext(os.path.basename(txt_path))[0] + ".png"
        )
        for txt_path in label_paths
    }
    missing = [img for img in image_of.values() if not os.path.isfile(img)]
    if missing:
        raise FileNotFoundError(
            "{} label file(s) have no matching image, e.g. {}".format(len(missing), missing[0])
        )

    for name, txt_paths in splits.items():
        split_images = os.path.join(dataset_dir, name, "images")
        split_labels = os.path.join(dataset_dir, name, "labels")
        os.makedirs(split_images, exist_ok=True)
        os.makedirs(split_labels, exist_ok=True)

        # shutil.copy instead of shelling out to `cp`: portable, safe for
        # paths containing spaces, and failures raise instead of being ignored.
        for txt_path in txt_paths:
            shutil.copy(txt_path, split_labels)
            shutil.copy(image_of[txt_path], split_images)

    return splits


dataset_dir = "./output/original_set"

split_dict = split_dataset(dataset_dir)

# Module-level names kept so that other cells relying on them still work.
num_train = len(split_dict["train"])
num_val = len(split_dict["valid"])
num_test = len(split_dict["test"])
num_data = num_train + num_val + num_test
# Shuffled list of label-file paths (the name is historical).
img_list = split_dict["train"] + split_dict["valid"] + split_dict["test"]

In [2]:
from ultralytics import YOLO
# Start from the pretrained yolov8n.pt checkpoint (recommended for training).
model = YOLO("yolov8n.pt")

# Train for 20 epochs using the dataset description in data.yaml.
model.train(
    data="./output/original_set/data.yaml",
    epochs=20,
)

# Evaluate the trained model on the validation set.
metrics = model.val()

Ultralytics YOLOv8.0.203 🚀 Python-3.9.18 torch-2.1.0+cu121 CUDA:0 (NVIDIA RTX A4500, 20170MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=./output/original_set/data.yaml, epochs=20, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train9, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torc