In [None]:
from roboflow import Roboflow
from sklearn.model_selection import train_test_split
import os
import yaml

# Folder (under the directory the notebook is run from) that receives the export.
dataset_path = os.getcwd() + "/datasets/taiwan_pill_for_label"

#download from roboflow
# The API key is read from the environment so the secret is never stored in
# the notebook or its version history.
# NOTE(review): the key that was previously committed in this cell should be
# revoked/rotated in the Roboflow account.
api_key = os.environ.get("ROBOFLOW_API_KEY")
if not api_key:
    raise RuntimeError(
        "ROBOFLOW_API_KEY is not set; export your Roboflow API key before running this cell"
    )

rf = Roboflow(api_key=api_key)
project = rf.workspace("doccampill").project("taiwan_pill_for_label")
version = project.version(35)
# Export version 35 in the "yolov11" format into dataset_path.
dataset = version.download("yolov11", location=dataset_path)


#rename without added roboflow extension
# Image / label folders of the three splits. Every string keeps its trailing
# '/' because file names are appended by plain concatenation.
train_path_imgs = dataset_path + '/train/images/'
train_path_labels = dataset_path + '/train/labels/'
valid_path_imgs = dataset_path + '/valid/images/'
valid_path_labels = dataset_path + '/valid/labels/'
test_path_imgs = dataset_path + '/test/images/'
test_path_labels = dataset_path + '/test/labels/'

# Each image "<prefix>_<rest>.jpg" becomes "<prefix>.jpg" (and its label
# "<prefix>.txt"), where <prefix> is the text before the first '_'.
# The whole plan is validated before any file is touched so a name clash
# cannot leave the folder half-renamed or silently overwrite a file
# (os.rename replaces an existing destination on POSIX).
rename_plan = {}  # new stem -> original image file name
for file in os.listdir(train_path_imgs):
    old_name = os.path.basename(file)
    old_stem = os.path.splitext(old_name)[0]
    new_name = old_stem.split('_')[0]
    if new_name == old_stem:
        # Nothing to strip (e.g. the cell was already run): leave the file alone
        # instead of turning "x.jpg" into "x.jpg.jpg".
        continue
    if new_name in rename_plan or os.path.exists(train_path_imgs + new_name + '.jpg'):
        raise FileExistsError(
            "Renaming " + old_name + " to " + new_name + ".jpg would overwrite another image"
        )
    rename_plan[new_name] = old_name

for new_name, old_name in rename_plan.items():
    old_stem = os.path.splitext(old_name)[0]
    os.rename(train_path_imgs + old_name, train_path_imgs + new_name + '.jpg')
    # The label shares the image's stem; only the extension differs.
    os.rename(train_path_labels + old_stem + '.txt', train_path_labels + new_name + '.txt')

#make directories for test and valid
# exist_ok=True makes this safe to re-run when the folders are already there.
for split_dir in (valid_path_imgs, valid_path_labels, test_path_imgs, test_path_labels):
    os.makedirs(split_dir, exist_ok=True)

#partition data into train/valid/test
x = []  # label file names found in the train labels folder
y = []  # class id (first token of each label line, kept as a string) for the file at the same index in x

# Sorted so the seeded train_test_split below sees the same order on every
# machine (os.listdir order is arbitrary).
for file in sorted(os.listdir(train_path_labels)):
    with open(train_path_labels + file, 'r') as f:
        # First whitespace-separated token of every non-blank line is the class id.
        label = [line.split()[0] for line in f if line.strip()]

    if not label:
        # No annotations in this file, so there is no class to stratify on;
        # it is left out of x/y and therefore stays in the train folder.
        continue

    if len(set(label)) > 1:
        # Stop here instead of continuing with a partial file list: the cells
        # below would otherwise split and move only the files seen so far.
        raise ValueError('Error: multiple classes in file ' + file)

    x.append(file)
    y.append(label[0])

#split data (train data is stratified as to keep proportions similar to overall proportions)
# First cut: 20% of the files are held out, stratified on the class id.
x_train, x_temp, y_train, y_temp = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
# Second cut: the held-out part is halved into validation and test (not stratified).
x_val, x_test, y_val, y_test = train_test_split(
    x_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)

#move images and labels from the valid and test partitions into their folders
# Each entry: (label file names, destination labels folder, destination images folder).
# The valid partition is processed first, then the test partition.
for split_files, split_labels_dir, split_imgs_dir in (
    (x_val, valid_path_labels, valid_path_imgs),
    (x_test, test_path_labels, test_path_imgs),
):
    for label_name in split_files:
        # The image carries the same name as its label with '.jpg' in place of '.txt'.
        image_name = label_name.replace('.txt', '.jpg')
        os.rename(train_path_labels + label_name, split_labels_dir + label_name)
        os.rename(train_path_imgs + image_name, split_imgs_dir + image_name)

#edit yaml
with open(dataset_path + '/data.yaml', 'r') as file:
    loaded = yaml.safe_load(file) or {}

# 'path' is written first and always points at the current dataset_path, even
# when the file already carries a 'path' entry from an earlier run.
data = {'path': dataset_path}
data.update({key: value for key, value in loaded.items() if key != 'path'})

# Turn a list of class names into an {index: name} mapping. A mapping that is
# already in that form (the cell was run before) is kept as is; enumerating a
# dict would replace the class names with their integer keys.
names = data.get('names')
if isinstance(names, list):
    data['names'] = dict(enumerate(names))

# Split folders, relative to 'path'.
data['train'] = 'train/images'
data['val'] = 'valid/images'
data['test'] = 'test/images'

# sort_keys=False keeps the key order built above ('path' first).
with open(dataset_path + '/data.yaml', 'w') as file:
    yaml.safe_dump(data, file, sort_keys=False)

In [None]:
import kagglehub
import yaml

# Download latest version of ogyeiv2 dataset from kaggle
download_root = kagglehub.dataset_download("richardradli/ogyeiv2")
print("Path to dataset files:", download_root)

#write a yaml file for dataset
# data.yaml is written into the nested ogyeiv2/ogyeiv2 folder of the download,
# which is also the 'path' the split folders below are relative to.
dataset_path = download_root + '/ogyeiv2/ogyeiv2'
data = {
    'path': dataset_path,
    'train': 'train/images',
    'val': 'valid/images',
    'test': 'test/images',
    'names': {0: 'pill'},
}

# sort_keys=False keeps the keys in the order listed above.
with open(dataset_path + '/data.yaml', 'w') as file:
    yaml.safe_dump(data, file, sort_keys=False)

Path to dataset files: /Users/Amanda/.cache/kagglehub/datasets/richardradli/ogyeiv2/versions/3


In [None]:
import ultralytics

#train yolo11 segmentation on dataset
# Start from the 'yolo11n-seg.pt' checkpoint.
model = ultralytics.YOLO('yolo11n-seg.pt')

# Uses the data.yaml under whatever dataset_path currently points to
# (the most recently executed dataset cell).
results = model.train(
    data=dataset_path + '/data.yaml',
    fraction=1,
    epochs=10,
    single_cls=True,
    plots=True,
)