In [5]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import math
import numpy as np
import re
from shapely.geometry import Polygon, LineString, Point
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import CocoDetection
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision.transforms as T
from torch.optim import SGD, Adam, Adadelta
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision import transforms
from torch.utils.data._utils.collate import default_collate
import torchvision
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.transforms import functional as F
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import random
from math import radians, cos, sin
import ast

# Load the Data

In [184]:
# Parse the DeepScores train and test annotation files into plain dictionaries
with open('./data/ds2_dense/deepscores_train.json') as train_json:
    data1 = json.load(train_json)
with open('./data/ds2_dense/deepscores_test.json') as test_json:
    data2 = json.load(test_json)

In [185]:
# Image metadata: the 'images' entry of each JSON is loaded as-is
train_images = pd.DataFrame(data1['images'])
test_images = pd.DataFrame(data2['images'])

# Annotations: the frame built from the 'annotations' entry is transposed
# (presumably so that each annotation becomes one row -- verify against the JSON layout)
train_obboxs = pd.DataFrame(data1['annotations']).transpose()
test_obboxs = pd.DataFrame(data2['annotations']).transpose()

In [186]:
test_images

Unnamed: 0,id,filename,width,height,ann_ids
0,1,lg-75827152-aug-lilyjazz-.png,1960,2772,"[160131, 160132, 160133, 160134, 160135, 16013..."
1,5,lg-210359136-aug-lilyjazz--page-14.png,1960,2772,"[503778, 503779, 503780, 503781, 503782, 50378..."
2,6,lg-366136986510816260-aug-gutenberg1939-.png,1960,2772,"[769765, 769766, 769767, 769768, 769769, 76977..."
3,17,lg-135180926-aug-gonville-.png,1960,2772,"[442270, 442271, 442272, 442273, 442274, 44227..."
4,25,lg-36421666-aug-lilyjazz--page-1.png,1960,2772,"[792112, 792113, 792114, 792115, 792116, 79211..."
...,...,...,...,...,...
347,1693,lg-751243026891245984-aug-emmentaler-.png,1960,2772,"[398497, 398498, 398499, 398500, 398501, 39850..."
348,1695,lg-120452889-aug-emmentaler--page-8.png,1960,2772,"[532652, 532653, 532654, 532655, 532656, 53265..."
349,1700,lg-47048563-aug-beethoven--page-2.png,1960,2772,"[1025538, 1025539, 1025540, 1025541, 1025542, ..."
350,1711,lg-28294781-aug-emmentaler--page-2.png,1960,2772,"[327289, 327290, 327291, 327292, 327293, 32729..."


# Splitting the Images into Train and Test sets

Note this step probably isn't necessary if you used the entire images directory for training; this part just splits it up into training and testing based on the files specified in the train and test jsons.

In [31]:
import shutil

In [22]:
# train_dir = './data/ds2_dense/train'
# test_dir = './data/ds2_dense/test'
# os.makedirs(train_dir, exist_ok=True)
# os.makedirs(test_dir, exist_ok=True)

In [35]:
# image_dir = './data/ds2_dense/images'

In [33]:
# src_path

'./data/ds2_dense/lg-94161796-aug-gonville--page-3.png'

In [36]:
# # Move train images to train directory
# for image_filename in train_images['filename']:
#     src_path = os.path.join(image_dir, image_filename)
#     dest_path = os.path.join(train_dir, image_filename)
#     shutil.move(src_path, dest_path)

In [38]:
# for image_filename in test_images['filename']:
#     src_path = os.path.join(image_dir, image_filename)
#     dest_path = os.path.join(test_dir, image_filename)
#     shutil.move(src_path, dest_path)

# Labels

<span style="background-color:rgb(255,255,150)">The yolo model wants labels to be zero-based indexing, so I subtracted 1</span>


In [188]:
raw_labels = pd.read_csv('new_labels.csv')
# Shift every label down by one so that labels start at 0
raw_labels['label'] = raw_labels['label'] - 1
raw_labels.head()

Unnamed: 0,old_index,old_id,name,dataset,color,label
0,0,1,brace,deepscores,1,0
1,1,137,brace,muscima++,1,0
2,2,2,ledgerLine,deepscores,2,1
3,3,138,ledgerLine,muscima++,2,1
4,4,3,repeatDot,deepscores,7,2


In [189]:
# One (label, name) row per distinct label, ordered by label with a fresh 0..n-1 index
unique_labels = (
    raw_labels[['label', 'name']]
    .drop_duplicates(subset=['label'])
    .sort_values(by=['label'])
    .reset_index(drop=True)
)

In [190]:
unique_labels

Unnamed: 0,label,name
0,0,brace
1,1,ledgerLine
2,2,repeatDot
3,3,segno
4,4,coda
...,...,...
151,151,tuplet9
152,152,tupletBracket
153,153,ottavaBracket
154,154,staff


# Yaml file for Training 

With `Yolo`, a yaml file is specified for training. Below are common key-value pairs in these dataset yaml files: 

- **path**: Specifies the root directory of the dataset.
- **train**: Specifies the path to the directory containing training images. This path is usually relative to the dataset root directory specified in the 'path' key.
- **val**: Specifies the path to the directory containing validation images. Similar to 'train', this path is relative to the dataset root directory.
- **test**: Specifies the path to the directory containing test images, if applicable. This path is also relative to the dataset root directory.
- **names**: Contains label-name mappings for the classes in the dataset. It typically consists of a dictionary where the keys are class IDs (usually integers) and the values are the corresponding class names (strings).
- **download**: Optional key that may specify a URL or script for downloading the dataset.

In [191]:
import yaml

Based on what is online, it seems people ran into issues using relative paths, so it is probably best to always use an absolute path when specifying the value of the `path` key.

In [192]:
# Current working directory of the kernel; used as the absolute base for the dataset path
notebook_dir = os.getcwd()

# Absolute dataset root: <working directory>/data/ds2_dense
data_path = os.path.join(
    notebook_dir,
    'data',
    'ds2_dense',
)

print("Data directory:", data_path)

Data directory: /Users/bwhom/Desktop/OMR/data/ds2_dense


In [193]:
def generate_yaml_from_dataframe(df):
    """
    Build the 'names' section of a dataset YAML file.

    Input:
        df (DataFrame): one row per class, with 'label' and 'name' columns.

    Output:
        str: the text "names:" followed by one two-space-indented
        "<label>: <name>" line per row of df, each newline-terminated.
    """
    entries = (
        f"  {record['label']}: {record['name']}\n"
        for _, record in df.iterrows()
    )
    return "names:\n" + "".join(entries)

In [194]:
def write_yaml_dataset(path, train_path, val_path, label_df=None, filename='deep_scores.yaml'):
    """
    Write dataset paths and label-name mappings to a YAML file.

    Args:
        path (str): Dataset root directory.
        train_path (str): Path to train images directory (relative to 'path').
        val_path (str or None): Path to validation images directory (relative
            to 'path'). The 'val' entry is omitted when this is None.
        label_df (DataFrame or None): DataFrame containing 'label' and 'name'
            columns. The 'names' section is omitted when this is None.
        filename (str): Name of the YAML file to write.
    """
    yaml_text = "path: {}\ntrain: {}\n".format(path, train_path)

    # Guard on the val_path argument itself so the function does not depend on
    # any variable defined outside of it.
    if val_path is not None:
        yaml_text += "val: {}\n".format(val_path)

    if label_df is not None:
        yaml_text += generate_yaml_from_dataframe(label_df)

    # Write data to YAML file
    with open(filename, 'w') as yaml_file:
        yaml_file.write(yaml_text)





In [195]:
# Arguments for the dataset YAML: class table plus the train / validation image folders
label_df = unique_labels
train_path = 'images/train'
val_path = 'images/test'


In [196]:
# Write the dataset YAML under its default file name
write_yaml_dataset(
    path=data_path,
    train_path=train_path,
    val_path=val_path,
    label_df=label_df,
)

# Prepare Annotations/images

Code taken from Daniel :) 

In [197]:
# Rename 'id' to 'img_id' in place on both image tables
for images_df in (train_images, test_images):
    images_df.rename(columns={'id': 'img_id'}, inplace=True)
test_images.head(3)

Unnamed: 0,img_id,filename,width,height,ann_ids
0,1,lg-75827152-aug-lilyjazz-.png,1960,2772,"[160131, 160132, 160133, 160134, 160135, 16013..."
1,5,lg-210359136-aug-lilyjazz--page-14.png,1960,2772,"[503778, 503779, 503780, 503781, 503782, 50378..."
2,6,lg-366136986510816260-aug-gutenberg1939-.png,1960,2772,"[769765, 769766, 769767, 769768, 769769, 76977..."


In [198]:
# Lookup table used to remap class labels: old_id (as a string) -> label
class_mapping = {
    old_id: new_label
    for old_id, new_label in zip(raw_labels['old_id'].astype(str), raw_labels['label'])
}

def map_cat_ids_to_classes(cat_ids):
    """Look up each category id (converted to a string) in class_mapping.

    Returns a list of the same length as cat_ids; ids that are not keys of
    class_mapping yield None.
    """
    mapped = []
    for cat_id in cat_ids:
        mapped.append(class_mapping.get(str(cat_id)))
    return mapped

def clean_labels(label_list):
    """Return the distinct non-None entries of label_list as a list."""
    present = filter(lambda label: label is not None, label_list)
    # Passing through a set removes duplicates
    return list(set(present))
    
def select_highest_precedence(label_list):
    """Return the largest value in label_list."""
    highest = max(label_list)
    return highest

# Turn each cat_id list into a single label, for both annotation frames:
# map ids to labels -> drop None / duplicates -> keep the largest label
for obboxs in (train_obboxs, test_obboxs):
    obboxs['label'] = (
        obboxs['cat_id']
        .apply(map_cat_ids_to_classes)
        .apply(clean_labels)
        .apply(select_highest_precedence)
    )
train_obboxs.head()

Unnamed: 0,a_bbox,o_bbox,cat_id,area,img_id,comments,label
1020,"[116.0, 139.0, 2315.0, 206.0]","[2315.0, 206.0, 2315.0, 139.0, 116.0, 139.0, 1...","[135, 208]",18945,679,instance:#000010;,154
1021,"[116.0, 309.0, 2315.0, 376.0]","[2315.0, 376.0, 2315.0, 309.0, 116.0, 309.0, 1...","[135, 208]",19223,679,instance:#000021;,154
1022,"[1880.0, 561.0, 1911.0, 564.0]","[1911.0, 564.0, 1911.0, 561.0, 1880.0, 561.0, ...","[2, 138]",120,679,instance:#000022;,1
1023,"[1883.0, 578.0, 1911.0, 580.0]","[1911.0, 580.0, 1911.0, 578.0, 1883.0, 578.0, ...","[2, 138]",27,679,instance:#000023;,1
1024,"[1827.0, 561.0, 1857.0, 564.0]","[1857.0, 564.0, 1857.0, 561.0, 1827.0, 561.0, ...","[2, 138]",112,679,instance:#000024;,1


In [199]:
def extract_info(comment):
    """Pull the duration and relative position out of an annotation comment.

    Returns [duration, rel_position] as ints; an entry is None when the
    corresponding 'duration:<digits>;' or 'rel_position:<int>;' field is not
    found in comment.
    """
    def _first_int(pattern):
        # int value of the first capture group, or None when there is no match
        match = re.search(pattern, comment)
        if match:
            return int(match.group(1))
        return None

    return [_first_int(r'duration:(\d+);'), _first_int(r'rel_position:(-?\d+);')]
    
# Split the parsed [duration, rel_position] pairs into two new columns on both frames
for obboxs in (train_obboxs, test_obboxs):
    parsed_pairs = obboxs['comments'].apply(extract_info).tolist()
    obboxs[['duration', 'rel_position']] = parsed_pairs
train_obboxs.iloc[100:104]

Unnamed: 0,a_bbox,o_bbox,cat_id,area,img_id,comments,label,duration,rel_position
1120,"[1295.0, 134.0, 1296.0, 186.0]","[1296.0, 186.0, 1296.0, 134.0, 1295.0, 134.0, ...","[42, 161]",105,679,instance:#000089;,61,,
1121,"[1321.0, 612.0, 1341.0, 629.0]","[1343.3448486328125, 623.862060546875, 1337.48...","[27, 157]",276,679,instance:#00008a;duration:16;rel_position:-13;,44,16.0,-13.0
1122,"[1340.0, 513.0, 1341.0, 617.0]","[1341.0, 617.0, 1341.0, 513.0, 1340.0, 513.0, ...","[42, 161]",201,679,instance:#00008b;,61,,
1123,"[1358.0, 156.0, 1378.0, 172.0]","[1378.0, 156.0, 1358.0, 156.0, 1358.0, 172.0, ...","[27, 157]",279,679,instance:#00008c;duration:8;rel_position:1;,44,8.0,1.0


In [200]:
# Sentinel written into each column where the annotation carries no value:
#   duration     -> -1
#   rel_position -> 50 (nothing has a position this high)
# we may need to reapproach this with another method (e.g. a KNN inference)
missing_value_fill = {'duration': -1, 'rel_position': 50}

for obboxs in (train_obboxs, test_obboxs):
    for column, fill_value in missing_value_fill.items():
        # <column>_mask is 1 where the value is relevant (present) and 0 where it is missing;
        # it must be computed before the NaNs are overwritten
        obboxs[f'{column}_mask'] = obboxs[column].notna().astype(int)
        obboxs[column] = obboxs[column].replace(np.nan, fill_value)
train_obboxs.iloc[100:104]

Unnamed: 0,a_bbox,o_bbox,cat_id,area,img_id,comments,label,duration,rel_position,duration_mask,rel_position_mask
1120,"[1295.0, 134.0, 1296.0, 186.0]","[1296.0, 186.0, 1296.0, 134.0, 1295.0, 134.0, ...","[42, 161]",105,679,instance:#000089;,61,-1.0,50.0,0,0
1121,"[1321.0, 612.0, 1341.0, 629.0]","[1343.3448486328125, 623.862060546875, 1337.48...","[27, 157]",276,679,instance:#00008a;duration:16;rel_position:-13;,44,16.0,-13.0,1,1
1122,"[1340.0, 513.0, 1341.0, 617.0]","[1341.0, 617.0, 1341.0, 513.0, 1340.0, 513.0, ...","[42, 161]",201,679,instance:#00008b;,61,-1.0,50.0,0,0
1123,"[1358.0, 156.0, 1378.0, 172.0]","[1378.0, 156.0, 1358.0, 156.0, 1358.0, 172.0, ...","[27, 157]",279,679,instance:#00008c;duration:8;rel_position:1;,44,8.0,1.0,1,1


In [201]:
def adjust_bbox(bbox):
    """Pad a zero-width or zero-height [x_min, y_min, x_max, y_max] box.

    An axis whose min equals its max is widened by 1 on each side; an axis
    that already has extent is returned unchanged.
    """
    left, top, right, bottom = bbox
    pad_x = 1 if left == right else 0
    pad_y = 1 if top == bottom else 0
    return [left - pad_x, top - pad_y, right + pad_x, bottom + pad_y]

In [202]:
# Pad degenerate (zero-width / zero-height) axis-aligned boxes by 1 px on each side.
# A single pass is sufficient: once padded, a box no longer has equal min and max
# on that axis, so running adjust_bbox again would return it unchanged.
train_obboxs['padded_bbox'] = train_obboxs['a_bbox'].apply(adjust_bbox)
test_obboxs['padded_bbox'] = test_obboxs['a_bbox'].apply(adjust_bbox)
train_obboxs.head(1)

Unnamed: 0,a_bbox,o_bbox,cat_id,area,img_id,comments,label,duration,rel_position,duration_mask,rel_position_mask,padded_bbox
1020,"[116.0, 139.0, 2315.0, 206.0]","[2315.0, 206.0, 2315.0, 139.0, 116.0, 139.0, 1...","[135, 208]",18945,679,instance:#000010;,154,-1.0,50.0,0,0,"[116.0, 139.0, 2315.0, 206.0]"


In [203]:
# clean up: the same in-place tidy-up is applied to both annotation frames
for obboxs in (train_obboxs, test_obboxs):
    # move the index into a regular column and name it ann_id
    obboxs.reset_index(inplace=True)
    obboxs.drop(['cat_id', 'comments'], axis=1, inplace=True)
    obboxs.rename(columns={'index': 'ann_id'}, inplace=True)
    # store the id / area columns as plain integers
    for int_column in ('ann_id', 'area', 'img_id'):
        obboxs[int_column] = obboxs[int_column].astype(int)
test_obboxs.iloc[100:104]

Unnamed: 0,ann_id,a_bbox,o_bbox,area,img_id,label,duration,rel_position,duration_mask,rel_position_mask,padded_bbox
100,101,"[1466.0, 338.0, 1467.0, 413.0]","[1467.0, 413.0, 1467.0, 338.0, 1466.0, 338.0, ...",152,1180,61,-1.0,50.0,0,0,"[1466.0, 338.0, 1467.0, 413.0]"
101,102,"[1500.0, 211.0, 1520.0, 228.0]","[1522.0, 224.00001525878906, 1517.0, 209.00001...",271,1180,44,8.0,-3.0,1,1,"[1500.0, 211.0, 1520.0, 228.0]"
102,103,"[1500.0, 318.0, 1520.0, 335.0]","[1523.0, 325.0, 1512.0, 314.0, 1497.5, 328.5, ...",275,1180,42,8.0,4.0,1,1,"[1500.0, 318.0, 1520.0, 335.0]"
103,104,"[1519.0, 136.0, 1520.0, 217.0]","[1520.0, 217.0, 1520.0, 136.0, 1519.0, 136.0, ...",164,1180,61,-1.0,50.0,0,0,"[1519.0, 136.0, 1520.0, 217.0]"


In [204]:
# Attach each annotation to its image row (inner join on img_id), then discard
# the per-image 'ann_ids' column
train_data = (
    train_obboxs
    .merge(train_images, on='img_id', how='inner')
    .drop(columns='ann_ids')
)
test_data = (
    test_obboxs
    .merge(test_images, on='img_id', how='inner')
    .drop(columns='ann_ids')
)
train_data.head(1)

Unnamed: 0,ann_id,a_bbox,o_bbox,area,img_id,label,duration,rel_position,duration_mask,rel_position_mask,padded_bbox,filename,width,height
0,1020,"[116.0, 139.0, 2315.0, 206.0]","[2315.0, 206.0, 2315.0, 139.0, 116.0, 139.0, 1...",18945,679,154,-1.0,50.0,0,0,"[116.0, 139.0, 2315.0, 206.0]",lg-877777775968732096-aug-gonville--page-3.png,2431,3439


<span style='color:red'>Don't have this `barline_annotations.csv`</span>

In [None]:
barlines_csv = './barline_annotations.csv'


def convert_str_to_list(coord_str):
    """Parse the string form of a Python literal (e.g. "[1.0, 2.0]") back into its value."""
    return ast.literal_eval(coord_str)


# The extra barline annotations are optional: only merge them when the CSV is
# actually present, so the notebook still runs top to bottom without it.
if os.path.exists(barlines_csv):
    barlines_df = pd.read_csv(barlines_csv)

    # The bbox columns are read back from the CSV as strings; turn them into lists again
    for bbox_column in ('a_bbox', 'o_bbox', 'padded_bbox'):
        barlines_df[bbox_column] = barlines_df[bbox_column].apply(convert_str_to_list)

    # Append only the barline rows whose image already appears in the respective split
    missing_annotations = barlines_df[barlines_df['filename'].isin(train_data['filename'])]
    train_data = pd.concat([train_data, missing_annotations], ignore_index=True)

    missing_annotations = barlines_df[barlines_df['filename'].isin(test_data['filename'])]
    test_data = pd.concat([test_data, missing_annotations], ignore_index=True)
else:
    print(f"{barlines_csv} not found; continuing without the extra barline annotations")

In [205]:
def corners_to_yolo(bbox, img_width, img_height):
    """
    Convert a flat list of polygon corner coordinates into a normalized
    [center_x, center_y, width, height, angle] box.

    Args:
        bbox: Flat sequence [x0, y0, x1, y1, ...] of corner coordinates.
        img_width: Image width; center_x and width are divided by it.
        img_height: Image height; center_y and height are divided by it.

    Returns:
        list: [center_x, center_y, width, height, angle], with angle in degrees,
        or the string 'invalid' when the minimum rotated rectangle of the
        corners is a single point.
    """
    polygon = Polygon([(bbox[i], bbox[i + 1]) for i in range(0, len(bbox), 2)])
    min_rect = polygon.minimum_rotated_rectangle

    # A shape that collapses to a single point is not converted; it is reported
    # as 'invalid' so that the caller can filter it out
    if isinstance(min_rect, Point):
        return 'invalid'

    # check if symbol is a line and add 1 px padding if so (almost always stems)
    if isinstance(min_rect, LineString):
        # Replace the line by the axis-aligned box around it, grown by 1 px on every side
        x_coords, y_coords = zip(*min_rect.coords)
        min_x, max_x = min(x_coords), max(x_coords)
        min_y, max_y = min(y_coords), max(y_coords)
        min_rect = Polygon([(min_x-1, min_y-1), (max_x+1, min_y-1), (max_x+1, max_y+1), (min_x-1, max_y+1)])

    corners = np.array(min_rect.exterior.coords)
    edge1 = np.linalg.norm(corners[0] - corners[1])
    edge2 = np.linalg.norm(corners[1] - corners[2])
    # NOTE(review): width is always the longer edge and height the shorter one,
    # while the angle below is measured along corners[0] -> corners[1], which may
    # be either edge. A tall, upright box therefore gets width > height. Confirm
    # that consumers of this box expect that convention.
    width = max(edge1, edge2)
    height = min(edge1, edge2)
    center = min_rect.centroid.coords[0]
    center_x = center[0]
    center_y = center[1]
    angle = np.rad2deg(np.arctan2(corners[1][1] - corners[0][1], corners[1][0] - corners[0][0]))

    # Normalize: x-related values by the image width, y-related values by the image height
    center_x /= img_width
    center_y /= img_height
    width /= img_width
    height /= img_height

    return [center_x, center_y, width, height, angle]
    
def apply_corners_to_yolo(row):
    """Row-wise adapter: run corners_to_yolo on a row's 'o_bbox' using that row's 'width' and 'height'."""
    corner_coords = row['o_bbox']
    image_width = row['width']
    image_height = row['height']
    return corners_to_yolo(corner_coords, image_width, image_height)

In [206]:
# Add a column with bounding boxes in (center x, center y, W, H, R)*normalized format
train_data['yolo_bbox'] = train_data.apply(apply_corners_to_yolo, axis=1)
test_data['yolo_bbox'] = test_data.apply(apply_corners_to_yolo, axis=1)

# Keep only the rows whose box could be converted (corners_to_yolo returns 'invalid' otherwise)
train_is_valid = train_data['yolo_bbox'] != 'invalid'
test_is_valid = test_data['yolo_bbox'] != 'invalid'
train_data = train_data[train_is_valid]
test_data = test_data[test_is_valid]
train_data.head(1)

Unnamed: 0,ann_id,a_bbox,o_bbox,area,img_id,label,duration,rel_position,duration_mask,rel_position_mask,padded_bbox,filename,width,height,yolo_bbox
0,1020,"[116.0, 139.0, 2315.0, 206.0]","[2315.0, 206.0, 2315.0, 139.0, 116.0, 139.0, 1...",18945,679,154,-1.0,50.0,0,0,"[116.0, 139.0, 2315.0, 206.0]",lg-877777775968732096-aug-gonville--page-3.png,2431,3439,"[0.5, 0.05015993021227101, 0.904566022213081, ..."


In [207]:
# Collapse to one row per image file: each cell holds the list of that image's
# boxes / labels. Both splits use the same list aggregation so the two frames
# have the same structure.
train_data_agg = train_data.groupby('filename').agg({
    'yolo_bbox': lambda x: list(x),
    'label': lambda x: list(x)
}).reset_index()
test_data_agg = test_data.groupby('filename').agg({
    'yolo_bbox': lambda x: list(x),
    'label': lambda x: list(x)
}).reset_index()

# Convert dataframe to directory of text files 

To train a yolo model, you will also need a directory with the labels as a `.txt` text file 

Each line of a text file corresponds to a class and its coordinates in the following format: 

**{object-class} {x_center} {y_center} {width} {height}**

Something like this in a text file 
<br>
0 0.5 0.5 0.2 0.4 
<br>
1 0.7 0.3 0.3 0.3
<br>
2 0.3 0.6 0.5 0.5



In [208]:
train_data_agg.head(1)

Unnamed: 0,filename,yolo_bbox,label
0,lg-101766503886095953-aug-beethoven--page-1.png,"[[0.5354591836734693, 0.14772727272727273, 0.8...","[154, 154, 1, 1, 154, 154, 61, 42, 111, 44, 61..."


In [209]:
def df_to_yolo_text_format(df, output_dir):
    """
    Write one label text file per row of df into output_dir.

    Args:
        df (DataFrame): rows with 'filename', 'yolo_bbox' (a sequence of boxes)
            and 'label' (a sequence of class labels paired with the boxes).
        output_dir (str): target directory; created when it does not exist.

    Each file is named after the row's filename with its extension replaced by
    '.txt' and holds one "<label> <x_center> <y_center> <width> <height>" line
    per box, taken from the first four entries of the box.
    """
    # Create the target directory on first use
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for _, record in df.iterrows():
        stem, _extension = os.path.splitext(record['filename'])
        label_path = os.path.join(output_dir, stem + '.txt')

        with open(label_path, 'w') as label_file:
            label_file.writelines(
                f"{class_label} {box[0]} {box[1]} {box[2]} {box[3]}\n"
                for box, class_label in zip(record['yolo_bbox'], record['label'])
            )




In [210]:
# Label folders live under <data_path>/labels/<split>
train_label_dir, test_label_dir = (
    os.path.join(data_path, "labels", split) for split in ("train", "test")
)

In [211]:
# Write the per-image label files for the training split
df_to_yolo_text_format(df=train_data_agg, output_dir=train_label_dir)

In [212]:
# Write the per-image label files for the test split
df_to_yolo_text_format(df=test_data_agg, output_dir=test_label_dir)

# Yolo model

In [1]:
from ultralytics import YOLO

Note: if the model doesn't exist, it will automatically be downloaded as a `.pt` file into the directory that the code is run in.

In [214]:
# Build the model from the 'yolov8n.pt' weights file
model = YOLO('yolov8n.pt')

In [216]:
# Pick the training device from what this machine offers instead of assuming
# Apple silicon: MPS if available, else the first CUDA GPU, else the CPU.
if torch.backends.mps.is_available():
    train_device = "mps"
elif torch.cuda.is_available():
    train_device = 0
else:
    train_device = "cpu"

results = model.train(data="deep_scores.yaml", epochs=1, device=train_device)

Ultralytics YOLOv8.2.4 🚀 Python-3.11.9 torch-2.3.0 MPS (Apple M2 Pro)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=deep_scores.yaml, epochs=1, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=mps, workers=8, project=None, name=train4, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, lin

[34m[1mtrain: [0mScanning /Users/bwhom/Desktop/OMR/data/ds2_dense/labels/train.cache... 1362 images, 0 backgrounds, 0 corrupt: 100%|████[0m
[34m[1mval: [0mScanning /Users/bwhom/Desktop/OMR/data/ds2_dense/labels/test.cache... 352 images, 0 backgrounds, 0 corrupt: 100%|████████[0m

Plotting labels to /opt/homebrew/runs/detect/train4/labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=6.3e-05, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1m/opt/homebrew/runs/detect/train4[0m
Starting training for 1 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/1         0G      4.479      5.868       2.39        500        640: 100%|██████████| 86/86 [11:38<00:00,  8.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/11 [00:00<?, ?it/



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   9%|▉         | 1/11 [01:22<13:45, 


RuntimeError: MPS backend out of memory (MPS allocated: 17.27 GB, other allocations: 22.47 GB, max allowed: 36.27 GB). Tried to allocate 91.50 KB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).