In [1]:
%pip install -r yolov5/requirements.txt --user 
%pip install sklearn scikit-image azureml-core --user
%pip install sklearn --user

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
from util import labeledImage, normalize_coordinates, convert_to_yolo_format
from sklearn.model_selection import train_test_split
import os, shutil, yaml

In [3]:
# Set the path to the root folder that contains images and labels
# Example:
#    - PC: path_to_Mse544_cv = 'C:/Users/Luna/DataScience/Mse544-CustomVision'
#    - Mac: path_to_Mse544_cv = '/Users/luna/Mse-544-CustomVision'
path_to_Mse544_cv = '<enter path here>'

source_images_dir = f'/Users/tannerjameshill/Downloads/Mse544-CustomVision-main/molecules/'
source_labels_dir = f'/Users/tannerjameshill/Downloads/Mse544-CustomVision-main/molecules/labels/'

labeled_images = []
tag = 'molecule' 

for file in os.listdir(source_images_dir):
    # find all jpeg file and it's ImageJ label
    if file.endswith(".jpeg"):
        image_path = os.path.join(source_images_dir, file)
        label_path = os.path.join(source_labels_dir, file.split('.')[0] + '.txt')
        labeled_images.append(labeledImage(image_path))
        labeled_images[-1].add_labels_from_file(tag, label_path)

In [4]:
train_and_val_set, test_set = train_test_split(labeled_images, test_size=0.1)
train_set, val_set = train_test_split(train_and_val_set, test_size=(2/9))

len(train_set), len(val_set), len(test_set)

(41, 12, 6)

In [5]:
# Create the molecule_images directory if it doesn't exist
output_dir = os.path.join(os.getcwd(),'molecule_images')
if not os.path.exists(output_dir): os.mkdir(output_dir)

train_dir = os.path.join(output_dir, 'train') 
val_dir   = os.path.join(output_dir, 'val') 
test_dir  = os.path.join(output_dir, 'test') 

# Create the sub-directories
for d in [train_dir, val_dir, test_dir]:
    if not os.path.exists(d): os.mkdir(d)
    
    images_sub_dir = os.path.join(d, 'images')
    labels_sub_dir = os.path.join(d, 'labels')
    
    for sub_dir in [images_sub_dir, labels_sub_dir]:
        if not os.path.exists(sub_dir): os.mkdir(sub_dir)

In [6]:
# make unified yolo tags 
tags = [tag]

# zip the dataset
dataset = [(train_dir, train_set),(val_dir, val_set),(test_dir, test_set)]

for d, s in dataset:
    images_sub_dir = os.path.join(d, 'images')
    labels_sub_dir = os.path.join(d, 'labels')

    # copy over the images
    for img in s:
        shutil.copyfile(img.path, os.path.join(images_sub_dir, img.name))
    
    # covert ImageJ labels to yolo format and save it to labels_sub_dir
    convert_to_yolo_format(s, labels_sub_dir, tags)

successfully generated labels for image  ja-2013-04064m_0001.jpeg
successfully generated labels for image  AddedImage5.jpeg
successfully generated labels for image  AddedImage7.jpeg
successfully generated labels for image  AddedImage10.jpeg
successfully generated labels for image  ja-2009-057986_0003.jpeg
successfully generated labels for image  ma-2014-01736m_0007.jpeg
successfully generated labels for image  ma-2014-00250j_0009.jpeg
successfully generated labels for image  ma-2014-00250j_0002.jpeg
successfully generated labels for image  AddedImage1.jpeg
successfully generated labels for image  am-2015-087694_0011.jpeg
successfully generated labels for image  cm-2015-00481u_0002.jpeg
successfully generated labels for image  ja-2011-01131h_0004.jpeg
successfully generated labels for image  ja-2009-057986_0002.jpeg
successfully generated labels for image  ma-2014-01736m_0009.jpeg
successfully generated labels for image  ma-2012-01312d_0004.jpeg
successfully generated labels for image  

In [7]:
# generate yolo yaml file
yolo_yaml = os.path.join(output_dir, 'molecule_detection_yolov5.yaml')

with open(yolo_yaml, 'w') as yamlout:
    yaml.dump(
        {'train': train_dir,
         'val': val_dir,
         'nc': len(tags),
         'names': tags},
        yamlout,
        default_flow_style=None,
        sort_keys=False
    )

In [1]:
%run yolov5/train.py --img 640 --batch 16 --epochs 1 --data ./molecule_images/molecule_detection_yolov5.yaml --weights yolov5s.pt


[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=./molecule_images/molecule_detection_yolov5.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=1, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=yolov5/runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0m⚠️ YOLOv5 is out of date by 1 commit. Use 'git pull' or 'git clone https://github.com/ultralytics/yolov5' to update.
YOLOv5 🚀 v7.0-167-g5deff14 Python-3.9.16 torch-2.0.1 CPU

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.0

In [37]:
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies

In [38]:
yolov5_env = Environment(name="yolov5_env")

# Start from a base docker environments defined by Microsoft
yolov5_env.docker.base_image  = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04"

conda_dep = CondaDependencies()
# Indicate which version of python needs to be installed
conda_dep.add_conda_package('python=3.9')

# install all the yolov5 requirement at the image build time
with open('./yolov5/requirements.txt', 'r') as f:
    line = f.readline()
    
    while line != '':    
        # If the line is a comment or empty, skip it    
        if line.startswith('#') or len(line.split()) == 0:
            line = f.readline()
            continue
        # Otherwise add the corresponding package name as a dependency
        conda_dep.add_pip_package(line.split()[0])
        # Then move on to the next line in the requirements.txt file
        line = f.readline()

yolov5_env.python.conda_dependencies=conda_dep

In [39]:
yolov5_env.get_image_details


<bound method Environment.get_image_details of {
    "assetId": null,
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": "2g"
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "yolov5_env",
    "python": {
        "baseCondaEnvironment": null,
        "conda

In [40]:
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies

In [41]:
subscription_id = 'd2f930b7-20f6-402b-b126-8cec0a2c1dd7'
resource_group  = 'rg-amlclass-thill3'
workspace_name  = 'azureml-thill3'
ws = Workspace(subscription_id, resource_group, workspace_name)

experiment = Experiment(workspace=ws, name='molecule_detection_yolo_training')

In [42]:
# Overall configuration for the script to be run on the compute cluster
config = ScriptRunConfig(source_directory='./deploy_yolo_training/',   ## folder in which the script is located
                         script='training_on_aml.py',       ## script name
                         compute_target='GPU-thill3',
                         environment=yolov5_env)   

In [43]:
os.getcwd()


'/Users/tannerjameshill/Big_Data/MSE544_yolo_training'

In [44]:
# make sure you are in the same folder of this notebook
#%cd <path-to-MSE544_yolo_training>

In [45]:
run = experiment.submit(config)
aml_url = run.get_portal_url()
print(aml_url)

https://ml.azure.com/runs/molecule_detection_yolo_training_1684347335_756e9591?wsid=/subscriptions/d2f930b7-20f6-402b-b126-8cec0a2c1dd7/resourcegroups/rg-amlclass-thill3/workspaces/azureml-thill3&tid=f6b6dd5b-f02f-441a-99a0-162ac5060bd2
