In [34]:
%pip install -r requirements.txt --user 
%pip install scikit-learn scikit-image azureml-core --user

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [35]:
from util import labeledImage, normalize_coordinates, convert_to_yolo_format
from sklearn.model_selection import train_test_split
import os, shutil, yaml


In [36]:
path_to_Mse544_cv = '/Users/stlp/Downloads/Mse544-CustomVision'

source_images_dir = f'{path_to_Mse544_cv}/molecules/'
source_labels_dir = f'{path_to_Mse544_cv}/molecules/labels/'

labeled_images = []
tag = 'molecule' 

for file in os.listdir(source_images_dir):
    # find all jpeg file and it's ImageJ label
    if file.endswith(".jpeg"):
        image_path = os.path.join(source_images_dir, file)
        label_path = os.path.join(source_labels_dir, file.split('.')[0] + '.txt')
        labeled_images.append(labeledImage(image_path))
        labeled_images[-1].add_labels_from_file(tag, label_path)

In [37]:
train_and_val_set, test_set = train_test_split(labeled_images, test_size=0.1)
train_set, val_set = train_test_split(train_and_val_set, test_size=(2/9))

len(train_set), len(val_set), len(test_set)

(35, 10, 5)

In [38]:
output_dir = os.path.join(os.getcwd(),'molecule_images')
if not os.path.exists(output_dir): os.mkdir(output_dir)

train_dir = os.path.join(output_dir, 'train') 
val_dir   = os.path.join(output_dir, 'val') 
test_dir  = os.path.join(output_dir, 'test') 

# Create the sub-directories
for d in [train_dir, val_dir, test_dir]:
    if not os.path.exists(d): os.mkdir(d)
    
    images_sub_dir = os.path.join(d, 'images')
    labels_sub_dir = os.path.join(d, 'labels')
    
    for sub_dir in [images_sub_dir, labels_sub_dir]:
        if not os.path.exists(sub_dir): os.mkdir(sub_dir)

In [39]:
# make unified yolo tags 
tags = [tag]

# zip the dataset
dataset = [(train_dir, train_set),(val_dir, val_set),(test_dir, test_set)]

for d, s in dataset:
    images_sub_dir = os.path.join(d, 'images')
    labels_sub_dir = os.path.join(d, 'labels')

    # copy over the images
    for img in s:
        shutil.copyfile(img.path, os.path.join(images_sub_dir, img.name))
    
    # covert ImageJ labels to yolo format and save it to labels_sub_dir
    convert_to_yolo_format(s, labels_sub_dir, tags)

successfully generated labels for image  am-2014-02795y_0009.jpeg
successfully generated labels for image  ja-2011-10687r_0013.jpeg
successfully generated labels for image  cm-2012-017006_0003.jpeg
successfully generated labels for image  cm-2014-00172w_0011.jpeg
successfully generated labels for image  am-2015-087694_0002.jpeg
successfully generated labels for image  cm-2012-011056_0001.jpeg
successfully generated labels for image  ja-2016-05418c_0001.jpeg
successfully generated labels for image  cm-2015-032569_0005.jpeg
successfully generated labels for image  cm-2017-00595k_0001.jpeg
successfully generated labels for image  ma-2010-01523a_0003.jpeg
successfully generated labels for image  cm-2015-01520c_0001.jpeg
successfully generated labels for image  cm-2015-030982_0001.jpeg
successfully generated labels for image  ma-2014-00333r_0009.jpeg
successfully generated labels for image  cm-2014-01513n_0001.jpeg
successfully generated labels for image  ja-2013-04064m_0005.jpeg
successful

In [40]:
# generate yolo yaml file
yolo_yaml = os.path.join(output_dir, 'molecule_detection_yolov5.yaml')

with open(yolo_yaml, 'w') as yamlout:
    yaml.dump(
        {'train': train_dir,
         'val': val_dir,
         'nc': len(tags),
         'names': tags},
        yamlout,
        default_flow_style=None,
        sort_keys=False
    )

In [41]:
%run train.py --img 640 --batch 16 --epochs 1 --data molecule_images/molecule_detection_yolov5.yaml --weights yolov5s.pt


[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=molecule_images/molecule_detection_yolov5.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=1, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, evolve_population=data/hyps, resume_evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest, ndjson_console=False, ndjson_file=False
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-313-g712de55a Python-3.9.19 torch-2.2.2 CPU

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_l

AttributeError: module '__main__' has no attribute '__spec__'

In [42]:
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies

In [43]:
yolov5_env = Environment(name="yolov5_env")

# Start from a base docker environments defined by Microsoft
yolov5_env.docker.base_image  = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04"

conda_dep = CondaDependencies()
# Indicate which version of python needs to be installed
conda_dep.add_conda_package('python=3.9')

# install all the yolov5 requirement at the image build time
with open('requirements.txt', 'r') as f:
    line = f.readline()
    
    while line != '':    
        # If the line is a comment or empty, skip it    
        if line.startswith('#') or len(line.split()) == 0:
            line = f.readline()
            continue
        # Otherwise add the corresponding package name as a dependency
        conda_dep.add_pip_package(line.split()[0])
        # Then move on to the next line in the requirements.txt file
        line = f.readline()

yolov5_env.python.conda_dependencies=conda_dep

In [44]:
yolov5_env.get_image_details


<bound method Environment.get_image_details of {
    "assetId": null,
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": "2g"
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "yolov5_env",
    "python": {
        "baseCondaEnvironment": null,
        "conda

In [45]:
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig
from azureml.core.conda_dependencies import CondaDependencies

In [46]:
subscription_id = 'cbcd0d6a-0ae5-48a7-a46d-4a4f3c692c71'
resource_group  = 'rg-amlclass-tfarmer5'
workspace_name  = 'azureml-tfarmer5'
ws = Workspace(subscription_id, resource_group, workspace_name)

experiment = Experiment(workspace=ws, name='molecule_detection_yolo_training')

In [50]:
# Overall configuration for the script to be run on the compute cluster
config = ScriptRunConfig(source_directory='./deploy_yolo_training/',   ## folder in which the script is located
                         script='trainin_on_aml.py',       ## script name
                         compute_target='GPU-tfarmer5',
                         environment=yolov5_env)   


In [51]:
os.getcwd()


'/Users/stlp/Downloads/Mse544-CustomVision/MSE544_yolo_training/yolov5'

In [56]:
run = experiment.submit(config)
aml_url = run.get_portal_url()
print(aml_url)

https://ml.azure.com/runs/molecule_detection_yolo_training_1716257158_5a34c6ef?wsid=/subscriptions/cbcd0d6a-0ae5-48a7-a46d-4a4f3c692c71/resourcegroups/rg-amlclass-tfarmer5/workspaces/azureml-tfarmer5&tid=f6b6dd5b-f02f-441a-99a0-162ac5060bd2


In [79]:
!python detect.py --source MSE544_yolo_training/yolov5/deploy_yolo_training/molecule_images/test/images/Screenshot 2024-05-20 at 8.08.26 PM.png  --weights deploy_yolo_training/Job_loyal_tooth_67107cs9_OutputsAndLogs/outputs/runs/train/exp/weights/best.pt --conf 0.5


usage: detect.py [-h] [--weights WEIGHTS [WEIGHTS ...]] [--source SOURCE]
                 [--data DATA] [--imgsz IMGSZ [IMGSZ ...]]
                 [--conf-thres CONF_THRES] [--iou-thres IOU_THRES]
                 [--max-det MAX_DET] [--device DEVICE] [--view-img]
                 [--save-txt] [--save-csv] [--save-conf] [--save-crop]
                 [--nosave] [--classes CLASSES [CLASSES ...]] [--agnostic-nms]
                 [--augment] [--visualize] [--update] [--project PROJECT]
                 [--name NAME] [--exist-ok] [--line-thickness LINE_THICKNESS]
                 [--hide-labels] [--hide-conf] [--half] [--dnn]
                 [--vid-stride VID_STRIDE]
detect.py: error: unrecognized arguments: 2024-05-20 at 8.08.26 PM.png


In [3]:
!python detect.py --source "deploy_yolo_training/molecule_images/test/images/andrew2.png" --weights deploy_yolo_training/Job_loyal_tooth_67107cs9_OutputsAndLogs/outputs/runs/train/exp/weights/best.pt --conf 0.5


[34m[1mdetect: [0mweights=['deploy_yolo_training/Job_loyal_tooth_67107cs9_OutputsAndLogs/outputs/runs/train/exp/weights/best.pt'], source=deploy_yolo_training/molecule_images/test/images/andrew2.png, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.5, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_csv=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-313-g712de55a Python-3.9.19 torch-2.2.2 CPU

Fusing layers... 
[W NNPACK.cpp:64] Could not initialize NNPACK! Reason: Unsupported hardware.
Model summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
image 1/1 /Users/stlp/Downloads/Mse544-CustomVision/MSE544_yolo_training/yolov5/deploy_yolo_training/molecule_images/test/images/andrew2.png: 224x640 2 molecules