In [20]:
import os
import logging
import yaml
import s3fs
import boto3
import numpy as np
import torch
import mlflow
from mlflow.exceptions import MlflowException
from mlflow.models import infer_signature
from ultralytics import settings
from ultralytics import YOLO

In [None]:
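# Optional: uncomment to supply AWS settings for this session when they are not
# already configured in the environment. Never commit real credential values.
# os.environ['AWS_REGION'] = 'eu-central-1'
# os.environ['AWS_ACCESS_KEY_ID'] = ''
# os.environ['AWS_SECRET_ACCESS_KEY'] = ''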

In [None]:
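# Optional: uncomment to install the Databricks CLI and supply Databricks
# credentials for this session. Never commit real credential values.
# !pip install databricks-cli
# os.environ['DATABRICKS_HOST'] = 'https://community.cloud.databricks.com/'
# os.environ['DATABRICKS_USERNAME'] = ''
# os.environ['DATABRICKS_PASSWORD'] = ''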

In [4]:
# Lower the threshold of the "mlflow" logger to DEBUG so its debug records are emitted.
mlflow_logger = logging.getLogger("mlflow")
mlflow_logger.setLevel(logging.DEBUG)

In [5]:
# Notebook-wide configuration: the dataset definition passed to train/val,
# the S3 location used as the MLflow experiment's artifact store, and an
# S3 filesystem handle.
DATA_YAML_PATH = 'data/data.yaml'
artifact_uri = "s3://stream-n-detect/models"
fs = s3fs.S3FileSystem()

# Switch the "dvc" and "mlflow" entries of the Ultralytics settings off.
# NOTE(review): presumably this disables Ultralytics' built-in integrations so
# that MLflow logging is done only by the explicit calls in this notebook — confirm.
integration_flags = {"dvc": False, "mlflow": False}
settings.update(integration_flags)
print(settings)

{'settings_version': '0.0.4', 'datasets_dir': '/Users/asukh/Work/VSCode/Python/stream-and-detect/trainer', 'weights_dir': 'weights', 'runs_dir': 'runs', 'uuid': '8c0982ee71335cf4485524a18ce8e44caf5e59c587e48ff1e991579ce82db241', 'sync': True, 'api_key': '', 'openai_api_key': '', 'clearml': True, 'comet': True, 'dvc': False, 'hub': True, 'mlflow': False, 'neptune': True, 'raytune': True, 'tensorboard': True, 'wandb': True}


In [6]:
# Use the Databricks-hosted MLflow tracking server.
mlflow.set_tracking_uri(uri="databricks")
experiment_name = "/Users/andriysukh0912@gmail.com/yolov8n-detect"

# Create the experiment with the S3 artifact location on first use; on later
# runs creation fails because the name is taken, so fall back to a lookup.
try:
    experiment_id = mlflow.create_experiment(experiment_name, artifact_location=artifact_uri)
except MlflowException:
    experiment = mlflow.get_experiment_by_name(experiment_name)
    if experiment is None:
        # Creation failed for a reason other than "already exists" (the
        # experiment is not there either). Re-raise the original MLflow error
        # instead of failing below with an unrelated AttributeError on None.
        raise
    experiment_id = experiment.experiment_id

# Make this the active experiment; as the last expression of the cell, the
# returned Experiment is also displayed.
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='s3://stream-n-detect/models', creation_time=1718367076967, experiment_id='541998404243590', last_update_time=1718367076967, lifecycle_stage='active', name='/Users/andriysukh0912@gmail.com/yolov8n-detect', tags={'mlflow.experiment.sourceName': '/Users/andriysukh0912@gmail.com/yolov8n-detect',
 'mlflow.experimentType': 'MLFLOW_EXPERIMENT',
 'mlflow.ownerEmail': 'andriysukh0912@gmail.com',
 'mlflow.ownerId': '3490293259147743'}>

In [11]:
# Read the run configuration from params.yaml into a plain Python object.
with open(r"params.yaml") as params_file:
    params = yaml.safe_load(params_file)

# Build the YOLO model from the 'model_type' entry of the configuration
# (a pre-trained model, per the original note).
model_type = params['model_type']
model = YOLO(model_type)

In [None]:
# Train the model: use CUDA when it is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters forwarded unchanged from params.yaml to model.train().
train_hyperparams = {
    key: params[key]
    for key in ('imgsz', 'batch', 'epochs', 'optimizer', 'lr0', 'seed', 'pretrained')
}

results = model.train(
    data=DATA_YAML_PATH,
    device=device,
    project=params['project_dir'],
    # name=params['name'],
    **train_hyperparams,
)

In [12]:
# Validate the model on the dataset described by DATA_YAML_PATH, on CUDA when
# available and on CPU otherwise. Outputs go under "<project_dir>/val/".
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

project_dir = params['project_dir']
val_project = f'{project_dir}/val/'

# `results` is read by the MLflow logging cell further down.
results = model.val(
    data=DATA_YAML_PATH,
    device=device,
    project=val_project,
)

YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs


[34m[1mval: [0mScanning /Users/asukh/Work/VSCode/Python/stream-and-detect/trainer/data/valid/labels.cache... 549 images, 8 backgrounds, 0 corrupt: 100%|██████████| 549/549 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 35/35 [02:44<00:00,  4.71s/it]


                   all        549       6270      0.191      0.125       0.11     0.0678
                person        189        250      0.014        0.1    0.00631    0.00212
               bicycle         81        108          0          0          0          0
                   car        520       3842      0.538      0.504      0.504      0.317
            motorcycle        331       1238      0.402     0.0186     0.0418       0.02
              airplane        196        832          0          0   0.000563   0.000169
Speed: 1.4ms preprocess, 293.2ms inference, 0.0ms loss, 2.3ms postprocess per image
Results saved to [1mmodels/val/val4[0m


In [None]:
# Write the pinned pip requirements that are passed to log_model below.
requirements_path = "requirements.txt"
pinned_requirements = (
    "torch==2.3.1\n",
    "cloudpickle==3.0.0\n",
    # Add other dependencies if needed
)
with open(requirements_path, "w") as requirements_file:
    requirements_file.writelines(pinned_requirements)

# Record the run: parameters, validation metrics, a tag, and the model itself.
with mlflow.start_run(experiment_id=experiment_id):
    print(mlflow.get_artifact_uri())
    mlflow.log_params(params)

    val_metrics = results.results_dict
    mlflow.log_metric("precision", val_metrics['metrics/precision(B)'])
    mlflow.log_metric("recall", val_metrics['metrics/recall(B)'])
    mlflow.log_metric("fitness", results.fitness)

    # NOTE(review): these two values come from `results.speed`; the validation
    # printout reports them as per-image timings in ms, so "loss" here is the
    # time spent on the loss step, not a loss value.
    timings = results.speed
    mlflow.log_metric("inference", timings['inference'])
    mlflow.log_metric("loss", timings['loss'])

    mlflow.set_tag("training info", "yolo8n traffic data")

    # The signature carries only the params schema (no input/output example is given).
    signature = infer_signature(params=params)

    # `model_info` is read by the cell that reloads the logged model.
    model_info = mlflow.pytorch.log_model(
        pytorch_model=model.model,
        artifact_path="models",
        signature=signature,
        pip_requirements=requirements_path,
    )
    # mlflow.log_artifact('models')
    

In [26]:
# Load the model that was just logged back through MLflow's pyfunc loader,
# using the URI recorded in `model_info`.
logged_model_uri = model_info.model_uri
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model_uri)

Downloading artifacts: 100%|██████████| 10/10 [00:00<00:00, 16.97it/s]  
