In [1]:
import json
import os
import shutil

import wandb

from architectures import yolov8
from trackers import botsort
from data import yolo_dataset, image_processor

> Run `wandb login` in a terminal before running this notebook.

### Select a model, dataset, tracker and hyperparameters

In [2]:
# Registries translating the string identifiers stored in the hyperparameters
# into the classes that implement them.

# hyperparameters['architecture'] -> model class
architectures = {"yolov8": yolov8.YoloV8}

# hyperparameters['tracker'] -> tracker class
trackers = {'botsort': botsort.BotSort}

# hyperparameters['architecture'] -> dataset class used to build training data
dataset_mapping = {"yolov8": yolo_dataset.YoloDataset}

In [3]:
# Folder under which every new model gets its own sub-folder.
STD_MODEL_FOLDER = '/vol/biomedic3/bglocker/ugproj2324/fv220/dev/SharkTrack-Dev/model_development/models'

# Keys describing how a model was (or will be) trained.
REQUIRED_TRAIN_PARAMS = [
    "model_name",
    "model_path",
    "architecture",
    "epochs",
    "batch_size",
    "img_size",
    "lr",
    "greyscale",
    "model_size",
    "patience",
    "training_data",
    "pretrained_model_path",
    "annotations_path",
]

# Keys describing how a model is evaluated.
REQUIRED_EVAL_PARAMS = [
    "conf_threshold",
    "eval_data",
    "iou_association_threshold",
    "tracker",
]

# Keys computed by construct_hyperparameters rather than supplied by the user.
DYNAMIC_PARAMS = [
    "pretrained",
    "fine_tuning",
]

def load_trained_models(path="./assets/trained_models.json"):
    """Load the registry of trained models from a JSON file.

    Args:
        path: Location of the JSON registry. Defaults to
            ``./assets/trained_models.json`` (resolved against the current
            working directory), which is what existing callers rely on.

    Returns:
        The decoded JSON document, exactly as ``json.load`` returns it.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Explicit encoding so the result does not depend on the platform locale.
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)

def check_required_params(params, required_params):
    """Tell whether every required name is present in ``params``.

    Args:
        params: Container tested with ``in`` (for a dict, its keys).
        required_params: Iterable of names that must all be present.

    Returns:
        True when nothing is missing (including when ``required_params`` is
        empty), False as soon as one name is not found.
    """
    for name in required_params:
        if name not in params:
            return False
    return True

def create_model_path(model_name):
    """Return the folder path reserved for a new model.

    The path is ``STD_MODEL_FOLDER/<model_name>``. Nothing is created on
    disk here; the function only checks that the location is still free.

    Args:
        model_name: Name of the model, used as the folder name.

    Returns:
        The path string for the model.

    Raises:
        AssertionError: If something already exists at that path.
    """
    candidate = f"{STD_MODEL_FOLDER}/{model_name}"
    already_there = os.path.exists(candidate)
    assert not already_there, f"Model {model_name} already exists."
    return candidate

def get_pretrained_model_path(model_name, trained_models):
    """Look up the stored ``model_path`` of a registered model.

    Args:
        model_name: Key of the model in ``trained_models``.
        trained_models: Mapping from model name to that model's parameters;
            each entry must carry a ``"model_path"`` key.

    Returns:
        The ``"model_path"`` value stored for ``model_name``.

    Raises:
        AssertionError: If the model is not registered, or if its stored
            path does not exist on disk.
    """
    is_registered = model_name in trained_models
    assert is_registered, "Pretrained model is not available."

    entry = trained_models[model_name]
    weights_path = entry["model_path"]
    assert os.path.exists(weights_path), f"Pretrained model {model_name} does not exist."
    return weights_path

def construct_hyperparameters(**config):
    """Assemble the complete hyperparameter dict for one experiment.

    Three situations are distinguished:

    * Already-trained model: ``config["model_name"]`` is a key of the
      trained-models registry. The training parameters are read back from the
      registry and only the evaluation parameters are taken from ``config``.
    * New model trained from scratch: the name is not registered and
      ``pretrained_model_path`` is absent or None.
    * New model fine-tuned from another one: the name is not registered and
      ``pretrained_model_path`` is set. A value without ``"/"`` is treated as
      the name of a registered model and replaced by that model's stored path.

    Args:
        **config: User-supplied parameters. ``model_name`` is always required;
            ``pretrained_model_path`` is optional and defaults to None.

    Returns:
        A dict containing every key of ``REQUIRED_TRAIN_PARAMS``,
        ``REQUIRED_EVAL_PARAMS`` and ``DYNAMIC_PARAMS``.

    Raises:
        KeyError: If ``model_name`` is missing, or an evaluation parameter is
            missing when the model is already trained.
        AssertionError: If the registry is unavailable, a registered model is
            given a ``pretrained_model_path``, the target model folder already
            exists, the referenced pretrained model is unknown, or the final
            dict lacks a required key.
    """
    trained_models = load_trained_models()
    assert trained_models is not None, "Trained models data is not available."

    model_name = config["model_name"]
    # .get(): the key is optional. Indexing it directly raised KeyError before
    # any of the checks below could report a meaningful error.
    pretrained_model_path = config.get("pretrained_model_path")

    hyperparameters = {"model_name": model_name}
    model_pretrained = model_name in trained_models
    fine_tuning = not model_pretrained and pretrained_model_path is not None

    if model_pretrained:
        assert pretrained_model_path is None, "Pretrained model path is not required."
        model_train_params = trained_models[model_name]
        assert model_train_params is not None and check_required_params(model_train_params, REQUIRED_TRAIN_PARAMS), "Missing required training parameters."
        hyperparameters.update({param: model_train_params[param] for param in REQUIRED_TRAIN_PARAMS})
        hyperparameters.update({param: config[param] for param in REQUIRED_EVAL_PARAMS})
    else:
        model_path = create_model_path(model_name)
        hyperparameters.update({**config, "model_path": model_path})
        # Always present in the result, even when the caller omitted it.
        hyperparameters["pretrained_model_path"] = pretrained_model_path
        if fine_tuning and '/' not in pretrained_model_path:
            # passed model name, not path
            hyperparameters["pretrained_model_path"] = get_pretrained_model_path(pretrained_model_path, trained_models)
        hyperparameters['annotations_path'] = os.path.join(hyperparameters["model_path"], "annotations.csv")

    hyperparameters['pretrained'] = model_pretrained
    hyperparameters['fine_tuning'] = fine_tuning

    assert check_required_params(hyperparameters, REQUIRED_TRAIN_PARAMS + REQUIRED_EVAL_PARAMS + DYNAMIC_PARAMS), "Some required hyperparameters are missing."

    return hyperparameters

def save_hyperparameters(hyperparameters, path="./assets/trained_models.json"):
    """Insert or replace one model's entry in the trained-models registry.

    The entry is stored under ``hyperparameters['model_name']``; all other
    entries of the registry are kept.

    Args:
        hyperparameters: Dict to store. Must contain ``'model_name'`` and be
            JSON-serialisable.
        path: Location of the JSON registry. Defaults to
            ``./assets/trained_models.json``. The file must already exist.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        KeyError: If ``hyperparameters`` has no ``'model_name'``.
        TypeError: If a value cannot be encoded as JSON. The registry file is
            left untouched in that case.
    """
    with open(path, "r+") as file:
        trained_models = json.load(file)
        trained_models[hyperparameters['model_name']] = hyperparameters
        # Serialise before rewinding: if encoding fails, nothing has been
        # written yet, so the registry cannot be left half-overwritten.
        serialised = json.dumps(trained_models, indent=4)
        file.seek(0)
        file.write(serialised)
        file.truncate()


In [4]:
def prepare_dataset(hyperparameters, metrics):
    """Build the training dataset and record its size.

    The dataset class is chosen through ``dataset_mapping`` from
    ``hyperparameters['architecture']`` and is configured from
    ``hyperparameters['training_data']`` (keys ``'dataset_name'``,
    ``'datasets'`` and optionally ``'augmentations'``).

    Args:
        hyperparameters: Experiment configuration.
        metrics: Dict that receives the ``'dataset_size'`` entry.

    Returns:
        A ``(dataset, metrics)`` tuple; ``metrics`` is the same object that
        was passed in.
    """
    transforms = [image_processor.ImageProcessor.bgr2rgb]
    data_config = hyperparameters['training_data']
    dataset_cls = dataset_mapping[hyperparameters['architecture']]

    dataset = dataset_cls(
        data_config['dataset_name'],
        "/vol/biomedic3/bglocker/ugproj2324/fv220/datasets/images_raw/",
        data_config['datasets'],
        data_config.get('augmentations', []),
        transforms,
    )
    dataset.get_info()

    metrics['dataset_size'] = len(dataset)
    return dataset, metrics

def train_model(model, dataset, metrics):
    """Train ``model`` on ``dataset`` and record the training statistics.

    Args:
        model: Object whose ``train(dataset)`` returns the 4-tuple
            ``(train_time, dataset_time, device, final_model_path)``.
        dataset: Passed through to ``model.train`` unchanged.
        metrics: Dict that receives ``'training_time'``,
            ``'dataset_building_time'`` and ``'training_device'``.

    Returns:
        A ``(final_model_path, metrics)`` tuple; ``metrics`` is the same
        object that was passed in.
    """
    print("Training model...")
    outcome = model.train(dataset)
    # Tuple unpacking insists on exactly four returned values.
    train_time, dataset_time, device, final_model_path = outcome
    metrics.update({
        'training_time': train_time,
        'dataset_building_time': dataset_time,
        'training_device': device,
    })
    return final_model_path, metrics

def evaluate_model(model, metrics):
    """Evaluate ``model`` and record the tracking results.

    Args:
        model: Object whose ``evaluate()`` returns the 5-tuple
            ``(mota, motp, idf1, track_time, device)``.
        metrics: Dict that receives ``'mota'``, ``'motp'``, ``'idf1'``,
            ``'tracking_time'`` and ``'tracking_device'``.

    Returns:
        The same ``metrics`` object that was passed in.
    """
    metric_names = ('mota', 'motp', 'idf1', 'tracking_time', 'tracking_device')
    # Tuple unpacking insists on exactly five returned values.
    mota, motp, idf1, track_time, device = model.evaluate()
    for name, value in zip(metric_names, (mota, motp, idf1, track_time, device)):
        metrics[name] = value
    # TODO: log image as well
    return metrics

def log_images(model_path):
    """Log every PNG found next to the model file to wandb.

    Only regular files directly inside the folder containing ``model_path``
    whose name ends in ``.png`` are considered. Each is logged as a
    ``wandb.Image`` under its own file name.

    Args:
        model_path: Path of the model file; only its parent folder is used.
    """
    model_folder = os.path.dirname(model_path)

    def _is_png_file(entry):
        return os.path.isfile(os.path.join(model_folder, entry)) and entry.endswith('.png')

    # Collect the names first, then log.
    plot_names = list(filter(_is_png_file, os.listdir(model_folder)))
    for plot in plot_names:
        image_path = os.path.join(model_folder, plot)
        wandb.log({plot: wandb.Image(image_path)})

def model_experimentation(hyperparameters):
    """Run one experiment: optional training, evaluation, persistence, logging.

    Steps:
        1. Instantiate the tracker and the model named in ``hyperparameters``.
        2. Unless ``hyperparameters['pretrained']`` is true, build the dataset
           and train; ``hyperparameters['model_path']`` is then overwritten
           with the path returned by training.
        3. Evaluate the model.
        4. Store the hyperparameters in the trained-models registry.
        5. Log the plots found next to the model and the metrics to wandb.

    Args:
        hyperparameters: Dict as produced by ``construct_hyperparameters``.
            It is modified in place when training takes place.
    """
    try:
        tracker = trackers[hyperparameters['tracker']]()
        model = architectures[hyperparameters['architecture']](hyperparameters, tracker)
        metrics = {}

        # Training
        if not hyperparameters['pretrained']:
            # prepare_dataset already records metrics['dataset_size'].
            dataset, metrics = prepare_dataset(hyperparameters, metrics)
            final_model_path, metrics = train_model(model, dataset, metrics)
            hyperparameters['model_path'] = final_model_path

        metrics = evaluate_model(model, metrics)
        save_hyperparameters(hyperparameters)

        print('Initialising wandb...')
        wandb.init(project="SharkTrack", name=hyperparameters["model_name"], config=hyperparameters, job_type="training")
        log_images(hyperparameters['model_path'])
        wandb.log(metrics)
    finally:
        # Single place where the run is closed, on success and on failure.
        wandb.finish()



### Delete model

In [5]:
# Delete folder from model_development/models
# Delete entry from trained_models.json

def delete_model(model_name):
    """Remove a model's folder and its entry in the trained-models registry.

    Args:
        model_name: Name of the model; its folder is
            ``STD_MODEL_FOLDER/<model_name>``.

    Raises:
        AssertionError: If ``model_name`` is empty or the folder does not
            exist.
        OSError: If the folder cannot be removed.
    """
    # An empty name would resolve to STD_MODEL_FOLDER itself and wipe every model.
    assert model_name, "Model name must not be empty."
    model_path = f"{STD_MODEL_FOLDER}/{model_name}"
    assert os.path.exists(model_path), f"Model {model_name} does not exist."
    # shutil.rmtree instead of `rm -rf` through a shell: names containing
    # spaces or shell metacharacters are handled literally, and a failed
    # removal raises instead of passing unnoticed.
    shutil.rmtree(model_path)
    with open("./assets/trained_models.json", "r+") as file:
        trained_models = json.load(file)
        if model_name in trained_models:
            del trained_models[model_name]
            file.seek(0)
            json.dump(trained_models, file, indent=4)
            file.truncate()

## Construct Hyperparameters

In [6]:
# delete_model("test_yolo")

# NOTE(review): `model_name` and `dataset_params` are defined here but not
# referenced by `train_params` below ("model_name" is a separate literal and
# "training_data" is None).
model_name = "yolov8n-test"

dataset_params = {
    "dataset_name": "test_mvd",
    "datasets": {
        "rf1": 0.05,
        "rf2": 0.05,
        "rf3": 0.05,
        "mwitt": 0.05,
        "openimagesv7": 0.05,
        "sl": 0.05,
        "backgrounds": 0.05,
    },
    # "augmentations": ["Equalise", "Rotate", "Crop", "Bbox-rotate", "Cutout"]
    "augmentations": [],
}

train_params = {
    "model_name": "yoloV8-medium-mvd2",
    "architecture": "yolov8",
    "epochs": 5,
    "batch_size": 16,
    "img_size": 640,
    "lr": 0.01,
    "greyscale": False,
    "model_size": "n",
    # we can specify the path of a pretrained model to use. MUST BE in ./assets/trained_models.json
    "pretrained_model_path": None,
    "patience": 10,
    "training_data": None,  # dataset_params
}

eval_params = {
    "conf_threshold": 0.2,
    "eval_data": "eval1",
    "iou_association_threshold": 0.5,
    "tracker": "botsort",
}

# Merge both groups into the final hyperparameter dict.
hyperparameters = construct_hyperparameters(**train_params, **eval_params)

print(hyperparameters)

{'model_name': 'yoloV8-medium-mvd2', 'model_path': '/vol/biomedic3/bglocker/ugproj2324/fv220/dev/old/shark_locator_tests/runs/detect/yolov8m_mvd2/best.pt', 'architecture': 'yolov8', 'epochs': 50, 'batch_size': 16, 'img_size': 640, 'lr': 0.01, 'greyscale': False, 'model_size': 'm', 'patience': 10, 'training_data': {'name': 'mvd2', 'datasets': {'rf1': 0.2, 'rf2': 0.35, 'rf3': 1, 'mwitt': 1, 'openimagesv7': 1, 'sl': 1, 'backgrounds': 1}, 'augmentations': ['Equalise', 'Rotate', 'Crop', 'Bbox-rotate']}, 'pretrained_model_path': None, 'annotations_path': '/vol/biomedic3/bglocker/ugproj2324/fv220/dev/old/shark_locator_tests/runs/detect/yolov8m_mvd2/annotations.csv', 'conf_threshold': 0.2, 'eval_data': 'eval1', 'iou_association_threshold': 0.5, 'tracker': 'botsort', 'pretrained': True, 'fine_tuning': False}


In [7]:
# Run the experiment for the hyperparameters built in the previous cell:
# training is skipped when hyperparameters['pretrained'] is true.
model_experimentation(hyperparameters)

Loading pretrained model - no training required
Initialised Model yoloV8-medium-mvd2 
Evaluating easy1


errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (1/598) /vol/biomedic3/bglocker/ugproj2324/fv220/datasets/sharktrack_test/videos/easy1.mp4: 384x640 (no detections), 113.3ms
video 1/1 (2/598) /vol/biomedic3/bglocker/ugproj2324/fv220/datasets/sharktrack_test/videos/easy1.mp4: 384x640 (no detections), 6.5ms
video 1/1 (3/598) /vol/biomedic3/bglocker/ugproj2324/fv220/datasets/sharktrack_test/videos/easy1.mp4: 384x640 1 shark, 6.2ms
video 1/1 (4/598) /vol/biomedic3/bglocker/ugproj2324/fv220/datasets/sharktra