diff --git a/example-get-started-experiments/.gitignore b/example-get-started-experiments/.gitignore
new file mode 100644
index 00000000..e81293d0
--- /dev/null
+++ b/example-get-started-experiments/.gitignore
@@ -0,0 +1,4 @@
+# Custom
+*.zip
+/tmp
+build/
diff --git a/example-get-started-experiments/README.md b/example-get-started-experiments/README.md
new file mode 100644
index 00000000..4555f110
--- /dev/null
+++ b/example-get-started-experiments/README.md
@@ -0,0 +1,70 @@
+Generate the actual repo by running:
+
+```
+bash generate.sh
+```
+
+The repo generated in `build/example-get-started-experiments` is intended to be
+published on https://github.com/iterative/example-get-started-experiments.
+Make sure the GitHub repo exists first and that you have appropriate write
+permissions.
+
+Run these commands to force-push it:
+
+```
+cd build/example-get-started-experiments
+git remote add origin https://github.com/iterative/example-get-started-experiments.git
+git push --force origin main
+git push --force origin tune-architecture
+git push --force origin --tags
+```
+
+Run these commands to drop and then re-push the experiment references on the repo:
+
+```
+dvc exp remove -A -g origin
+dvc exp push origin -A
+```
+
+And this to garbage-collect the remote cache down to the latest iteration:
+
+```
+dvc gc -c --all-experiments
+```
+
+- `tune-architecture` PR
+
+To create a PR from the `tune-architecture` branch:
+
+```
+gh pr create -t "Run experiments tuning architecture" \
+  -B main -H tune-architecture
+```
+
+Finally, return to the directory where you started:
+
+```
+cd ../..
+```
+
+You may remove the generated repo with:
+
+```
+rm -fR build
+```
+
+- Manual Studio PR
+
+Once the repo has been generated and pushed, go to the
+[corresponding Studio project](https://studio.iterative.ai/team/Iterative/projects/example-get-started-experiments-y8toqd433r)
+and create a PR using the `Experiment` button, increasing the number of epochs
+from `8` to `12`.
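+
+To verify what was pushed above (a quick, optional sanity check; assumes the
+default DVC remote is reachable), list the experiment refs on the Git remote
+and compare the local cache against remote storage:
+
+```
+dvc exp list origin --all-commits
+dvc status -c
+```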
diff --git a/example-get-started-experiments/code/.devcontainer.json b/example-get-started-experiments/code/.devcontainer.json
new file mode 100644
index 00000000..4c71e8b2
--- /dev/null
+++ b/example-get-started-experiments/code/.devcontainer.json
@@ -0,0 +1,18 @@
+{
+  "name": "example-cv",
+  "image": "mcr.microsoft.com/devcontainers/python:3.10",
+  "runArgs": ["--ipc=host"],
+  "features": {
+    "ghcr.io/devcontainers/features/nvidia-cuda:1": {
+      "installCudnn": true
+    },
+    "ghcr.io/iterative/features/dvc:1": {},
+    "ghcr.io/iterative/features/nvtop:1": {}
+  },
+  "extensions": [
+    "Iterative.dvc",
+    "ms-python.python",
+    "redhat.vscode-yaml"
+  ],
+  "postCreateCommand": "pip install --user -r requirements.txt"
+}
diff --git a/example-get-started-experiments/code/.gitattributes b/example-get-started-experiments/code/.gitattributes
new file mode 100644
index 00000000..56eb98f5
--- /dev/null
+++ b/example-get-started-experiments/code/.gitattributes
@@ -0,0 +1,2 @@
+*.dvc linguist-language=YAML
+dvc.lock linguist-language=YAML
diff --git a/example-get-started-experiments/code/.github/workflows/run-studio-experiment.yml b/example-get-started-experiments/code/.github/workflows/run-studio-experiment.yml
new file mode 100644
index 00000000..8309ffdf
--- /dev/null
+++ b/example-get-started-experiments/code/.github/workflows/run-studio-experiment.yml
@@ -0,0 +1,57 @@
+name: Run Studio Experiment
+on:
+  push:
+    paths:
+      - ".github/workflows/run-studio-experiment.yml"
+      - "data/**"
+      - "src/**"
+      - "params.yaml"
+      - "dvc.*"
+jobs:
+  deploy-runner:
+    if: github.actor == 'iterative-studio[bot]'
+    environment: cloud
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: iterative/setup-cml@v1
+      - uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-region: us-east-2
+          role-to-assume: arn:aws:iam::342840881361:role/SandboxUser
+          role-duration-seconds: 43200
+      - name: Create Runner
+        env:
+          REPO_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
+        run: |
+          cml runner launch --single \
+            --labels=cml \
+            --cloud=aws \
+            --cloud-region=us-east \
+            --cloud-hdd-size=40 \
+            --cloud-type=g5.2xlarge
+  runner-job:
+    needs: deploy-runner
+    runs-on: [ self-hosted, cml ]
+    container:
+      image: iterativeai/cml:0-dvc2-base1-gpu
+      options: --gpus all --ipc host
+    steps:
+      - uses: actions/checkout@v3
+      - uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-region: us-east-1
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+      - name: training
+        env:
+          REPO_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
+          STUDIO_TOKEN: ${{ secrets.STUDIO_TOKEN }}
+          DVCLIVE_LOGLEVEL: DEBUG
+        run: |
+          pip install -r requirements.txt
+          dvc pull
+          cml comment create --publish --watch results/train/report.md & dvc exp run
+          dvc push
+          cml pr --squash --skip-ci .
+ cml comment create --pr --publish results/evaluate/report.md diff --git a/example-get-started-experiments/code/.gitignore b/example-get-started-experiments/code/.gitignore new file mode 100644 index 00000000..b6e47617 --- /dev/null +++ b/example-get-started-experiments/code/.gitignore @@ -0,0 +1,129 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/example-get-started-experiments/code/LICENSE b/example-get-started-experiments/code/LICENSE new file mode 100644 index 00000000..e0636bfa --- /dev/null +++ b/example-get-started-experiments/code/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Iterative + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
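A note on the `Run Studio Experiment` workflow above: the `&` in the training
step backgrounds a live-updating comment while `dvc exp run` trains. A minimal
standalone sketch of the same pattern (assumes a CI job with `cml` and `dvc`
installed and `REPO_TOKEN` exported; the explicit `kill` is only needed outside
CI, where the end of the job would otherwise stop the watcher):

```bash
# Post a comment that is re-published whenever report.md changes.
cml comment create --publish --watch results/train/report.md &
WATCH_PID=$!
dvc exp run   # DVCLive rewrites report.md during training
dvc push      # upload the cached outputs to the DVC remote
kill $WATCH_PID
```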
diff --git a/example-get-started-experiments/code/README.md b/example-get-started-experiments/code/README.md
new file mode 100644
index 00000000..90ce4d7e
--- /dev/null
+++ b/example-get-started-experiments/code/README.md
@@ -0,0 +1,147 @@
+[![DVC](https://img.shields.io/badge/-Open_in_Studio-grey.svg?style=flat-square&logo=dvc)](https://studio.iterative.ai/team/Iterative/projects/example-get-started-experiments-y8toqd433r)
+[![DVC-metrics](https://img.shields.io/badge/dynamic/json?style=flat-square&colorA=grey&colorB=F46737&label=Dice%20Multi&url=https://github.com/iterative/example-get-started-experiments/raw/main/results/evaluate/metrics.json&query=dice_multi)](https://github.com/iterative/example-get-started-experiments/raw/main/results/evaluate/metrics.json)
+
+# DVC Get Started: Experiments
+
+This is an auto-generated repository for use in [DVC](https://dvc.org)
+[Get Started: Experiments](https://dvc.org/doc/start/experiment-management).
+
+This is a Computer Vision (CV) project that solves the problem of segmenting out
+swimming pools from satellite images.
+
+We use a slightly modified version of the [BH-Pools dataset](http://patreo.dcc.ufmg.br/2020/07/29/bh-pools-watertanks-datasets/):
+we split the original 4K images into tiles of 1024x1024 pixels.
+
+
+🐛 Please report any issues found in this project to
+[example-repos-dev](https://github.com/iterative/example-repos-dev).
+
+## Installation
+
+Python 3.8+ is required to run the code in this repo.
+
+```console
+$ git clone https://github.com/iterative/example-get-started-experiments
+$ cd example-get-started-experiments
+```
+
+Now let's install the requirements. But before we do that, we **strongly**
+recommend creating a virtual environment with a tool such as
+[virtualenv](https://virtualenv.pypa.io/en/stable/):
+
+```console
+$ python -m venv .venv
+$ source .venv/bin/activate
+$ pip install -r src/requirements.txt
+```
+
+This DVC project comes with a preconfigured DVC
+[remote storage](https://dvc.org/doc/command-reference/remote) that holds the
+raw input data and the intermediate and final results it produces. This is a
+read-only HTTP remote.
+
+```console
+$ dvc remote list
+storage https://remote.dvc.org/get-started-pools
+```
+
+You can run [`dvc pull`](https://man.dvc.org/pull) to download the data:
+
+```console
+$ dvc pull
+```
+
+## Running in your environment
+
+Run [`dvc exp run`](https://man.dvc.org/exp/run) to reproduce the
+[pipeline](https://dvc.org/doc/user-guide/pipelines/defining-pipelines):
+
+```console
+$ dvc exp run
+Data and pipelines are up to date.
+```
+
+If you'd like to test commands like [`dvc push`](https://man.dvc.org/push),
+which require write access to the remote storage, the easiest way is to set
+up a "local remote" on your file system:
+
+> This kind of remote is located in the local file system, but is external to
+> the DVC project.
+
+```console
+$ mkdir -p /tmp/dvc-storage
+$ dvc remote add local /tmp/dvc-storage
+```
+
+You should now be able to run:
+
+```console
+$ dvc push -r local
+```
+
+## Existing stages
+
+There are a couple of Git tags in this project:
+
+### [1-notebook-dvclive](https://github.com/iterative/example-get-started-experiments/tree/1-notebook-dvclive)
+
+Contains an end-to-end Jupyter notebook that loads data, trains a model, and
+reports model performance.
+[DVCLive](https://dvc.org/doc/dvclive) is used for experiment tracking.
+See this [blog post](https://iterative.ai/blog/exp-tracking-dvc-python) for more
+details.
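+
+To explore this stage, check out the tag and execute the notebook (a minimal
+sketch; assumes Jupyter is installed in the active environment):
+
+```console
+$ git checkout 1-notebook-dvclive
+$ dvc pull
+$ jupyter nbconvert --execute notebooks/TrainSegModel.ipynb --inplace
+```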
+
+### [2-dvc-pipeline](https://github.com/iterative/example-get-started-experiments/tree/2-dvc-pipeline)
+
+Contains a DVC pipeline `dvc.yaml` that was created by refactoring the above
+notebook into individual pipeline stages.
+
+The pipeline artifacts (processed data, model file, etc.) are automatically
+versioned.
+
+This tag also contains a GitHub Actions workflow that reruns the pipeline if any
+changes are introduced to the pipeline-related files.
+[CML](https://cml.dev/) is used in this workflow to provision a cloud-based GPU
+machine as well as report model performance results in Pull Requests.
+
+## Project structure
+
+The data files, DVC files, and results change as stages are created one by one.
+After cloning and using [`dvc pull`](https://man.dvc.org/pull) to download
+data, models, and plots tracked by DVC, the workspace should look like this:
+
+```console
+$ tree -L 2
+.
+├── LICENSE
+├── README.md
+├── data                    # <-- Directory with raw and intermediate data
+│   ├── pool_data           # <-- Raw image data
+│   ├── pool_data.dvc       # <-- .dvc file - a placeholder/pointer to raw data
+│   ├── test_data           # <-- Processed test data
+│   └── train_data          # <-- Processed train data
+├── dvc.lock
+├── dvc.yaml                # <-- DVC pipeline file
+├── models
+│   └── model.pkl           # <-- Trained model file
+├── notebooks
+│   └── TrainSegModel.ipynb # <-- Initial notebook (refactored into `dvc.yaml`)
+├── params.yaml             # <-- Parameters file
+├── requirements.txt        # <-- Python dependencies needed in the project
+├── results                 # <-- DVCLive reports and plots
+│   ├── evaluate
+│   └── train
+└── src                     # <-- Source code to run the pipeline stages
+    ├── data_split.py
+    ├── evaluate.py
+    └── train.py
+```
diff --git a/example-get-started-experiments/code/data/.gitignore b/example-get-started-experiments/code/data/.gitignore
new file mode 100644
index 00000000..6830f1fe
--- /dev/null
+++ b/example-get-started-experiments/code/data/.gitignore
@@ -0,0 +1,3 @@
+/pool_data
+/test_data
+/train_data
diff --git a/example-get-started-experiments/code/data/pool_data.dvc b/example-get-started-experiments/code/data/pool_data.dvc
new file mode 100644
index 00000000..e83b976c
--- /dev/null
+++ b/example-get-started-experiments/code/data/pool_data.dvc
@@ -0,0 +1,5 @@
+outs:
+- md5: 14d187e749ee5614e105741c719fa185.dir
+  size: 18999874
+  nfiles: 183
+  path: pool_data
diff --git a/example-get-started-experiments/code/notebooks/TrainSegModel.ipynb b/example-get-started-experiments/code/notebooks/TrainSegModel.ipynb
new file mode 100644
index 00000000..d8392782
--- /dev/null
+++ b/example-get-started-experiments/code/notebooks/TrainSegModel.ipynb
@@ -0,0 +1,290 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "ROOT = Path(\"../\")\n",
+    "DATA = ROOT / \"data\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import warnings\n",
+    "warnings.filterwarnings(\"ignore\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import shutil\n",
+    "from functools import partial\n",
+    "\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "from box import ConfigBox\n",
+    "from dvclive import Live\n",
+    "from dvclive.fastai import DVCLiveCallback\n",
+    "from fastai.data.all import Normalize, get_files\n",
+    "from fastai.metrics import DiceMulti\n",
+    "from fastai.vision.all import (Resize, SegmentationDataLoaders, aug_transforms,\n",
+    "                               imagenet_stats, models, unet_learner)\n",
+    "from ruamel.yaml import YAML\n",
+    "from PIL import Image"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load data and split it into train/test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_pct = 0.25\n",
+    "\n",
+    "img_fpaths = get_files(DATA / \"pool_data\" / \"images\", extensions=\".jpg\")\n",
+    "\n",
+    "train_data_dir = DATA / \"train_data\"\n",
+    "train_data_dir.mkdir(exist_ok=True)\n",
+    "test_data_dir = DATA / \"test_data\"\n",
+    "test_data_dir.mkdir(exist_ok=True)\n",
+    "for img_path in img_fpaths:\n",
+    "    msk_path = DATA / \"pool_data\" / \"masks\" / f\"{img_path.stem}.png\"\n",
+    "    if np.random.uniform() <= test_pct:\n",
+    "        shutil.copy(img_path, test_data_dir)\n",
+    "        shutil.copy(msk_path, test_data_dir)\n",
+    "    else:\n",
+    "        shutil.copy(img_path, train_data_dir)\n",
+    "        shutil.copy(msk_path, train_data_dir)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create a data loader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_mask_path(x, train_data_dir):\n",
+    "    return Path(train_data_dir) / f\"{Path(x).stem}.png\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bs = 8\n",
+    "valid_pct = 0.20\n",
+    "img_size = 256\n",
+    "\n",
+    "data_loader = SegmentationDataLoaders.from_label_func(\n",
+    "    path=train_data_dir,\n",
+    "    fnames=get_files(train_data_dir, extensions=\".jpg\"),\n",
+    "    label_func=partial(get_mask_path, train_data_dir=train_data_dir),\n",
+    "    codes=[\"not-pool\", \"pool\"],\n",
+    "    bs=bs,\n",
+    "    valid_pct=valid_pct,\n",
+    "    item_tfms=Resize(img_size),\n",
+    "    batch_tfms=[\n",
+    "        *aug_transforms(size=img_size),\n",
+    "        Normalize.from_stats(*imagenet_stats),\n",
+    "    ],\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Review a sample batch of data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_loader.show_batch(alpha=0.7)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train multiple models with different learning rates using `DVCLiveCallback`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def dice(mask_pred, mask_true, classes=[0, 1], eps=1e-6):\n",
+    "    dice_list = []\n",
+    "    for c in classes:\n",
+    "        y_true = mask_true == c\n",
+    "        y_pred = mask_pred == c\n",
+    "        intersection = 2.0 * np.sum(y_true * y_pred)\n",
+    "        dice = intersection / (np.sum(y_true) + np.sum(y_pred) + eps)\n",
+    "        dice_list.append(dice)\n",
+    "    return np.mean(dice_list)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_arch = 'resnet18'\n",
+    "\n",
+    "for base_lr in [0.001, 0.005, 0.01]:\n",
+    "    with Live(str(ROOT / \"results\" / \"train\"), save_dvc_exp=True) as live:\n",
+    "        live.log_param(\"train_arch\", train_arch)\n",
+    "        fine_tune_args = {\n",
+    "            'epochs': 8,\n",
+    "            'base_lr': base_lr\n",
+    "        }\n",
+    "        live.log_params(fine_tune_args)\n",
+    "\n",
+    "        learn = unet_learner(data_loader,\n",
+    "                             arch=getattr(models, train_arch),\n",
+    "                             metrics=DiceMulti)\n",
+    "        learn.fine_tune(\n",
+    "            **fine_tune_args,\n",
+    "            cbs=[DVCLiveCallback(live=live)])\n",
+    "\n",
+    "\n",
+    "        test_img_fpaths = get_files(DATA / \"test_data\", extensions=\".jpg\")\n",
+    "        test_dl = learn.dls.test_dl(test_img_fpaths)\n",
+    "        preds, _ = learn.get_preds(dl=test_dl)\n",
+    "        masks_pred = np.array(preds[:, 1, :] > 0.5, dtype=int)\n",
+    "        test_mask_fpaths = [\n",
+    "            get_mask_path(fpath, DATA / \"test_data\") for fpath in test_img_fpaths\n",
+    "        ]\n",
+    "        masks_true = [Image.open(mask_path) for mask_path in test_mask_fpaths]\n",
+    "        masks_true = [\n",
+    "            np.array(img.resize((img_size, img_size)), dtype=int) for img in masks_true\n",
+    "        ]\n",
+    "        dice_multi = 0.0\n",
+    "        for ii in range(len(masks_true)):\n",
+    "            mask_pred, mask_true = masks_pred[ii], masks_true[ii]\n",
+    "            dice_multi += dice(mask_true, mask_pred) / len(masks_true)\n",
+    "        live.summary[\"evaluate/dice_multi\"] = dice_multi"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Compare experiments\n",
+    "!dvc exp show --only-changed"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Review sample predictions vs ground truth"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learn.show_results(max_n=6, alpha=0.7)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Review instances where loss function values are the highest (i.e. where the model is likely to be wrong)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from fastai.vision.all import SegmentationInterpretation\n",
+    "\n",
+    "interp = SegmentationInterpretation.from_learner(learn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "interp.plot_top_losses(k=5, alpha=0.7)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4 (main, Jan 25 2023, 00:13:50) [GCC 9.4.0]"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "3ad933181bd8a04b432d3370b9dc3b0662ad032c4dfaa4e4f1596c548f763858"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/example-get-started-experiments/code/params.yaml b/example-get-started-experiments/code/params.yaml
new file mode 100644
index 00000000..b6977226
--- /dev/null
+++ b/example-get-started-experiments/code/params.yaml
@@ -0,0 +1,17 @@
+base:
+  random_seed: 42
+
+data_split:
+  test_pct: 0.15
+
+train:
+  valid_pct: 0.1
+  arch: resnet18
+  img_size: 256
+  batch_size: 8
+  fine_tune_args:
+    epochs: 8
+    base_lr: 0.01
+
+evaluate:
+  n_samples_to_save: 10
diff --git a/example-get-started-experiments/code/requirements.txt b/example-get-started-experiments/code/requirements.txt
new file mode 100644
index 00000000..fa40d9c7
--- /dev/null
+++ b/example-get-started-experiments/code/requirements.txt
@@ -0,0 +1,4 @@
+dvc[s3]>=2.43.1
+dvclive>=1.3.4
+fastai
+python-box
diff --git a/example-get-started-experiments/code/src/data_split.py b/example-get-started-experiments/code/src/data_split.py
new file mode 100644
index 00000000..4c046d07
--- /dev/null
+++ 
b/example-get-started-experiments/code/src/data_split.py @@ -0,0 +1,33 @@ +import shutil +from pathlib import Path + +import numpy as np +from box import ConfigBox +from fastai.vision.all import get_files +from ruamel.yaml import YAML + + +yaml = YAML(typ="safe") + + +def data_split(): + params = ConfigBox(yaml.load(open("params.yaml", encoding="utf-8"))) + np.random.seed(params.base.random_seed) + img_fpaths = get_files(Path("data") / "pool_data" / "images", extensions=".jpg") + + train_data_dir = Path("data") / "train_data" + train_data_dir.mkdir(exist_ok=True) + test_data_dir = Path("data") / "test_data" + test_data_dir.mkdir(exist_ok=True) + for img_path in img_fpaths: + msk_path = Path("data") / "pool_data" / "masks" / f"{img_path.stem}.png" + if np.random.uniform() <= params.data_split.test_pct: + shutil.copy(img_path, test_data_dir) + shutil.copy(msk_path, test_data_dir) + else: + shutil.copy(img_path, train_data_dir) + shutil.copy(msk_path, train_data_dir) + + +if __name__ == "__main__": + data_split() diff --git a/example-get-started-experiments/code/src/evaluate.py b/example-get-started-experiments/code/src/evaluate.py new file mode 100644 index 00000000..cd40a9ca --- /dev/null +++ b/example-get-started-experiments/code/src/evaluate.py @@ -0,0 +1,99 @@ +from pathlib import Path + +import numpy as np +from box import ConfigBox +from dvclive import Live +from fastai.vision.all import get_files, load_learner +from PIL import Image +from ruamel.yaml import YAML + + +yaml = YAML(typ="safe") + + +def dice(mask_pred, mask_true, classes=[0, 1], eps=1e-6): + dice_list = [] + for c in classes: + y_true = mask_true == c + y_pred = mask_pred == c + intersection = 2.0 * np.sum(y_true * y_pred) + dice = intersection / (np.sum(y_true) + np.sum(y_pred) + eps) + dice_list.append(dice) + return np.mean(dice_list) + + +def paint_mask(mask, color_map={0: (0, 0, 0), 1: (0, 0, 255)}): + vis_shape = mask.shape + (3,) + vis = np.zeros(vis_shape) + for i, c in color_map.items(): + vis[mask == i] = color_map[i] + return Image.fromarray(vis.astype(np.uint8)) + + +def stack_images(im1, im2): + dst = Image.new("RGB", (im1.width + im2.width, im1.height)) + dst.paste(im1, (0, 0)) + dst.paste(im2, (im1.width, 0)) + return dst + + +def get_overlay_image(img_fpath, mask_true, mask_pred, img_size): + img_pil = Image.open(img_fpath).resize((img_size, img_size)) + overlay_img_true = Image.blend( + img_pil.convert("RGBA"), paint_mask(mask_true).convert("RGBA"), 0.5 + ) + + new_color_map = { + 0: (0, 0, 0), # no color - TN + 1: (255, 0, 255), # purple - FN + 2: (255, 255, 0), # yellow - FP + 3: (0, 0, 255), # blue - TP + } + combined_mask = mask_true + 2 * mask_pred + + overlay_img_pred = Image.blend( + img_pil.convert("RGBA"), + paint_mask(combined_mask, color_map=new_color_map).convert("RGBA"), + 0.5, + ) + stacked_image = stack_images(overlay_img_true, overlay_img_pred) + return stacked_image + + +def get_mask_path(x, train_data_dir): + return Path(train_data_dir) / f"{Path(x).stem}.png" + + +def evaluate(): + params = ConfigBox(yaml.load(open("params.yaml", encoding="utf-8"))) + img_size = params.train.img_size + model_fpath = Path("models") / "model.pkl" + learn = load_learner(model_fpath, cpu=False) + test_img_fpaths = get_files(Path("data") / "test_data", extensions=".jpg") + test_dl = learn.dls.test_dl(test_img_fpaths) + preds, _ = learn.get_preds(dl=test_dl) + masks_pred = np.array(preds[:, 1, :] > 0.5, dtype=int) + test_mask_fpaths = [ + get_mask_path(fpath, Path("data") / "test_data") for fpath in 
test_img_fpaths + ] + masks_true = [Image.open(mask_path) for mask_path in test_mask_fpaths] + masks_true = [ + np.array(img.resize((img_size, img_size)), dtype=int) for img in masks_true + ] + with Live("results/evaluate", report="md") as live: + dice_multi = 0.0 + for ii in range(len(masks_true)): + mask_pred, mask_true = masks_pred[ii], masks_true[ii] + dice_multi += dice(mask_true, mask_pred) / len(masks_true) + + if ii < params.evaluate.n_samples_to_save: + stacked_image = get_overlay_image( + test_img_fpaths[ii], mask_true, mask_pred, img_size + ) + live.log_image(f"{Path(test_img_fpaths[ii]).stem}.png", stacked_image) + + live.summary["dice_multi"] = dice_multi + + +if __name__ == "__main__": + evaluate() diff --git a/example-get-started-experiments/code/src/train.py b/example-get-started-experiments/code/src/train.py new file mode 100644 index 00000000..21ed91e4 --- /dev/null +++ b/example-get-started-experiments/code/src/train.py @@ -0,0 +1,74 @@ +import random +from functools import partial +from pathlib import Path + +import numpy as np +import torch +from box import ConfigBox +from dvclive.fastai import DVCLiveCallback +from fastai.data.all import Normalize, get_files +from fastai.metrics import DiceMulti +from fastai.vision.all import ( + Resize, + SegmentationDataLoaders, + aug_transforms, + imagenet_stats, + models, + unet_learner, +) +from ruamel.yaml import YAML + +yaml = YAML(typ="safe") + + +def get_mask_path(x, train_data_dir): + return Path(train_data_dir) / f"{Path(x).stem}.png" + + +def train(): + params = ConfigBox(yaml.load(open("params.yaml", encoding="utf-8"))) + + np.random.seed(params.base.random_seed) + torch.manual_seed(params.base.random_seed) + random.seed(params.base.random_seed) + train_data_dir = Path("data") / "train_data" + + data_loader = SegmentationDataLoaders.from_label_func( + path=train_data_dir, + fnames=get_files(train_data_dir, extensions=".jpg"), + label_func=partial(get_mask_path, train_data_dir=train_data_dir), + codes=["not-pool", "pool"], + bs=params.train.batch_size, + valid_pct=params.train.valid_pct, + item_tfms=Resize(params.train.img_size), + batch_tfms=[ + *aug_transforms(size=params.train.img_size), + Normalize.from_stats(*imagenet_stats), + ], + ) + + model_names = [ + name + for name in dir(models) + if not name.startswith("_") + and name.islower() + and name not in ("all", "tvm", "unet", "xresnet") + ] + if params.train.arch not in model_names: + raise ValueError(f"Unsupported model, must be one of:\n{model_names}") + + learn = unet_learner( + data_loader, arch=getattr(models, params.train.arch), metrics=DiceMulti + ) + + learn.fine_tune( + **params.train.fine_tune_args, + cbs=[DVCLiveCallback(dir="results/train", report="md")], + ) + models_dir = Path("models") + models_dir.mkdir(exist_ok=True) + learn.export(fname=(models_dir / "model.pkl").absolute()) + + +if __name__ == "__main__": + train() diff --git a/example-get-started-experiments/generate.sh b/example-get-started-experiments/generate.sh new file mode 100755 index 00000000..8aa30710 --- /dev/null +++ b/example-get-started-experiments/generate.sh @@ -0,0 +1,165 @@ +#!/bin/bash + +# Setup script env: +# e Exit immediately if a command exits with a non-zero exit status. +# u Treat unset variables as an error when substituting. +# x Print commands and their arguments as they are executed. 
+set -eux
+HERE="$( cd "$(dirname "$0")" ; pwd -P )"
+REPO_NAME="example-get-started-experiments"
+REPO_PATH="$HERE/build/$REPO_NAME"
+PROD=${1:-false}
+
+if [ -d "$REPO_PATH" ]; then
+    echo "Repo $REPO_PATH already exists, please remove it first."
+    exit 1
+fi
+
+TOTAL_TAGS=3
+STEP_TIME=100000
+BEGIN_TIME=$(( $(date +%s) - ( ${TOTAL_TAGS} * ${STEP_TIME}) ))
+export TAG_TIME=${BEGIN_TIME}
+export GIT_AUTHOR_DATE="${TAG_TIME} +0000"
+export GIT_COMMITTER_DATE="${TAG_TIME} +0000"
+tick(){
+    export TAG_TIME=$(( ${TAG_TIME} + ${STEP_TIME} ))
+    export GIT_AUTHOR_DATE="${TAG_TIME} +0000"
+    export GIT_COMMITTER_DATE="${TAG_TIME} +0000"
+}
+
+export GIT_AUTHOR_NAME="Alex Kim"
+export GIT_AUTHOR_EMAIL="alex000kim@gmail.com"
+export GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME"
+export GIT_COMMITTER_EMAIL="$GIT_AUTHOR_EMAIL"
+
+mkdir -p $REPO_PATH
+pushd $REPO_PATH
+
+virtualenv -p python3 .venv
+export VIRTUAL_ENV_DISABLE_PROMPT=true
+source .venv/bin/activate
+echo '.venv/' > .gitignore
+
+# Install from main since we'd like to update the repo before
+# the release.
+pip install "git+https://github.com/iterative/dvc#egg=dvc[s3]"
+
+git init
+cp $HERE/code/README.md .
+cp $HERE/code/.devcontainer.json .
+cp $HERE/code/.gitattributes .
+cp $HERE/code/requirements.txt .
+cp -r $HERE/code/.github .
+git add .
+tick
+git commit -m "Initialize Git repository"
+git branch -M main
+
+
+dvc init
+# Remote active in this env only; used to write to the S3 bucket below.
+dvc remote add -d --local storage s3://dvc-public/remote/get-started-pools
+# Actual remote for the generated project (read-only); an HTTP redirect of the S3 bucket above.
+dvc remote add -d storage https://remote.dvc.org/get-started-pools
+git add .
+tick
+git commit -m "Initialize DVC project"
+
+
+cp -r $HERE/code/data .
+git add data/.gitignore data/pool_data.dvc
+tick
+git commit -m "Add data"
+dvc pull
+
+
+cp -r $HERE/code/notebooks .
+git add .
+git commit -m "Add notebook using DVCLive"
+
+pip install -r requirements.txt
+pip install jupyter
+jupyter nbconvert --execute 'notebooks/TrainSegModel.ipynb' --inplace
+# Apply best experiment
+BEST_EXP_ROW=$(dvc exp show --drop '.*' --keep 'Experiment|evaluate/dice_multi|base_lr' --csv --sort-by evaluate/dice_multi | tail -n 1)
+BEST_EXP_NAME=$(echo $BEST_EXP_ROW | cut -d, -f 1)
+BEST_EXP_BASE_LR=$(echo $BEST_EXP_ROW | cut -d, -f 3)
+dvc exp apply $BEST_EXP_NAME
+git add .
+tick
+git commit -m "Run notebook and apply best experiment"
+git tag -a "1-notebook-dvclive" -m "Experiment using Notebook"
+
+
+cp -r $HERE/code/src .
+cp $HERE/code/params.yaml .
+sed -e "s/base_lr: 0.01/base_lr: $BEST_EXP_BASE_LR/" -i".bkp" params.yaml
+
+dvc stage add -n data_split \
+    -p base,data_split \
+    -d src/data_split.py -d data/pool_data \
+    -o data/train_data -o data/test_data \
+    python src/data_split.py
+
+dvc stage add -n train \
+    -p base,train \
+    -d src/train.py -d data/train_data \
+    -o models/model.pkl \
+    -M results/train/metrics.json \
+    --plots-no-cache results/train/plots \
+    python src/train.py
+
+dvc stage add -n evaluate \
+    -p base,evaluate \
+    -d src/evaluate.py -d models/model.pkl -d data/test_data \
+    -M results/evaluate/metrics.json \
+    --plots-no-cache results/evaluate/plots \
+    python src/evaluate.py
+git add .
+tick
+git commit -m "Convert Notebook to dvc.yaml pipeline"
+
+
+dvc exp run
+git add . 
+tick +git commit -m "Run dvc.yaml pipeline" +git tag -a "2-dvc-pipeline" -m "Experiment using dvc pipeline" +dvc push + + +export GIT_AUTHOR_NAME="David de la Iglesia" +export GIT_AUTHOR_EMAIL="daviddelaiglesiacastro@gmail.com" +export GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME" +export GIT_COMMITTER_EMAIL="$GIT_AUTHOR_EMAIL" + +git checkout -b "tune-architecture" + +unset GIT_AUTHOR_DATE +unset GIT_COMMITTER_DATE + +dvc exp run --queue --set-param train.arch=alexnet +dvc exp run --queue --set-param train.arch=resnet34 +dvc exp run --queue --set-param train.arch=squeezenet1_1 + +dvc exp run --run-all +# Apply best experiment +EXP=$(dvc exp show --csv --sort-by results/evaluate/metrics.json:dice_multi | tail -n 1 | cut -d , -f 1) +dvc exp apply $EXP +tick +git commit -am "Run experiments tuning architecture. Apply best one" +dvc push + +git checkout main + +popd + +unset TAG_TIME +unset GIT_AUTHOR_DATE +unset GIT_COMMITTER_DATE +unset GIT_AUTHOR_NAME +unset GIT_AUTHOR_EMAIL +unset GIT_COMMITTER_NAME +unset GIT_COMMITTER_EMAIL + +cat README.md
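

After generating, a quick way to sanity-check the result before force-pushing
(a sketch; run from the directory containing `generate.sh`):

```console
$ cd build/example-get-started-experiments
$ git log --oneline main
$ git tag -n
$ dvc exp show --only-changed
$ cd ../..
```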