Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions .github/actions/exp/jl-dvc/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Composite action: renders the experiment script template and launches it on
# a JarvisLabs VM, tracking training progress with DVCLive.
name: "Run experiments with JarvisLabs and DVCLive"
description: "Run experiments with JarvisLabs and DVCLive"
inputs:
  CUR_BRANCH:
    description: "Head branch of the pull request to run the experiment on"
    required: true
  CUR_PR_ID:
    description: "Number of the pull request that requested the experiment"
    required: true
  GDRIVE_CREDENTIAL:
    description: "Google Drive credential (JSON) used by the DVC remote"
    required: true
  GH_ACCESS_TOKEN:
    description: "GitHub access token substituted into the experiment script"
    required: true
  JARVISLABS_ID:
    description: "JarvisLabs account ID (the account's user email)"
    required: true
  JARVISLABS_ACCESS_TOKEN:
    description: "JarvisLabs API access token"
    required: true
runs:
  using: "composite"
  # NOTE: the caller decides whether this action runs at all (the workflow
  # gates it on the comment trigger), so the steps carry no `if:` of their own.
  # The `steps` context inside a composite action only sees this action's own
  # steps, and the `secrets` context is not available here, so every value
  # must come from `inputs`.
  steps:
    # Render scripts/experiments.sh into a temporary script, replacing only the
    # listed variables so other `$...` references in the template stay intact.
    - name: prepare script
      shell: bash
      env:
        CUR_BRANCH: ${{ inputs.CUR_BRANCH }}
        CUR_PR_ID: ${{ inputs.CUR_PR_ID }}
        GDRIVE_CREDENTIAL: ${{ inputs.GDRIVE_CREDENTIAL }}
        GH_ACCESS_TOKEN: ${{ inputs.GH_ACCESS_TOKEN }}
        CLOUD_ID: ${{ inputs.JARVISLABS_ID }}
        CLOUD_AT: ${{ inputs.JARVISLABS_ACCESS_TOKEN }}
      run: |
        envsubst \
          '$CUR_BRANCH $CUR_PR_ID $GDRIVE_CREDENTIAL $GH_ACCESS_TOKEN $CLOUD_ID $CLOUD_AT' \
          < scripts/experiments.sh \
          > scripts/experiments_tmp.sh

    - name: install jarvislabs client
      shell: bash
      run: |
        pip install typer
        pip install git+https://github.com/jarvislabsai/jlclient.git

    # Upload the rendered script; the CLI's stdout is exposed as the
    # `script_id` output for the following steps.
    # Step-level `env` does not carry over between steps, so the credentials
    # are declared again on every step that needs them.
    - name: add script to jarvislabs
      id: add_script
      shell: bash
      env:
        CLOUD_ID: ${{ inputs.JARVISLABS_ID }}
        CLOUD_AT: ${{ inputs.JARVISLABS_ACCESS_TOKEN }}
      run: |
        python clouds/jarvislabs.py \
          script add \
          "$CLOUD_AT" "$CLOUD_ID" scripts/experiments_tmp.sh \
          > outputs.txt
        echo "script_id=$(cat outputs.txt)" >> "$GITHUB_OUTPUT"

    - name: create vm on jarvislabs
      shell: bash
      env:
        CLOUD_ID: ${{ inputs.JARVISLABS_ID }}
        CLOUD_AT: ${{ inputs.JARVISLABS_ACCESS_TOKEN }}
        SCRIPT_ID: ${{ steps.add_script.outputs.script_id }}
      run: |
        python clouds/jarvislabs.py \
          vm create \
          "$CLOUD_AT" "$CLOUD_ID" "$SCRIPT_ID"

    - name: remove script from jarvislabs
      shell: bash
      env:
        CLOUD_ID: ${{ inputs.JARVISLABS_ID }}
        CLOUD_AT: ${{ inputs.JARVISLABS_ACCESS_TOKEN }}
        SCRIPT_ID: ${{ steps.add_script.outputs.script_id }}
      run: |
        python clouds/jarvislabs.py \
          script remove \
          "$CLOUD_AT" "$CLOUD_ID" "$SCRIPT_ID"
68 changes: 68 additions & 0 deletions .github/actions/exp/jl-wandb/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Composite action: renders the experiment script template and launches it on
# a JarvisLabs VM, tracking training progress with Weights & Biases.
name: "Run experiments with JarvisLabs and W&B"
description: "Run experiments with JarvisLabs and W&B"
inputs:
  CUR_BRANCH:
    description: "Head branch of the pull request to run the experiment on"
    required: true
  CUR_PR_ID:
    description: "Number of the pull request that requested the experiment"
    required: true
  GDRIVE_CREDENTIAL:
    description: "Google Drive credential (JSON) used by the DVC remote"
    required: true
  GH_ACCESS_TOKEN:
    description: "GitHub access token substituted into the experiment script"
    required: true
  JARVISLABS_ID:
    description: "JarvisLabs account ID (the account's user email)"
    required: true
  JARVISLABS_ACCESS_TOKEN:
    description: "JarvisLabs API access token"
    required: true
  WANDB_PROJECT:
    description: "Weights & Biases project name"
    required: true
  WANDB_API_KEY:
    description: "Weights & Biases API key"
    required: true
runs:
  using: "composite"
  # The caller decides whether this action runs at all; values must come from
  # `inputs` because `secrets` is not available inside a composite action.
  steps:
    # Render scripts/js_exp_wandb.sh into a temporary script, replacing only
    # the listed variables so other `$...` references in the template stay
    # intact. A plain `$NAME` in a YAML `env` value is a literal string, so the
    # values are taken from `inputs` through expressions instead.
    - name: prepare script
      shell: bash
      env:
        CUR_BRANCH: ${{ inputs.CUR_BRANCH }}
        CUR_PR_ID: ${{ inputs.CUR_PR_ID }}
        GDRIVE_CREDENTIAL: ${{ inputs.GDRIVE_CREDENTIAL }}
        GH_ACCESS_TOKEN: ${{ inputs.GH_ACCESS_TOKEN }}
        CLOUD_ID: ${{ inputs.JARVISLABS_ID }}
        CLOUD_AT: ${{ inputs.JARVISLABS_ACCESS_TOKEN }}
        WANDB_PROJECT: ${{ inputs.WANDB_PROJECT }}
        WANDB_API_KEY: ${{ inputs.WANDB_API_KEY }}
      # NOTE(review): WANDB_PROJECT / WANDB_API_KEY are now part of the
      # substitution list; this assumes the template references them - confirm
      # against scripts/js_exp_wandb.sh.
      run: |
        envsubst \
          '$CUR_BRANCH $CUR_PR_ID $GDRIVE_CREDENTIAL $GH_ACCESS_TOKEN $CLOUD_ID $CLOUD_AT $WANDB_PROJECT $WANDB_API_KEY' \
          < scripts/js_exp_wandb.sh \
          > scripts/js_exp_wandb_tmp.sh

    - name: install jarvislabs client
      shell: bash
      run: |
        pip install typer
        pip install git+https://github.com/jarvislabsai/jlclient.git

    # Upload the rendered script; the CLI's stdout is exposed as the
    # `script_id` output for the following steps.
    # Step-level `env` does not carry over between steps, so the credentials
    # are declared again on every step that needs them.
    - name: add script to jarvislabs
      id: add_script
      shell: bash
      env:
        CLOUD_ID: ${{ inputs.JARVISLABS_ID }}
        CLOUD_AT: ${{ inputs.JARVISLABS_ACCESS_TOKEN }}
      run: |
        python clouds/jarvislabs.py \
          script add \
          "$CLOUD_AT" "$CLOUD_ID" scripts/js_exp_wandb_tmp.sh \
          > outputs.txt
        echo "script_id=$(cat outputs.txt)" >> "$GITHUB_OUTPUT"

    - name: create vm on jarvislabs
      shell: bash
      env:
        CLOUD_ID: ${{ inputs.JARVISLABS_ID }}
        CLOUD_AT: ${{ inputs.JARVISLABS_ACCESS_TOKEN }}
        SCRIPT_ID: ${{ steps.add_script.outputs.script_id }}
      run: |
        python clouds/jarvislabs.py \
          vm create \
          "$CLOUD_AT" "$CLOUD_ID" "$SCRIPT_ID"

    - name: remove script from jarvislabs
      shell: bash
      env:
        CLOUD_ID: ${{ inputs.JARVISLABS_ID }}
        CLOUD_AT: ${{ inputs.JARVISLABS_ACCESS_TOKEN }}
        SCRIPT_ID: ${{ steps.add_script.outputs.script_id }}
      run: |
        python clouds/jarvislabs.py \
          script remove \
          "$CLOUD_AT" "$CLOUD_ID" "$SCRIPT_ID"
82 changes: 82 additions & 0 deletions .github/workflows/experiments.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
name: Train Trigger

# Events that start this workflow: a pull request being opened, and a comment
# being created on an issue or pull request.
on:
  pull_request:
    types:
      - opened

  issue_comment:
    types:
      - created

jobs:
  experiments-on-jarvislabs:
    # Every step below depends on `github.event.issue.pull_request`, which is
    # only present for comments made on a pull request. Skip the job for any
    # other event instead of failing on an empty API route.
    if: ${{ github.event.issue.pull_request }}
    runs-on: ubuntu-latest
    steps:
      # Fetch the pull request object that the comment belongs to.
      - uses: octokit/request-action@v2.0.0
        name: GitHub API Request
        id: request
        with:
          route: ${{ github.event.issue.pull_request.url }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Untrusted values (comment body, branch name) are handed to the shell
      # through environment variables rather than being interpolated into the
      # script text, which would allow command injection.
      - name: Get PR informations
        id: pr_data
        env:
          PR_NUMBER: ${{ fromJson(steps.request.outputs.data).number }}
          PR_BRANCH: ${{ fromJson(steps.request.outputs.data).head.ref }}
          REPO_OWNER: ${{ github.event.repository.owner.login }}
          COMMENT_OWNER: ${{ github.event.sender.login }}
          COMMENT_BODY: ${{ github.event.comment.body }}
        run: |
          # Random delimiter so a multi-line comment cannot forge extra outputs.
          delimiter="$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
          {
            printf 'number=%s\n' "$PR_NUMBER"
            printf 'branch=%s\n' "$PR_BRANCH"
            printf 'repo_owner=%s\n' "$REPO_OWNER"
            printf 'comment_owner=%s\n' "$COMMENT_OWNER"
            printf 'comment<<%s\n' "$delimiter"
            printf '%s\n' "$COMMENT_BODY"
            printf '%s\n' "$delimiter"
          } >> "$GITHUB_OUTPUT"

      # Only the repository owner is allowed to trigger experiments.
      - name: Extract comment
        if: ${{ steps.pr_data.outputs.repo_owner == steps.pr_data.outputs.comment_owner }}
        run: |
          echo "Eligible!!"

      - uses: khan/pull-request-comment-trigger@v1.1.0
        name: Listen to comment on PR (training with DVCLive)
        id: check_dvclive
        if: ${{ steps.pr_data.outputs.repo_owner == steps.pr_data.outputs.comment_owner }}
        with:
          trigger: '#train --with dvclive'
        env:
          GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'

      - uses: khan/pull-request-comment-trigger@v1.1.0
        name: Listen to comment on PR (training with W&B)
        id: check_wandb
        if: ${{ steps.pr_data.outputs.repo_owner == steps.pr_data.outputs.comment_owner }}
        with:
          trigger: '#train --with wandb'
        env:
          GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'

      # The local composite actions below are loaded from this checkout.
      - uses: actions/checkout@v3
        name: clone branch of PR
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          ref: ${{ steps.pr_data.outputs.branch }}

      - name: run experiments with JarvisLabs and DVCLive
        if: steps.check_dvclive.outputs.triggered == 'true'
        uses: ./.github/actions/exp/jl-dvc
        with:
          CUR_BRANCH: ${{ steps.pr_data.outputs.branch }}
          CUR_PR_ID: ${{ steps.pr_data.outputs.number }}
          GDRIVE_CREDENTIAL: ${{ secrets.GDRIVE_CREDENTIAL }}
          GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
          JARVISLABS_ID: ${{ secrets.JARVISLABS_USER_EMAIL }}
          JARVISLABS_ACCESS_TOKEN: ${{ secrets.JARVISLABS_ACCESS_TOKEN }}

      - name: run experiments with JarvisLabs and W&B
        if: steps.check_wandb.outputs.triggered == 'true'
        uses: ./.github/actions/exp/jl-wandb
        with:
          CUR_BRANCH: ${{ steps.pr_data.outputs.branch }}
          CUR_PR_ID: ${{ steps.pr_data.outputs.number }}
          GDRIVE_CREDENTIAL: ${{ secrets.GDRIVE_CREDENTIAL }}
          GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
          JARVISLABS_ID: ${{ secrets.JARVISLABS_USER_EMAIL }}
          JARVISLABS_ACCESS_TOKEN: ${{ secrets.JARVISLABS_ACCESS_TOKEN }}
          WANDB_PROJECT: ${{ secrets.WANDB_PROJECT }}
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
105 changes: 0 additions & 105 deletions .github/workflows/jarvislabs-experiments.yml

This file was deleted.

44 changes: 32 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ This project shows how to realize MLOps in Git/GitHub. In order to achieve this
4. Run `dvc add [ADDED FILE OR DIRECTORY]` to track your data with DVC
5. Run `dvc remote add -d gdrive_storage gdrive://[ID of specific folder in gdrive]` to add Google Drive as the remote data storage
6. Run `dvc push`; a URL for authentication is then printed. Copy and paste it into the browser and authenticate
7. Copy the content of `.dvc/tmp/gdrive-user-credentials.json` and put it as in [GitHub Secret](https://docs.github.com/en/actions/security-guides/encrypted-secrets#creating-encrypted-secrets-for-a-repository) with the name of `GDRIVE_CREDENTIALS`
7. Copy the content of `.dvc/tmp/gdrive-user-credentials.json` and store it as a [GitHub Secret](https://docs.github.com/en/actions/security-guides/encrypted-secrets#creating-encrypted-secrets-for-a-repository) named `GDRIVE_CREDENTIAL`
8. Run `git add . && git commit -m "initial commit" && git push origin main` to keep the initial setup
9. Write your own pipeline under `pipeline` directory. Codes for basic image classification in TensorFlow are provided initially.
10. Run the following `dvc stage add` for training stage
```bash
# if you want to use Iterative Studio / DVCLive for tracking training progress
$ dvc stage add -n train \
-p train.train_size,train.batch_size,train.epoch,train.lr \
-d pipeline/modeling.py -d pipeline/train.py -d data \
Expand All @@ -35,25 +36,44 @@ $ dvc stage add -n train \
--plots-no-cache dvclive/scalars/eval/sparse_categorical_accuracy.tsv \
-o outputs/model \
python pipeline/train.py outputs/model

# if you want to use W&B for tracking training progress
$ dvc stage add -n train \
-p train.train_size,train.batch_size,train.epoch,train.lr \
-d pipeline/modeling.py -d pipeline/train.py -d data \
-o outputs/model \
python pipeline/train.py outputs/model
```
10. Run the following `dvc stage add` for evaluate stage
11. Run the following `dvc stage add` for evaluate stage
```bash
# if you want to use Iterative Studio / DVCLive for tracking training progress
$ dvc stage add -n evaluate \
-p evaluate.test,evaluate.batch_size \
-d pipeline/evaluate.py -d data/test -d outputs/model \
-M outputs/metrics.json \
python pipeline/evaluate.py outputs/model

# if you want to use W&B for tracking training progress
$ dvc stage add -n evaluate \
-p evaluate.test,evaluate.batch_size \
-d pipeline/evaluate.py -d data/test -d outputs/model \
python pipeline/evaluate.py outputs/model
```
11. Update `params.yaml` as you need.
12. Run `git add . && git commit -m "add initial pipeline setup" && git push origin main`
13. Run `dvc repro` to run the pipeline initially
14. Run `dvc add outputs/model.tar.gz` to add compressed version of model
15. Run `dvc push outputs/model.tar.gz`
16. Run `echo "/pipeline/__pycache__" >> .gitignore` to ignore unnecessary directory
17. Run `git add . && git commit -m "add initial pipeline run" && git push origin main`
18. Add access token and user email of [JarvisLabs.ai](https://jarvislabs.ai/) to GitHub Secret as `JARVISLABS_ACCESS_TOKEN` and `JARVISLABS_USER_EMAIL`
19. Add GitHub access token to GitHub Secret as `GH_ACCESS_TOKEN`
20. Create a PR and write `#train` as in comment (you have to be the onwer of the repo)
12. Update `params.yaml` as you need.
13. Run `git add . && git commit -m "add initial pipeline setup" && git push origin main`
14. Run `dvc repro` to run the pipeline initially
15. Run `dvc add outputs/model.tar.gz` to add compressed version of model
16. Run `dvc push outputs/model.tar.gz`
17. Run `echo "/pipeline/__pycache__" >> .gitignore` to ignore unnecessary directory
18. Run `git add . && git commit -m "add initial pipeline run" && git push origin main`
19. Add access token and user email of [JarvisLabs.ai](https://jarvislabs.ai/) to GitHub Secret as `JARVISLABS_ACCESS_TOKEN` and `JARVISLABS_USER_EMAIL`
20. Add GitHub access token to GitHub Secret as `GH_ACCESS_TOKEN`
21. Create a PR and write `#train --with dvclive` or `#train --with wandb` in a comment (you have to be the owner of the repo)

### W&B Integration Setup

1. Add W&B's project name to GitHub Secret as `WANDB_PROJECT`
2. Add W&B's API KEY to GitHub Secret as `WANDB_API_KEY`

### HuggingFace Integration Setup

Expand Down
Loading