Skip to content
This repository has been archived by the owner on Oct 13, 2023. It is now read-only.

6 Local Training #9

Merged
merged 45 commits into from
Oct 13, 2020
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
eba3160
Create Dockerfile and trainer.sh
Oct 5, 2020
6c29fbd
Install dependencies and execute a specified project
Oct 5, 2020
783a5b6
Modify trainer.sh to take ENV variable from docker environment
Oct 5, 2020
0746a59
Change base docker image from python3.8:buster to ubuntu:20.04
Oct 5, 2020
cd5f5d6
Implement local execution option
Oct 5, 2020
3ef357c
Add print Python 3.5 compatibility
Oct 5, 2020
f11b878
Change project name locating to model path
Oct 6, 2020
ce61092
Fix not finding script location issue with relative path
Oct 6, 2020
93cf6f5
Design CLI tool to run in github repo
Oct 6, 2020
f3d618e
Fix git dependency fail
Oct 6, 2020
e28cee2
Adjust remote cloning based on commit sha
Oct 6, 2020
a8a385f
Log docker output into logs/docker folder in a time stamped log file
Oct 7, 2020
a3ddcd1
Allow user to define custom branch
Oct 7, 2020
f8779d7
Add fast local run time option and refactor out github repo checks
Oct 7, 2020
8d4120a
Modify path to store logs
Oct 7, 2020
275d14f
Remove branch-specific clone for now
Oct 7, 2020
594134a
Add conda environment.yml dependency install support
Oct 7, 2020
a27a707
Add click argument for option of training script
Oct 8, 2020
efaadb6
Modify Github Token checking to test equality with None
Oct 8, 2020
7fb9fe3
Fix wrong variable name
Oct 8, 2020
66ada80
Fix docker build image issue (not building in hydra package folder)
Oct 8, 2020
400792c
Add explanation to script
Oct 8, 2020
a145c91
Report exception for non-implemented parts
Oct 8, 2020
e680d85
Add tests for utility functions
Oct 8, 2020
5493b0a
Refactor json to string
Oct 8, 2020
671bb50
Use gitpython to get branch name, refactoring
Oct 8, 2020
b1194ed
Fix minor extra arg issue
Oct 8, 2020
72cb1ac
Refactor git check code for easier testing
Oct 8, 2020
0dbda0c
Add tests for github functions
Oct 8, 2020
7578468
Add test for CLI
Oct 9, 2020
fb93254
Update requirement
Oct 9, 2020
e1057a9
Check coverage percentage
Oct 9, 2020
937c6fa
Add tests for GitRepo class
Oct 9, 2020
cb871e2
Refactor GitRepo class into its single file
Oct 9, 2020
d2308ca
Refactor out running procedures into classes
Oct 9, 2020
a85e221
Add local platform training procedure
Oct 9, 2020
7adb47e
Add Google Cloud class basic outline
Oct 9, 2020
071be9d
Order dict items for python 3.5 compat to avoid flaky tests
Oct 10, 2020
64820af
Minor name change
Oct 10, 2020
b74e4cd
Fix OrderDict import
Oct 10, 2020
81f6fd2
Add test coverage to github workflow
Oct 13, 2020
05d5397
Add help for option flag
Oct 13, 2020
6270069
Revert breaking change to workflow
Oct 13, 2020
6cb8fa2
Workflow add test coverage
Oct 13, 2020
6489642
Add dependency in github workflow
Oct 13, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Image that runs one training job: conda comes from the miniconda base image
# and executor.sh is the container entry point.
FROM continuumio/miniconda3

# COPY rather than ADD: executor.sh is a plain local file, so ADD's extra
# archive-extraction and URL-fetching behaviour is not wanted here.
COPY executor.sh /home/
WORKDIR /home

# Exec form, run through sh because executor.sh has no shebang line.
ENTRYPOINT ["sh", "executor.sh"]
9 changes: 9 additions & 0 deletions docker/executor.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Container entry point: clone the project at a pinned commit, build its
# conda environment, and run the training script inside that environment.
#
# Environment (supplied through `docker run -e ...`):
#   OAUTH_TOKEN    token used to authenticate the clone
#   GIT_URL        repository URL without the scheme (https:// is prepended)
#   COMMIT_SHA     commit to check out
#   PREFIX_PARAMS  space-separated KEY=VALUE words placed before python3
#   MODEL_PATH     path of the training script, resolved from the clone root

# Abort on the first failing step so a failed clone, checkout or environment
# build does not go on to run against a missing or wrong checkout.
set -e

mkdir -p project
cd project

git clone "https://${OAUTH_TOKEN}:x-oauth-basic@${GIT_URL}" .
git checkout "$COMMIT_SHA"

conda env create -f environment.yml

# PREFIX_PARAMS is deliberately left unquoted: it has to split into separate
# KEY=VALUE words (and vanish entirely when empty).
# NOTE(review): the environment name "hydra" must match the `name:` declared
# in the project's environment.yml - confirm.
conda run -n hydra $PREFIX_PARAMS python3 "$MODEL_PATH"
22 changes: 22 additions & 0 deletions docker/local_execution.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

# Build the hydra image and run one training job in it, capturing the
# container output in a time-stamped log that is moved to ./tmp/hydra/
# (relative to the directory the script was started from).
#
# Usage: sh local_execution.sh GIT_URL COMMIT_SHA OAUTH_TOKEN MODEL_PATH PREFIX_PARAMS

# $0 instead of ${BASH_SOURCE[0]}: the script is launched with `sh`, and array
# subscripts are a bash extension that POSIX shells such as dash reject.
DIR="$(dirname "$0")"
LOG_NAME=$(date +'%Y_%m_%d_%H_%M_%S')

cd "$DIR"
#TODO Bug fix building hydra image
docker build -t hydra_image .

# Every argument is quoted so that a value containing spaces (PREFIX_PARAMS is
# a space-separated list) reaches the container as one variable instead of
# being split into stray `docker run` arguments.
docker run \
    -e GIT_URL="$1" \
    -e COMMIT_SHA="$2" \
    -e OAUTH_TOKEN="$3" \
    -e MODEL_PATH="$4" \
    -e PREFIX_PARAMS="$5" \
    hydra_image:latest 2>&1 | tee "${LOG_NAME}.log"

# Back to the caller's directory; the log is filed under tmp/hydra there.
cd -
pwd
# Create logs folder if it doesn't exist
mkdir -p tmp/hydra

mv "${DIR}/${LOG_NAME}.log" tmp/hydra/
42 changes: 35 additions & 7 deletions hydra/cli.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,46 @@
import os
import re
import json
import click
import subprocess
from hydra.utils import check_repo
from hydra.version import __version__


@click.group()
@click.version_option(__version__)
def cli():
    # Root command group: carries no logic of its own, subcommands attach to
    # it through ``@cli.command()``. Left without a docstring so the text
    # click prints for --help stays exactly as before.
    return None


@cli.command()
@click.option('-m', '--model_path', required=True, type=str)
@click.option('-c', '--cpu', default=16, type=click.IntRange(0, 128), help='Number of CPU cores required')
@click.option('-r', '--memory', default=8, type=click.IntRange(0, 128), help='GB of RAM required')
@click.option('--cloud', default='local', type=click.Choice(['fast_local', 'local', 'aws', 'gcp', 'azure'], case_sensitive=False))
@click.option('--github_token', envvar='GITHUB_TOKEN')  # Takes either an option or environment var
@click.option('-b', '--branch', default='master', type=str)
@click.option('-o', '--options', default='{}', type=str)
def train(model_path, cpu, memory, github_token, cloud, branch, options):
    # Documented with comments rather than a docstring on purpose: click
    # would publish a docstring as this command's --help text.
    #
    # Runs the training script at ``model_path`` on the platform selected by
    # ``cloud``:
    #   fast_local - run the script directly with python3, skipping every
    #                git/GitHub check.
    #   local      - verify the repository state, then run the script in a
    #                container through docker/local_execution.sh.
    #   aws / gcp / azure - accepted by the option, but nothing below acts on
    #                them after the repository check.
    #
    # ``options`` is a JSON object whose items are handed to the training
    # script as KEY=VALUE environment settings.
    # ``cpu`` and ``memory`` are parsed and validated but not used by the
    # code in this function.
    click.echo("This is the training command")
    click.echo("Running on {}".format(cloud))

    options = json.loads(options)

    if cloud == 'fast_local':
        # The options travel as real environment variables. Putting the
        # "KEY=VALUE ..." string first in the argv list would make
        # subprocess treat it as the program to execute.
        env = dict(os.environ)
        env.update((key, str(value)) for key, value in options.items())
        subprocess.run(['python3', model_path], env=env)
        # The fast path must not fall through into the GitHub checks below.
        return

    # "KEY=VALUE " words (note the trailing space after each pair), later
    # placed in front of the python3 command inside the container.
    prefix_params = "".join(
        "{}={} ".format(key, value) for key, value in options.items())

    check_repo(github_token, branch)

    # Argument lists instead of shell strings: no shell is spawned and no
    # quoting rules apply.
    git_url = subprocess.check_output(
        ['git', 'config', '--get', 'remote.origin.url']).decode("utf-8").strip()
    # Remove https://www. prefix
    git_url = re.sub(r"https?://(www\.)?", "", git_url).strip().strip('/')
    # Abbreviated hash of the latest commit touching the current directory.
    commit_sha = subprocess.check_output(
        ['git', 'log', '--pretty=tformat:%h', '-n1', '.']).decode("utf-8").strip()

    if cloud == 'local':
        subprocess.run(
            ['sh', os.path.join(os.path.dirname(__file__), '../docker/local_execution.sh'),
             git_url, commit_sha, github_token, model_path, prefix_params])
23 changes: 23 additions & 0 deletions hydra/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os
import git

def check_repo(github_token, branch):
    """Verify that the current directory is a clean, fully pushed git repo.

    Args:
        github_token: GitHub token supplied by option or environment
            variable; ``None`` means it was not provided.
        branch: Name of the local branch whose commits must already exist on
            its upstream.

    Raises:
        Exception: If the token is missing, the repository is bare, the
            branch has commits that are not on its upstream, or there are
            staged or modified-but-unstaged files.
    """
    if github_token is None:
        raise Exception("GITHUB_TOKEN not found in environment variable or as argument")

    repo = git.Repo(os.getcwd())
    if repo.bare:
        raise Exception("This is not a git repo")

    count_modified_files = len(repo.index.diff(None))
    count_staged_files = len(repo.index.diff("HEAD"))
    # "<branch>@{u}..<branch>": commits reachable from the local branch but
    # not from its upstream. Both ends use ``branch``; a hard-coded "master"
    # on the right would compare the wrong branch for any other value.
    unpushed_range = "{0}@{{u}}..{0}".format(branch)
    count_unpushed_commits = len(list(repo.iter_commits(unpushed_range)))

    if count_unpushed_commits > 0:
        raise Exception("Some commits are not pushed to {} branch.".format(branch))

    if count_staged_files > 0:
        raise Exception("Some staged files are not commited.")

    if count_modified_files > 0:
        raise Exception("Some modified files are not staged for commit.")
2 changes: 1 addition & 1 deletion hydra/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# Package version string; hydra/cli.py passes it to click.version_option().
__version__ = '0.1.0'
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# Command-line framework used by hydra/cli.py
click==7.1.2
# Provides the `git` module imported by hydra/utils.py
GitPython