From eba3160a9f6b2214aae6b29d3c661f3d1c673e33 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Mon, 5 Oct 2020 09:17:30 -0700 Subject: [PATCH 01/45] Create Dockerfile and trainer.sh trainer.sh takes in github url, oauth token as argument and clones a private repo Dockerfile is based on Python 3.7 base image, and calls the trainer script --- docker/Dockerfile | 6 ++++++ docker/trainer.sh | 12 ++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 docker/Dockerfile create mode 100644 docker/trainer.sh diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..a1a6332 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,6 @@ +FROM python:3.7-buster + +ADD trainer.sh /home +WORKDIR /home + +ENTRYPOINT ["sh", "trainer.sh"] diff --git a/docker/trainer.sh b/docker/trainer.sh new file mode 100644 index 0000000..27d6073 --- /dev/null +++ b/docker/trainer.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +while getopts r:t: flag +do + case "${flag}" in + r) git_url=${OPTARG};; + t) oauth_token=${OPTARG}; + esac +done + +echo "Github Repo URL: $git_url"; +git clone https://$oauth_token:x-oauth-basic@$git_url From 6c29fbd769035efe475f5a97d212eb87b4c8b166 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Mon, 5 Oct 2020 10:06:12 -0700 Subject: [PATCH 02/45] Install dependencies and execute a specified project --- docker/trainer.sh | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docker/trainer.sh b/docker/trainer.sh index 27d6073..fcffe52 100644 --- a/docker/trainer.sh +++ b/docker/trainer.sh @@ -1,12 +1,21 @@ #!/bin/bash -while getopts r:t: flag +while getopts r:t:p: flag do case "${flag}" in r) git_url=${OPTARG};; - t) oauth_token=${OPTARG}; + t) oauth_token=${OPTARG};; + p) project_name=${OPTARG}; esac done -echo "Github Repo URL: $git_url"; -git clone https://$oauth_token:x-oauth-basic@$git_url +echo "Github Repo URL: $git_url" + +mkdir project +cd project + +git clone https://$oauth_token:x-oauth-basic@$git_url . 
+ +cd $project_name +pip install -r requirements.txt +python train.py From 783a5b6da096d6e73c5a020696b7a4a816c13b3c Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Mon, 5 Oct 2020 11:24:11 -0700 Subject: [PATCH 03/45] Modify trainer.sh to take ENV variable from docker environment --- docker/trainer.sh | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/docker/trainer.sh b/docker/trainer.sh index fcffe52..83e6eba 100644 --- a/docker/trainer.sh +++ b/docker/trainer.sh @@ -1,21 +1,12 @@ #!/bin/bash -while getopts r:t:p: flag -do - case "${flag}" in - r) git_url=${OPTARG};; - t) oauth_token=${OPTARG};; - p) project_name=${OPTARG}; - esac -done - -echo "Github Repo URL: $git_url" +echo "Github Repo URL: $GIT_URL" mkdir project cd project -git clone https://$oauth_token:x-oauth-basic@$git_url . +git clone -b test-samples https://$OAUTH_TOKEN:x-oauth-basic@$GIT_URL . -cd $project_name +cd $PROJECT_NAME pip install -r requirements.txt python train.py From 0746a598ff71f7f79aa74c6065ed0ef28b15c691 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Mon, 5 Oct 2020 12:28:01 -0700 Subject: [PATCH 04/45] Change base docker image from python:3.7-buster to ubuntu:20.04 Ubuntu has better compatibility with data scientists --- docker/Dockerfile | 8 +++++++- docker/trainer.sh | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index a1a6332..a0fbf06 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,10 @@ -FROM python:3.7-buster +FROM ubuntu:20.04 + +RUN apt-get update && \ + apt-get upgrade -y && \ + apt-get install -y git + +RUN apt-get install python3-pip -y ADD trainer.sh /home WORKDIR /home diff --git a/docker/trainer.sh b/docker/trainer.sh index 83e6eba..beaa1af 100644 --- a/docker/trainer.sh +++ b/docker/trainer.sh @@ -8,5 +8,5 @@ cd project git clone -b test-samples https://$OAUTH_TOKEN:x-oauth-basic@$GIT_URL . 
cd $PROJECT_NAME -pip install -r requirements.txt -python train.py +pip3 install -r requirements.txt +python3 train.py From cd5f5d60160d17adf1fc816aa16017906fd9155d Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Mon, 5 Oct 2020 13:39:51 -0700 Subject: [PATCH 05/45] Implement local execution option - Modify CLI options - Add local_execution.sh that builds the docker image for local training - Change the name of trainer.sh to executor.sh --- docker/Dockerfile | 4 ++-- docker/{trainer.sh => executor.sh} | 0 docker/local_execution.sh | 8 ++++++++ hydra/cli.py | 19 ++++++++++++------- 4 files changed, 22 insertions(+), 9 deletions(-) rename docker/{trainer.sh => executor.sh} (100%) create mode 100644 docker/local_execution.sh diff --git a/docker/Dockerfile b/docker/Dockerfile index a0fbf06..34819c4 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -6,7 +6,7 @@ RUN apt-get update && \ RUN apt-get install python3-pip -y -ADD trainer.sh /home +ADD executor.sh /home WORKDIR /home -ENTRYPOINT ["sh", "trainer.sh"] +ENTRYPOINT ["sh", "executor.sh"] diff --git a/docker/trainer.sh b/docker/executor.sh similarity index 100% rename from docker/trainer.sh rename to docker/executor.sh diff --git a/docker/local_execution.sh b/docker/local_execution.sh new file mode 100644 index 0000000..18ea810 --- /dev/null +++ b/docker/local_execution.sh @@ -0,0 +1,8 @@ +#!/bin/bash +docker build -t hydra_image . 
+ +docker run \ + -e GIT_URL=github.com/georgianpartners/hydra-ml-projects \ + -e PROJECT_NAME=$1 \ + -e OAUTH_TOKEN=$2 \ + hydra_image:latest diff --git a/hydra/cli.py b/hydra/cli.py index fdd3ce6..d160f6e 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -1,7 +1,7 @@ import click +import subprocess from hydra.version import __version__ - @click.group() @click.version_option(__version__) def cli(): @@ -9,10 +9,15 @@ def cli(): @cli.command() -@click.option('--project_name') -@click.option('--model_name') -@click.option('--cpu') -@click.option('--memory') -@click.option('--options') -def train(project_name, model_name, cpu, memory, options): +@click.option('-p', '--project_name', required=True, type=str) +@click.option('-c', '--cpu', default=16, type=click.IntRange(0, 128), help='Number of CPU cores required') +@click.option('-r', '--memory', default=8, type=click.IntRange(0, 128), help='GB of RAM required') +@click.option('--cloud', default='local', type=click.Choice(['local', 'aws', 'gcp', 'azure'], case_sensitive=False)) +@click.option('--github_token', envvar='GITHUB_TOKEN') # Takes either an option or environment var + +def train(project_name, cpu, memory, github_token, cloud): click.echo("This is the training command") + click.echo(f"Running on {cloud}.") + + if cloud == 'local': + subprocess.run(['sh', 'docker/local_execution.sh', project_name, github_token]) From 3ef357cb10369a30d1f851f2f05d6b991641082a Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Mon, 5 Oct 2020 14:02:49 -0700 Subject: [PATCH 06/45] Add print Python 3.5 compatibility --- hydra/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydra/cli.py b/hydra/cli.py index d160f6e..6a5d2ed 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -17,7 +17,7 @@ def cli(): def train(project_name, cpu, memory, github_token, cloud): click.echo("This is the training command") - click.echo(f"Running on {cloud}.") + click.echo("Running on {}.".format(cloud)) if cloud == 'local': 
subprocess.run(['sh', 'docker/local_execution.sh', project_name, github_token]) From f11b878ae9ea82d20052897a9400c88c74cfcbb8 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Tue, 6 Oct 2020 09:42:46 -0700 Subject: [PATCH 07/45] Change project name locating to model path --- docker/executor.sh | 5 ++--- docker/local_execution.sh | 2 +- hydra/cli.py | 8 ++++---- hydra/version.py | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/docker/executor.sh b/docker/executor.sh index beaa1af..f0f6e28 100644 --- a/docker/executor.sh +++ b/docker/executor.sh @@ -5,8 +5,7 @@ echo "Github Repo URL: $GIT_URL" mkdir project cd project -git clone -b test-samples https://$OAUTH_TOKEN:x-oauth-basic@$GIT_URL . +git clone https://$OAUTH_TOKEN:x-oauth-basic@$GIT_URL . -cd $PROJECT_NAME pip3 install -r requirements.txt -python3 train.py +python3 $MODEL_PATH diff --git a/docker/local_execution.sh b/docker/local_execution.sh index 18ea810..41b84a6 100644 --- a/docker/local_execution.sh +++ b/docker/local_execution.sh @@ -3,6 +3,6 @@ docker build -t hydra_image . 
docker run \ -e GIT_URL=github.com/georgianpartners/hydra-ml-projects \ - -e PROJECT_NAME=$1 \ + -e MODEL_PATH=$1 \ -e OAUTH_TOKEN=$2 \ hydra_image:latest diff --git a/hydra/cli.py b/hydra/cli.py index 6a5d2ed..cfdb06b 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -9,15 +9,15 @@ def cli(): @cli.command() -@click.option('-p', '--project_name', required=True, type=str) +@click.option('-m', '--model_path', required=True, type=str) @click.option('-c', '--cpu', default=16, type=click.IntRange(0, 128), help='Number of CPU cores required') @click.option('-r', '--memory', default=8, type=click.IntRange(0, 128), help='GB of RAM required') @click.option('--cloud', default='local', type=click.Choice(['local', 'aws', 'gcp', 'azure'], case_sensitive=False)) @click.option('--github_token', envvar='GITHUB_TOKEN') # Takes either an option or environment var -def train(project_name, cpu, memory, github_token, cloud): +def train(model_path, cpu, memory, github_token, cloud): click.echo("This is the training command") - click.echo("Running on {}.".format(cloud)) + click.echo("Running on {}".format(cloud)) if cloud == 'local': - subprocess.run(['sh', 'docker/local_execution.sh', project_name, github_token]) + subprocess.run(['sh', 'docker/local_execution.sh', model_path, github_token]) diff --git a/hydra/version.py b/hydra/version.py index 541f859..b794fd4 100644 --- a/hydra/version.py +++ b/hydra/version.py @@ -1 +1 @@ -__version__ = '0.1.0' \ No newline at end of file +__version__ = '0.1.0' From ce610929d1abf2899459f2c2b1cf93c3ba43735a Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Tue, 6 Oct 2020 10:04:47 -0700 Subject: [PATCH 08/45] Fix not finding script location issue with relative path --- hydra/cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hydra/cli.py b/hydra/cli.py index cfdb06b..91efa5d 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -1,3 +1,4 @@ +import os import click import subprocess from hydra.version import __version__ @@ -20,4 +21,5 @@ 
def train(model_path, cpu, memory, github_token, cloud): click.echo("Running on {}".format(cloud)) if cloud == 'local': - subprocess.run(['sh', 'docker/local_execution.sh', model_path, github_token]) + subprocess.run( + ['sh', os.path.join(os.path.dirname(__file__), '../docker/local_execution.sh'), model_path, github_token]) From 93cf6f5ca214ed78c6d53f712101d2bb70c9e3a0 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Tue, 6 Oct 2020 11:40:45 -0700 Subject: [PATCH 09/45] Design CLI tool to run in github repo First, check if local repo match with remote master branch. Then, in docker, clone the remote repo and run the specified job --- docker/local_execution.sh | 6 +++--- hydra/cli.py | 25 ++++++++++++++++++++++++- requirements.txt | 3 ++- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/docker/local_execution.sh b/docker/local_execution.sh index 41b84a6..e77ff16 100644 --- a/docker/local_execution.sh +++ b/docker/local_execution.sh @@ -2,7 +2,7 @@ docker build -t hydra_image . 
docker run \ - -e GIT_URL=github.com/georgianpartners/hydra-ml-projects \ - -e MODEL_PATH=$1 \ - -e OAUTH_TOKEN=$2 \ + -e GIT_URL=$1 \ + -e MODEL_PATH=$2 \ + -e OAUTH_TOKEN=$3 \ hydra_image:latest diff --git a/hydra/cli.py b/hydra/cli.py index 91efa5d..4676008 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -1,4 +1,6 @@ import os +import re +import git import click import subprocess from hydra.version import __version__ @@ -20,6 +22,27 @@ def train(model_path, cpu, memory, github_token, cloud): click.echo("This is the training command") click.echo("Running on {}".format(cloud)) + repo = git.Repo(os.getcwd()) + if (repo.bare): + raise Exception("This is not a git repo") + + count_modified_files = len(repo.index.diff(None)) + count_staged_files = len(repo.index.diff("HEAD")) + count_unpushed_commits = len(list(repo.iter_commits('master@{u}..master'))) + + if count_unpushed_commits > 0: + raise Exception("Some commits are not pushed to master branch.") + + if count_staged_files > 0: + raise Exception("Some staged files are not commited.") + + if count_modified_files > 0: + raise Exception("Some modified files are not staged for commit.") + + git_url = subprocess.check_output("git config --get remote.origin.url", shell=True).decode("utf-8").strip() + # Remove https://www. 
prefix + git_url = re.compile(r"https?://(www\.)?").sub("", git_url).strip().strip('/') + if cloud == 'local': subprocess.run( - ['sh', os.path.join(os.path.dirname(__file__), '../docker/local_execution.sh'), model_path, github_token]) + ['sh', os.path.join(os.path.dirname(__file__), '../docker/local_execution.sh'), git_url, model_path, github_token]) diff --git a/requirements.txt b/requirements.txt index 609dec7..ac95e93 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -click==7.1.2 \ No newline at end of file +click==7.1.2 +git==2.24.3 From f3d618e68d70bc7a27834aea9a8c7795c1d9dec0 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Tue, 6 Oct 2020 11:44:28 -0700 Subject: [PATCH 10/45] Fix git dependency fail --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ac95e93..f7fc59b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ click==7.1.2 -git==2.24.3 From e28cee226d1011ab06070087e97273f2ed5e5cff Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Tue, 6 Oct 2020 12:35:53 -0700 Subject: [PATCH 11/45] Adjust remote cloning based on commit sha --- docker/executor.sh | 5 ++--- docker/local_execution.sh | 6 +++--- hydra/cli.py | 6 +++++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/docker/executor.sh b/docker/executor.sh index f0f6e28..1f1e9e5 100644 --- a/docker/executor.sh +++ b/docker/executor.sh @@ -1,11 +1,10 @@ -#!/bin/bash - -echo "Github Repo URL: $GIT_URL" +echo "Github Repo: https://$OAUTH_TOKEN:x-oauth-basic@${GIT_URL}/tree/${COMMIT_SHA}" mkdir project cd project git clone https://$OAUTH_TOKEN:x-oauth-basic@$GIT_URL . +git checkout $COMMIT_SHA pip3 install -r requirements.txt python3 $MODEL_PATH diff --git a/docker/local_execution.sh b/docker/local_execution.sh index e77ff16..c3a4a93 100644 --- a/docker/local_execution.sh +++ b/docker/local_execution.sh @@ -1,8 +1,8 @@ -#!/bin/bash docker build -t hydra_image . 
docker run \ -e GIT_URL=$1 \ - -e MODEL_PATH=$2 \ - -e OAUTH_TOKEN=$3 \ + -e COMMIT_SHA=$2 \ + -e MODEL_PATH=$3 \ + -e OAUTH_TOKEN=$4 \ hydra_image:latest diff --git a/hydra/cli.py b/hydra/cli.py index 4676008..9c09035 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -22,6 +22,9 @@ def train(model_path, cpu, memory, github_token, cloud): click.echo("This is the training command") click.echo("Running on {}".format(cloud)) + if github_token == "": + raise Exception("GITHUB_TOKEN not found in environment variable or as argument") + repo = git.Repo(os.getcwd()) if (repo.bare): raise Exception("This is not a git repo") @@ -42,7 +45,8 @@ def train(model_path, cpu, memory, github_token, cloud): git_url = subprocess.check_output("git config --get remote.origin.url", shell=True).decode("utf-8").strip() # Remove https://www. prefix git_url = re.compile(r"https?://(www\.)?").sub("", git_url).strip().strip('/') + commit_sha = subprocess.check_output("git log --pretty=tformat:'%h' -n1 .", shell=True).decode("utf-8").strip() if cloud == 'local': subprocess.run( - ['sh', os.path.join(os.path.dirname(__file__), '../docker/local_execution.sh'), git_url, model_path, github_token]) + ['sh', os.path.join(os.path.dirname(__file__), '../docker/local_execution.sh'), git_url, commit_sha, model_path, github_token]) From a8a385f899fd8501a58e3ae7e8a371401f0c0918 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Wed, 7 Oct 2020 09:43:46 -0700 Subject: [PATCH 12/45] Log docker output into logs/docker folder in a time stamped log file --- docker/local_execution.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/local_execution.sh b/docker/local_execution.sh index c3a4a93..1e5eee1 100644 --- a/docker/local_execution.sh +++ b/docker/local_execution.sh @@ -1,8 +1,11 @@ docker build -t hydra_image . 
+# Create logs folder if it doesn't exist +mkdir -p logs/docker + docker run \ -e GIT_URL=$1 \ -e COMMIT_SHA=$2 \ -e MODEL_PATH=$3 \ -e OAUTH_TOKEN=$4 \ - hydra_image:latest + hydra_image:latest > logs/docker/$(date +'%Y_%m_%d_%H_%M_%S').log From a3ddcd1978e90d167871a7ad62ed733057edfc79 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Wed, 7 Oct 2020 10:01:26 -0700 Subject: [PATCH 13/45] Allow user to define custom branch --- hydra/cli.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hydra/cli.py b/hydra/cli.py index 9c09035..cc4879f 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -17,10 +17,12 @@ def cli(): @click.option('-r', '--memory', default=8, type=click.IntRange(0, 128), help='GB of RAM required') @click.option('--cloud', default='local', type=click.Choice(['local', 'aws', 'gcp', 'azure'], case_sensitive=False)) @click.option('--github_token', envvar='GITHUB_TOKEN') # Takes either an option or environment var +@click.option('-b', '--branch', default='master', type=str) -def train(model_path, cpu, memory, github_token, cloud): +def train(model_path, cpu, memory, github_token, cloud, branch): click.echo("This is the training command") click.echo("Running on {}".format(cloud)) + click.echo("Remote branch: {}".format(branch)) if github_token == "": raise Exception("GITHUB_TOKEN not found in environment variable or as argument") @@ -31,7 +33,7 @@ def train(model_path, cpu, memory, github_token, cloud): count_modified_files = len(repo.index.diff(None)) count_staged_files = len(repo.index.diff("HEAD")) - count_unpushed_commits = len(list(repo.iter_commits('master@{u}..master'))) + count_unpushed_commits = len(list(repo.iter_commits(branch+'@{u}..master'))) if count_unpushed_commits > 0: raise Exception("Some commits are not pushed to master branch.") From f8779d70eee807779f5767ee9522e5c90890a14e Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Wed, 7 Oct 2020 10:25:17 -0700 Subject: [PATCH 14/45] Add fast local run time option and refactor 
out github repo checks --- docker/local_execution.sh | 1 + hydra/cli.py | 27 +++++---------------------- hydra/utils.py | 23 +++++++++++++++++++++++ 3 files changed, 29 insertions(+), 22 deletions(-) create mode 100644 hydra/utils.py diff --git a/docker/local_execution.sh b/docker/local_execution.sh index 1e5eee1..ed4762d 100644 --- a/docker/local_execution.sh +++ b/docker/local_execution.sh @@ -8,4 +8,5 @@ docker run \ -e COMMIT_SHA=$2 \ -e MODEL_PATH=$3 \ -e OAUTH_TOKEN=$4 \ + -e BRANCH=$5 \ hydra_image:latest > logs/docker/$(date +'%Y_%m_%d_%H_%M_%S').log diff --git a/hydra/cli.py b/hydra/cli.py index cc4879f..823456c 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -1,8 +1,8 @@ import os import re -import git import click import subprocess +from hydra.utils import check_repo from hydra.version import __version__ @click.group() @@ -15,35 +15,18 @@ def cli(): @click.option('-m', '--model_path', required=True, type=str) @click.option('-c', '--cpu', default=16, type=click.IntRange(0, 128), help='Number of CPU cores required') @click.option('-r', '--memory', default=8, type=click.IntRange(0, 128), help='GB of RAM required') -@click.option('--cloud', default='local', type=click.Choice(['local', 'aws', 'gcp', 'azure'], case_sensitive=False)) +@click.option('--cloud', default='local', type=click.Choice(['fast_local','local', 'aws', 'gcp', 'azure'], case_sensitive=False)) @click.option('--github_token', envvar='GITHUB_TOKEN') # Takes either an option or environment var @click.option('-b', '--branch', default='master', type=str) def train(model_path, cpu, memory, github_token, cloud, branch): click.echo("This is the training command") click.echo("Running on {}".format(cloud)) - click.echo("Remote branch: {}".format(branch)) - if github_token == "": - raise Exception("GITHUB_TOKEN not found in environment variable or as argument") - - repo = git.Repo(os.getcwd()) - if (repo.bare): - raise Exception("This is not a git repo") - - count_modified_files = 
len(repo.index.diff(None)) - count_staged_files = len(repo.index.diff("HEAD")) - count_unpushed_commits = len(list(repo.iter_commits(branch+'@{u}..master'))) - - if count_unpushed_commits > 0: - raise Exception("Some commits are not pushed to master branch.") - - if count_staged_files > 0: - raise Exception("Some staged files are not commited.") - - if count_modified_files > 0: - raise Exception("Some modified files are not staged for commit.") + if cloud == 'fast_local': + subprocess.run(['python3', model_path]) + check_repo(github_token, branch) git_url = subprocess.check_output("git config --get remote.origin.url", shell=True).decode("utf-8").strip() # Remove https://www. prefix git_url = re.compile(r"https?://(www\.)?").sub("", git_url).strip().strip('/') diff --git a/hydra/utils.py b/hydra/utils.py new file mode 100644 index 0000000..dd20efd --- /dev/null +++ b/hydra/utils.py @@ -0,0 +1,23 @@ +import os +import git + +def check_repo(github_token, branch): + if github_token == "": + raise Exception("GITHUB_TOKEN not found in environment variable or as argument") + + repo = git.Repo(os.getcwd()) + if (repo.bare): + raise Exception("This is not a git repo") + + count_modified_files = len(repo.index.diff(None)) + count_staged_files = len(repo.index.diff("HEAD")) + count_unpushed_commits = len(list(repo.iter_commits(branch+'@{u}..master'))) + + if count_unpushed_commits > 0: + raise Exception("Some commits are not pushed to master branch.") + + if count_staged_files > 0: + raise Exception("Some staged files are not commited.") + + if count_modified_files > 0: + raise Exception("Some modified files are not staged for commit.") From 8d4120a0bd74474b391b7bbe26883683371e4d46 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Wed, 7 Oct 2020 10:27:39 -0700 Subject: [PATCH 15/45] Modify path to store logs --- docker/local_execution.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/local_execution.sh b/docker/local_execution.sh index 
ed4762d..b7fe26e 100644 --- a/docker/local_execution.sh +++ b/docker/local_execution.sh @@ -1,7 +1,7 @@ docker build -t hydra_image . # Create logs folder if it doesn't exist -mkdir -p logs/docker +mkdir -p tmp/hydra docker run \ -e GIT_URL=$1 \ @@ -9,4 +9,4 @@ docker run \ -e MODEL_PATH=$3 \ -e OAUTH_TOKEN=$4 \ -e BRANCH=$5 \ - hydra_image:latest > logs/docker/$(date +'%Y_%m_%d_%H_%M_%S').log + hydra_image:latest > tmp/hydra/$(date +'%Y_%m_%d_%H_%M_%S').log From 275d14ff0da2bb3b1513fde99072d388d18a7c7e Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Wed, 7 Oct 2020 10:29:09 -0700 Subject: [PATCH 16/45] Remove branch specific clone for now --- docker/local_execution.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/local_execution.sh b/docker/local_execution.sh index b7fe26e..4c5426c 100644 --- a/docker/local_execution.sh +++ b/docker/local_execution.sh @@ -8,5 +8,4 @@ docker run \ -e COMMIT_SHA=$2 \ -e MODEL_PATH=$3 \ -e OAUTH_TOKEN=$4 \ - -e BRANCH=$5 \ hydra_image:latest > tmp/hydra/$(date +'%Y_%m_%d_%H_%M_%S').log From 594134ab415e0eccb10a53c875b9033a81d460e8 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Wed, 7 Oct 2020 12:41:36 -0700 Subject: [PATCH 17/45] Add conda environment.yml dependency install support --- docker/Dockerfile | 10 ++-------- docker/executor.sh | 6 ++++-- docker/local_execution.sh | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 34819c4..2d7b84a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,12 +1,6 @@ -FROM ubuntu:20.04 +FROM continuumio/miniconda3 -RUN apt-get update && \ - apt-get upgrade -y && \ - apt-get install -y git - -RUN apt-get install python3-pip -y - -ADD executor.sh /home +ADD docker/executor.sh /home WORKDIR /home ENTRYPOINT ["sh", "executor.sh"] diff --git a/docker/executor.sh b/docker/executor.sh index 1f1e9e5..81d1e1f 100644 --- a/docker/executor.sh +++ b/docker/executor.sh @@ -6,5 +6,7 @@ cd project git clone 
https://$OAUTH_TOKEN:x-oauth-basic@$GIT_URL . git checkout $COMMIT_SHA -pip3 install -r requirements.txt -python3 $MODEL_PATH +conda env create -f environment.yml +conda run -n hydra /bin/bash -c + +conda run -n hydra python3 $MODEL_PATH diff --git a/docker/local_execution.sh b/docker/local_execution.sh index 4c5426c..199fcbd 100644 --- a/docker/local_execution.sh +++ b/docker/local_execution.sh @@ -1,5 +1,5 @@ +#TODO Bug fix building hydra image docker build -t hydra_image . - # Create logs folder if it doesn't exist mkdir -p tmp/hydra From a27a7079e694188a821dfb6e54429ebe256fb062 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Thu, 8 Oct 2020 07:53:44 -0700 Subject: [PATCH 18/45] Add click argument for option of training script --- docker/executor.sh | 2 +- docker/local_execution.sh | 5 +++-- hydra/cli.py | 15 ++++++++++++--- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/docker/executor.sh b/docker/executor.sh index 81d1e1f..397738b 100644 --- a/docker/executor.sh +++ b/docker/executor.sh @@ -9,4 +9,4 @@ git checkout $COMMIT_SHA conda env create -f environment.yml conda run -n hydra /bin/bash -c -conda run -n hydra python3 $MODEL_PATH +conda run -n hydra $PREFIX_PARAMS python3 $MODEL_PATH diff --git a/docker/local_execution.sh b/docker/local_execution.sh index 199fcbd..cdfde71 100644 --- a/docker/local_execution.sh +++ b/docker/local_execution.sh @@ -6,6 +6,7 @@ mkdir -p tmp/hydra docker run \ -e GIT_URL=$1 \ -e COMMIT_SHA=$2 \ - -e MODEL_PATH=$3 \ - -e OAUTH_TOKEN=$4 \ + -e OAUTH_TOKEN=$3 \ + -e MODEL_PATH=$4 \ + -e PREFIX_PARAMS=$5 \ hydra_image:latest > tmp/hydra/$(date +'%Y_%m_%d_%H_%M_%S').log diff --git a/hydra/cli.py b/hydra/cli.py index 823456c..c0bb8e1 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -1,5 +1,6 @@ import os import re +import json import click import subprocess from hydra.utils import check_repo @@ -18,13 +19,20 @@ def cli(): @click.option('--cloud', default='local', type=click.Choice(['fast_local','local', 'aws', 'gcp', 
'azure'], case_sensitive=False)) @click.option('--github_token', envvar='GITHUB_TOKEN') # Takes either an option or environment var @click.option('-b', '--branch', default='master', type=str) +@click.option('-o', '--options', default='{}', type=str) -def train(model_path, cpu, memory, github_token, cloud, branch): +def train(model_path, cpu, memory, github_token, cloud, branch, options): click.echo("This is the training command") click.echo("Running on {}".format(cloud)) + options = json.loads(options) + + prefix_params = "" + for key, value in options.items(): + prefix_params += key + "=" + str(value) + " " + if cloud == 'fast_local': - subprocess.run(['python3', model_path]) + subprocess.run([command_prefix, 'python3', model_path]) check_repo(github_token, branch) git_url = subprocess.check_output("git config --get remote.origin.url", shell=True).decode("utf-8").strip() @@ -34,4 +42,5 @@ def train(model_path, cpu, memory, github_token, cloud, branch): if cloud == 'local': subprocess.run( - ['sh', os.path.join(os.path.dirname(__file__), '../docker/local_execution.sh'), git_url, commit_sha, model_path, github_token]) + ['sh', os.path.join(os.path.dirname(__file__), '../docker/local_execution.sh'), + git_url, commit_sha, github_token, model_path, prefix_params]) From efaadb65f6a65474124de04c0aea20d683cf6bd3 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Thu, 8 Oct 2020 07:54:48 -0700 Subject: [PATCH 19/45] Modify Github Token checking to test equality with None --- hydra/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydra/utils.py b/hydra/utils.py index dd20efd..8b8c78f 100644 --- a/hydra/utils.py +++ b/hydra/utils.py @@ -2,7 +2,7 @@ import git def check_repo(github_token, branch): - if github_token == "": + if github_token == None: raise Exception("GITHUB_TOKEN not found in environment variable or as argument") repo = git.Repo(os.getcwd()) From 7fb9fe3a5dada3fb3de3232ad5d5b3e6e7e26a8e Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Thu, 
8 Oct 2020 08:56:21 -0700 Subject: [PATCH 20/45] Fix wrong variable name --- hydra/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydra/cli.py b/hydra/cli.py index c0bb8e1..d520ee6 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -32,7 +32,7 @@ def train(model_path, cpu, memory, github_token, cloud, branch, options): prefix_params += key + "=" + str(value) + " " if cloud == 'fast_local': - subprocess.run([command_prefix, 'python3', model_path]) + subprocess.run([prefix_params, 'python3', model_path]) check_repo(github_token, branch) git_url = subprocess.check_output("git config --get remote.origin.url", shell=True).decode("utf-8").strip() From 66ada80e7c365aade507c216d71b27cff63f926f Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Thu, 8 Oct 2020 09:58:58 -0700 Subject: [PATCH 21/45] Fix docker build image issue (not building in hydra package folder --- docker/Dockerfile | 2 +- docker/executor.sh | 3 --- docker/local_execution.sh | 14 ++++++++++++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 2d7b84a..f45cbd4 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,6 +1,6 @@ FROM continuumio/miniconda3 -ADD docker/executor.sh /home +ADD executor.sh /home WORKDIR /home ENTRYPOINT ["sh", "executor.sh"] diff --git a/docker/executor.sh b/docker/executor.sh index 397738b..ed93108 100644 --- a/docker/executor.sh +++ b/docker/executor.sh @@ -1,5 +1,3 @@ -echo "Github Repo: https://$OAUTH_TOKEN:x-oauth-basic@${GIT_URL}/tree/${COMMIT_SHA}" - mkdir project cd project @@ -7,6 +5,5 @@ git clone https://$OAUTH_TOKEN:x-oauth-basic@$GIT_URL . 
git checkout $COMMIT_SHA conda env create -f environment.yml -conda run -n hydra /bin/bash -c conda run -n hydra $PREFIX_PARAMS python3 $MODEL_PATH diff --git a/docker/local_execution.sh b/docker/local_execution.sh index cdfde71..d7ec0cf 100644 --- a/docker/local_execution.sh +++ b/docker/local_execution.sh @@ -1,7 +1,11 @@ + +DIR="$( dirname "${BASH_SOURCE[0]}" )" +LOG_NAME=$(date +'%Y_%m_%d_%H_%M_%S') + +cd $DIR #TODO Bug fix building hydra image docker build -t hydra_image . # Create logs folder if it doesn't exist -mkdir -p tmp/hydra docker run \ -e GIT_URL=$1 \ @@ -9,4 +13,10 @@ docker run \ -e OAUTH_TOKEN=$3 \ -e MODEL_PATH=$4 \ -e PREFIX_PARAMS=$5 \ - hydra_image:latest > tmp/hydra/$(date +'%Y_%m_%d_%H_%M_%S').log + hydra_image:latest 2>&1 | tee ${LOG_NAME}.log + +cd - +pwd +mkdir -p tmp/hydra + +mv ${DIR}/${LOG_NAME}.log tmp/hydra/ From 400792c4b246202987d3ef562d673472d4f41ebd Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Thu, 8 Oct 2020 11:12:35 -0700 Subject: [PATCH 22/45] Add explanation to script --- docker/local_execution.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docker/local_execution.sh b/docker/local_execution.sh index d7ec0cf..e310d5a 100644 --- a/docker/local_execution.sh +++ b/docker/local_execution.sh @@ -3,9 +3,7 @@ DIR="$( dirname "${BASH_SOURCE[0]}" )" LOG_NAME=$(date +'%Y_%m_%d_%H_%M_%S') cd $DIR -#TODO Bug fix building hydra image docker build -t hydra_image . 
-# Create logs folder if it doesn't exist docker run \ -e GIT_URL=$1 \ @@ -15,8 +13,7 @@ docker run \ -e PREFIX_PARAMS=$5 \ hydra_image:latest 2>&1 | tee ${LOG_NAME}.log +# Move Log file to where the program is being called cd - -pwd mkdir -p tmp/hydra - mv ${DIR}/${LOG_NAME}.log tmp/hydra/ From a145c911abe6535123d9f58aef8f01d7dab740b2 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Thu, 8 Oct 2020 11:13:01 -0700 Subject: [PATCH 23/45] Report exception for non-implemented parts --- hydra/cli.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/hydra/cli.py b/hydra/cli.py index d520ee6..67c498a 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -11,20 +11,15 @@ def cli(): pass - @cli.command() @click.option('-m', '--model_path', required=True, type=str) @click.option('-c', '--cpu', default=16, type=click.IntRange(0, 128), help='Number of CPU cores required') @click.option('-r', '--memory', default=8, type=click.IntRange(0, 128), help='GB of RAM required') -@click.option('--cloud', default='local', type=click.Choice(['fast_local','local', 'aws', 'gcp', 'azure'], case_sensitive=False)) +@click.option('--cloud', default='local', required=True, type=click.Choice(['fast_local','local', 'aws', 'gcp', 'azure'], case_sensitive=False)) @click.option('--github_token', envvar='GITHUB_TOKEN') # Takes either an option or environment var @click.option('-b', '--branch', default='master', type=str) @click.option('-o', '--options', default='{}', type=str) - def train(model_path, cpu, memory, github_token, cloud, branch, options): - click.echo("This is the training command") - click.echo("Running on {}".format(cloud)) - options = json.loads(options) prefix_params = "" @@ -33,6 +28,7 @@ def train(model_path, cpu, memory, github_token, cloud, branch, options): if cloud == 'fast_local': subprocess.run([prefix_params, 'python3', model_path]) + return 0 check_repo(github_token, branch) git_url = subprocess.check_output("git config --get remote.origin.url", 
shell=True).decode("utf-8").strip() @@ -44,3 +40,6 @@ def train(model_path, cpu, memory, github_token, cloud, branch, options): subprocess.run( ['sh', os.path.join(os.path.dirname(__file__), '../docker/local_execution.sh'), git_url, commit_sha, github_token, model_path, prefix_params]) + return 0 + + raise Exception("Reached parts of Hydra that are not yet implemented.") From e680d85ecdef8900ea7f9e224ac5be2ea5f2030d Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Thu, 8 Oct 2020 11:54:25 -0700 Subject: [PATCH 24/45] Add tests for utility functions --- tests/test_utils.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tests/test_utils.py diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..e17624b --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,19 @@ +import pytest +from hydra.utils import * + +def test_check_repo(): + with pytest.raises(Exception) as err: + check_repo(None, "master") + assert "GITHUB_TOKEN" in str(err.value) + +def test_json_to_string(): + test_json = '{"depth":10, "epoch":100}' + result = json_to_string(test_json) + + assert result == "depth=10 epoch=100" + +def test_empty_json_to_string(): + test_json = '{}' + result = json_to_string(test_json) + + assert result == "" From 5493b0a17a1becd48efba9ee35ce76b5455a9da1 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Thu, 8 Oct 2020 11:55:46 -0700 Subject: [PATCH 25/45] Refactor json to string --- hydra/cli.py | 12 ++++++------ hydra/utils.py | 12 +++++++++++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/hydra/cli.py b/hydra/cli.py index 67c498a..40b7312 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -1,6 +1,5 @@ import os import re -import json import click import subprocess from hydra.utils import check_repo @@ -11,6 +10,11 @@ def cli(): pass +@click.command() +@click.argument('name') +def hello(name): + click.echo('Hello %s!' 
% name) + @cli.command() @click.option('-m', '--model_path', required=True, type=str) @click.option('-c', '--cpu', default=16, type=click.IntRange(0, 128), help='Number of CPU cores required') @@ -20,11 +24,7 @@ def cli(): @click.option('-b', '--branch', default='master', type=str) @click.option('-o', '--options', default='{}', type=str) def train(model_path, cpu, memory, github_token, cloud, branch, options): - options = json.loads(options) - - prefix_params = "" - for key, value in options.items(): - prefix_params += key + "=" + str(value) + " " + prefix_params = json_to_string(options) if cloud == 'fast_local': subprocess.run([prefix_params, 'python3', model_path]) diff --git a/hydra/utils.py b/hydra/utils.py index 8b8c78f..182a1d2 100644 --- a/hydra/utils.py +++ b/hydra/utils.py @@ -1,5 +1,6 @@ import os import git +import json def check_repo(github_token, branch): if github_token == None: @@ -11,7 +12,7 @@ def check_repo(github_token, branch): count_modified_files = len(repo.index.diff(None)) count_staged_files = len(repo.index.diff("HEAD")) - count_unpushed_commits = len(list(repo.iter_commits(branch+'@{u}..master'))) + count_unpushed_commits = len(list(repo.iter_commits('master@{u}..master'))) if count_unpushed_commits > 0: raise Exception("Some commits are not pushed to master branch.") @@ -21,3 +22,12 @@ def check_repo(github_token, branch): if count_modified_files > 0: raise Exception("Some modified files are not staged for commit.") + +def json_to_string(packet): + dic = json.loads(packet) + + params = "" + for key, value in dic.items(): + params += key + "=" + str(value) + " " + + return params.strip() From 671bb506a1e9059adca36a9c99c64cad6767677c Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Thu, 8 Oct 2020 13:01:44 -0700 Subject: [PATCH 26/45] Use gitpython to get branch name, refactoring --- hydra/cli.py | 4 ++-- hydra/utils.py | 21 ++++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/hydra/cli.py b/hydra/cli.py index 
40b7312..e055324 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -3,6 +3,7 @@ import click import subprocess from hydra.utils import check_repo +from hydra.utils import json_to_string from hydra.version import __version__ @click.group() @@ -21,9 +22,8 @@ def hello(name): @click.option('-r', '--memory', default=8, type=click.IntRange(0, 128), help='GB of RAM required') @click.option('--cloud', default='local', required=True, type=click.Choice(['fast_local','local', 'aws', 'gcp', 'azure'], case_sensitive=False)) @click.option('--github_token', envvar='GITHUB_TOKEN') # Takes either an option or environment var -@click.option('-b', '--branch', default='master', type=str) @click.option('-o', '--options', default='{}', type=str) -def train(model_path, cpu, memory, github_token, cloud, branch, options): +def train(model_path, cpu, memory, github_token, cloud, options): prefix_params = json_to_string(options) if cloud == 'fast_local': diff --git a/hydra/utils.py b/hydra/utils.py index 182a1d2..4294578 100644 --- a/hydra/utils.py +++ b/hydra/utils.py @@ -1,8 +1,9 @@ import os import git import json +import warnings -def check_repo(github_token, branch): +def check_repo(github_token): if github_token == None: raise Exception("GITHUB_TOKEN not found in environment variable or as argument") @@ -10,18 +11,24 @@ def check_repo(github_token, branch): if (repo.bare): raise Exception("This is not a git repo") + if len(repo.untracked_files) > 0: + warnings.warn('Following files are not tracked by git: ') + print(repo.untracked_files) + count_modified_files = len(repo.index.diff(None)) + if count_modified_files > 0: + raise Exception("Some modified files are not staged for commit.") + count_staged_files = len(repo.index.diff("HEAD")) - count_unpushed_commits = len(list(repo.iter_commits('master@{u}..master'))) + if count_staged_files > 0: + raise Exception("Some staged files are not commited.") + branch_name = repo.active_branch.name + count_unpushed_commits = 
len(list(repo.iter_commits('origin/{}..{}'.format(branch_name, branch_name)))) if count_unpushed_commits > 0: raise Exception("Some commits are not pushed to master branch.") - if count_staged_files > 0: - raise Exception("Some staged files are not commited.") - - if count_modified_files > 0: - raise Exception("Some modified files are not staged for commit.") + return 0 def json_to_string(packet): dic = json.loads(packet) From b1194ed0614ae3cb96a73132d43aaf7e2ad88141 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Thu, 8 Oct 2020 13:03:26 -0700 Subject: [PATCH 27/45] Fix minor extra arg issue --- hydra/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydra/cli.py b/hydra/cli.py index e055324..2596431 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -30,7 +30,7 @@ def train(model_path, cpu, memory, github_token, cloud, options): subprocess.run([prefix_params, 'python3', model_path]) return 0 - check_repo(github_token, branch) + check_repo(github_token) git_url = subprocess.check_output("git config --get remote.origin.url", shell=True).decode("utf-8").strip() # Remove https://www. 
prefix git_url = re.compile(r"https?://(www\.)?").sub("", git_url).strip().strip('/') From 72cb1ac9acd4b9195b1e0ed7a209d2c7d94b3880 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Thu, 8 Oct 2020 13:29:17 -0700 Subject: [PATCH 28/45] Refactor git check code for easier testing --- hydra/cli.py | 2 +- hydra/git_repo.py | 48 +++++++++++++++++++++++++++++++++++++++++++++++ hydra/utils.py | 30 ----------------------------- 3 files changed, 49 insertions(+), 31 deletions(-) create mode 100644 hydra/git_repo.py diff --git a/hydra/cli.py b/hydra/cli.py index 2596431..5a11730 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -2,7 +2,7 @@ import re import click import subprocess -from hydra.utils import check_repo +from hydra.git_repo import check_repo from hydra.utils import json_to_string from hydra.version import __version__ diff --git a/hydra/git_repo.py b/hydra/git_repo.py new file mode 100644 index 0000000..72ff441 --- /dev/null +++ b/hydra/git_repo.py @@ -0,0 +1,48 @@ +import os +import git +import warnings + +class GitRepo(): + def __init__(self, repo): + self.repo = repo + + def is_empty(self): + return self.repo.bare + + def is_untracked(self): + return len(self.repo.untracked_files) > 0 + + def is_modified(self): + return len(self.repo.index.diff(None)) > 0 + + def is_uncommitted(self): + return len(self.repo.index.diff("HEAD")) > 0 + + def is_unsynced(self): + branch_name = self.repo.active_branch.name + count_unpushed_commits = len(list(self.repo.iter_commits('origin/{}..{}'.format(branch_name, branch_name)))) + return count_unpushed_commits > 0 + +def check_repo(github_token): + if github_token == None: + raise Exception("GITHUB_TOKEN not found in environment variable or as argument.") + + repo = git.Repo(os.getcwd()) + repo = GitRepo(repo) + + if repo.is_empty(): + raise Exception("Hydra is not being called in the root of a git repo.") + + if repo.is_untracked(): + warnings.warn("Some files are not tracked by git.") + + if repo.is_modified(): + raise 
Exception("Some modified files are not staged for commit.") + + if repo.is_uncommitted(): + raise Exception("Some staged files are not commited.") + + if repo.is_unsynced(): + raise Exception("Some commits are not pushed to the remote repo.") + + return 0 diff --git a/hydra/utils.py b/hydra/utils.py index 4294578..0b84b33 100644 --- a/hydra/utils.py +++ b/hydra/utils.py @@ -1,34 +1,4 @@ -import os -import git import json -import warnings - -def check_repo(github_token): - if github_token == None: - raise Exception("GITHUB_TOKEN not found in environment variable or as argument") - - repo = git.Repo(os.getcwd()) - if (repo.bare): - raise Exception("This is not a git repo") - - if len(repo.untracked_files) > 0: - warnings.warn('Following files are not tracked by git: ') - print(repo.untracked_files) - - count_modified_files = len(repo.index.diff(None)) - if count_modified_files > 0: - raise Exception("Some modified files are not staged for commit.") - - count_staged_files = len(repo.index.diff("HEAD")) - if count_staged_files > 0: - raise Exception("Some staged files are not commited.") - - branch_name = repo.active_branch.name - count_unpushed_commits = len(list(repo.iter_commits('origin/{}..{}'.format(branch_name, branch_name)))) - if count_unpushed_commits > 0: - raise Exception("Some commits are not pushed to master branch.") - - return 0 def json_to_string(packet): dic = json.loads(packet) From 0dbda0cd651fb1376c986c2560feaef2bea3d798 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Thu, 8 Oct 2020 14:28:45 -0700 Subject: [PATCH 29/45] Add tests for github functions --- hydra/git_repo.py | 9 +- tests/test_cil.py | 9 ++ tests/test_dummy.py | 4 - tests/test_git_repo.py | 212 +++++++++++++++++++++++++++++++++++++++++ tests/test_utils.py | 5 - 5 files changed, 226 insertions(+), 13 deletions(-) create mode 100644 tests/test_cil.py delete mode 100644 tests/test_dummy.py create mode 100644 tests/test_git_repo.py diff --git a/hydra/git_repo.py b/hydra/git_repo.py index 
72ff441..650af9f 100644 --- a/hydra/git_repo.py +++ b/hydra/git_repo.py @@ -23,18 +23,19 @@ def is_unsynced(self): count_unpushed_commits = len(list(self.repo.iter_commits('origin/{}..{}'.format(branch_name, branch_name)))) return count_unpushed_commits > 0 -def check_repo(github_token): +def check_repo(github_token, repo=None): if github_token == None: raise Exception("GITHUB_TOKEN not found in environment variable or as argument.") - repo = git.Repo(os.getcwd()) - repo = GitRepo(repo) + if repo is None: + repo = git.Repo(os.getcwd()) + repo = GitRepo(repo) if repo.is_empty(): raise Exception("Hydra is not being called in the root of a git repo.") if repo.is_untracked(): - warnings.warn("Some files are not tracked by git.") + warnings.warn("Some files are not tracked by git.", UserWarning) if repo.is_modified(): raise Exception("Some modified files are not staged for commit.") diff --git a/tests/test_cil.py b/tests/test_cil.py new file mode 100644 index 0000000..a468898 --- /dev/null +++ b/tests/test_cil.py @@ -0,0 +1,9 @@ +import pytest +from hydra.cli import * +from click.testing import CliRunner + +def test_hello_world(): + runner = CliRunner() + result = runner.invoke(hello, ['Peter']) + assert result.exit_code == 0 + assert result.output == 'Hello Peter!\n' diff --git a/tests/test_dummy.py b/tests/test_dummy.py deleted file mode 100644 index e09c119..0000000 --- a/tests/test_dummy.py +++ /dev/null @@ -1,4 +0,0 @@ -# dummy tests - -def test_dummy(): - assert 1 == 1 diff --git a/tests/test_git_repo.py b/tests/test_git_repo.py new file mode 100644 index 0000000..0ceb79e --- /dev/null +++ b/tests/test_git_repo.py @@ -0,0 +1,212 @@ +import pytest +import warnings +import pytest_mock +from hydra.git_repo import check_repo + +VALID_GITHUB_TOKEN = "Georgian" + +def test_check_repo_success(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + pass_test + ) + mocker.patch( + 
"hydra.git_repo.GitRepo.is_untracked", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + pass_test + ) + + result = check_repo(VALID_GITHUB_TOKEN) + assert result == 0 + + +def test_check_repo_empty_token(): + with pytest.raises(Exception) as err: + check_repo(None) + assert "GITHUB_TOKEN" in str(err.value) + + +def test_check_repo_untracked(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + with pytest.raises(Exception) as err: + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + fail_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + pass_test + ) + + check_repo(VALID_GITHUB_TOKEN) + + assert "Hydra is not being called in the root of a git repo." == str(err.value) + + +def test_check_repo_modified(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + with pytest.raises(Exception) as err: + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + fail_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + pass_test + ) + + check_repo(VALID_GITHUB_TOKEN) + + assert "Some modified files are not staged for commit." 
== str(err.value) + + +def test_check_repo_uncommitted(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + with pytest.raises(Exception) as err: + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + fail_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + pass_test + ) + + check_repo(VALID_GITHUB_TOKEN) + + assert "Some staged files are not commited." == str(err.value) + + +def test_check_repo_unsynced(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + with pytest.raises(Exception) as err: + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + fail_test + ) + + check_repo(VALID_GITHUB_TOKEN) + + assert "Some commits are not pushed to the remote repo."== str(err.value) + + +def test_check_repo_untracked(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + with pytest.warns(UserWarning) as record: + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + fail_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + pass_test + ) + + check_repo(VALID_GITHUB_TOKEN) + + assert "Some files are not tracked by git." 
== record[0].message.args[0] diff --git a/tests/test_utils.py b/tests/test_utils.py index e17624b..2b99162 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,11 +1,6 @@ import pytest from hydra.utils import * -def test_check_repo(): - with pytest.raises(Exception) as err: - check_repo(None, "master") - assert "GITHUB_TOKEN" in str(err.value) - def test_json_to_string(): test_json = '{"depth":10, "epoch":100}' result = json_to_string(test_json) From 7578468694defa2b79148c6d932c415c5fcefafe Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Fri, 9 Oct 2020 08:12:35 -0700 Subject: [PATCH 30/45] Add test for CLI --- docker/local_execution.sh | 4 ++- hydra/cli.py | 19 +++++++------ hydra/git_repo.py | 2 ++ hydra/utils.py | 13 +++++++++ tests/test_cil.py | 57 +++++++++++++++++++++++++++++++++++---- 5 files changed, 79 insertions(+), 16 deletions(-) diff --git a/docker/local_execution.sh b/docker/local_execution.sh index e310d5a..797e47b 100644 --- a/docker/local_execution.sh +++ b/docker/local_execution.sh @@ -1,5 +1,7 @@ DIR="$( dirname "${BASH_SOURCE[0]}" )" + +# Add random Hash LOG_NAME=$(date +'%Y_%m_%d_%H_%M_%S') cd $DIR @@ -13,7 +15,7 @@ docker run \ -e PREFIX_PARAMS=$5 \ hydra_image:latest 2>&1 | tee ${LOG_NAME}.log -# Move Log file to where the program is being called +# Move Log file to where the program is being called cd - mkdir -p tmp/hydra mv ${DIR}/${LOG_NAME}.log tmp/hydra/ diff --git a/hydra/cli.py b/hydra/cli.py index 5a11730..f257a54 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -1,9 +1,7 @@ import os -import re import click -import subprocess from hydra.git_repo import check_repo -from hydra.utils import json_to_string +from hydra.utils import * from hydra.version import __version__ @click.group() @@ -31,15 +29,16 @@ def train(model_path, cpu, memory, github_token, cloud, options): return 0 check_repo(github_token) - git_url = subprocess.check_output("git config --get remote.origin.url", shell=True).decode("utf-8").strip() - # Remove 
https://www. prefix - git_url = re.compile(r"https?://(www\.)?").sub("", git_url).strip().strip('/') - commit_sha = subprocess.check_output("git log --pretty=tformat:'%h' -n1 .", shell=True).decode("utf-8").strip() + + git_url = get_repo_url() + commit_sha = get_commit_sha() if cloud == 'local': - subprocess.run( - ['sh', os.path.join(os.path.dirname(__file__), '../docker/local_execution.sh'), - git_url, commit_sha, github_token, model_path, prefix_params]) + command = ['sh', + os.path.join(os.path.dirname(__file__), '../docker/local_execution.sh'), + git_url, commit_sha, github_token, model_path, prefix_params] + + subprocess.run(command) return 0 raise Exception("Reached parts of Hydra that are not yet implemented.") diff --git a/hydra/git_repo.py b/hydra/git_repo.py index 650af9f..23fb5a0 100644 --- a/hydra/git_repo.py +++ b/hydra/git_repo.py @@ -2,6 +2,7 @@ import git import warnings + class GitRepo(): def __init__(self, repo): self.repo = repo @@ -23,6 +24,7 @@ def is_unsynced(self): count_unpushed_commits = len(list(self.repo.iter_commits('origin/{}..{}'.format(branch_name, branch_name)))) return count_unpushed_commits > 0 + def check_repo(github_token, repo=None): if github_token == None: raise Exception("GITHUB_TOKEN not found in environment variable or as argument.") diff --git a/hydra/utils.py b/hydra/utils.py index 0b84b33..6b7f3a7 100644 --- a/hydra/utils.py +++ b/hydra/utils.py @@ -1,4 +1,6 @@ +import re import json +import subprocess def json_to_string(packet): dic = json.loads(packet) @@ -8,3 +10,14 @@ def json_to_string(packet): params += key + "=" + str(value) + " " return params.strip() + + +def get_repo_url(): + git_url = subprocess.check_output("git config --get remote.origin.url", shell=True).decode("utf-8").strip() + git_url = re.compile(r"https?://(www\.)?").sub("", git_url).strip().strip('/') + return git_url + + +def get_commit_sha(): + commit_sha = subprocess.check_output("git log --pretty=tformat:'%h' -n1 .", 
shell=True).decode("utf-8").strip() + return commit_sha diff --git a/tests/test_cil.py b/tests/test_cil.py index a468898..751df97 100644 --- a/tests/test_cil.py +++ b/tests/test_cil.py @@ -1,9 +1,56 @@ import pytest -from hydra.cli import * +from hydra.cli import * from click.testing import CliRunner +VALID_MODEL_PATH = "d3bug.py" +VALID_REPO_URL = "https://georgian.io/" +VALID_COMMIT_SHA = "m1rr0r1ng" +VALID_FILE_PATH = "ones/and/zer0es" +VALID_GITHUB_TOKEN = "Georgian" +VALID_PREFIX_PARAMS = "{'epoch': 88}" + def test_hello_world(): - runner = CliRunner() - result = runner.invoke(hello, ['Peter']) - assert result.exit_code == 0 - assert result.output == 'Hello Peter!\n' + runner = CliRunner() + result = runner.invoke(hello, ['Peter']) + assert result.exit_code == 0 + assert result.output == 'Hello Peter!\n' + +def test_train_local(mocker): + def stub(dummy): + pass + + mocker.patch( + "hydra.cli.check_repo", + stub + ) + mocker.patch( + "hydra.cli.get_repo_url", + return_value=VALID_REPO_URL + ) + mocker.patch( + "hydra.cli.get_commit_sha", + return_value=VALID_COMMIT_SHA + ) + mocker.patch( + "hydra.cli.os.path.join", + return_value=VALID_FILE_PATH + ) + mocker.patch( + "hydra.cli.json_to_string", + return_value=VALID_PREFIX_PARAMS + ) + + mocker.patch( + 'hydra.cli.subprocess.run', + ) + + runner = CliRunner() + result = runner.invoke(train, ['--model_path', VALID_MODEL_PATH, '--cloud', 'local', '--github_token', VALID_GITHUB_TOKEN]) + + + subprocess.run.assert_called_once_with( + ['sh', VALID_FILE_PATH, + VALID_REPO_URL, VALID_COMMIT_SHA, VALID_GITHUB_TOKEN, + VALID_MODEL_PATH, VALID_PREFIX_PARAMS]) + + assert result.exit_code == 0 From fb93254fb50eaeba0d6d45f532d92869266b3cc6 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Fri, 9 Oct 2020 09:38:28 -0700 Subject: [PATCH 31/45] Update requirement --- requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requirements.txt b/requirements.txt index f7fc59b..170b7ea 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -1 +1,4 @@ click==7.1.2 +pytest==6.1.1 +pytest_mock==3.3.1 +GitPython==3.1.9 From e1057a960506c32993663f046ed0ab4efd88519b Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Fri, 9 Oct 2020 09:38:45 -0700 Subject: [PATCH 32/45] Check coverage percentage --- .coveragerc | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..0a5b977 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,2 @@ +[run] +omit = tests/*, setup.py From 937c6fa454033b5aa341e80c533a8e51ff233813 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Fri, 9 Oct 2020 10:15:57 -0700 Subject: [PATCH 33/45] Add tests for GitRepo class --- tests/test_git_repo.py | 113 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/tests/test_git_repo.py b/tests/test_git_repo.py index 0ceb79e..804a2e2 100644 --- a/tests/test_git_repo.py +++ b/tests/test_git_repo.py @@ -1,9 +1,120 @@ import pytest import warnings import pytest_mock -from hydra.git_repo import check_repo +from hydra.git_repo import * VALID_GITHUB_TOKEN = "Georgian" +VALID_MULTIPLE_FILES = ["shopify.inc", "clickup.tm"] +VALID_MULTIPLE_COMMITS = ["m1rr0r1ng"] +VALID_BRANCH_NAME = "bay3s1an" + +def test_GitRepo_is_empty_true(mocker): + repo = mocker.Mock() + repo.bare = True + + git_repo = GitRepo(repo) + result = git_repo.is_empty() + + assert result == True + + +def test_GitRepo_is_empty_false(mocker): + repo = mocker.Mock() + repo.bare = False + + git_repo = GitRepo(repo) + result = git_repo.is_empty() + + assert result == False + + +def test_GitRepo_is_untracked_true(mocker): + repo = mocker.Mock() + repo.untracked_files = VALID_MULTIPLE_FILES + + git_repo = GitRepo(repo) + result = git_repo.is_untracked() + + assert result == True + + +def test_GitRepo_is_untracked_false(mocker): + repo = mocker.Mock() + repo.untracked_files = [] + + git_repo = GitRepo(repo) + result = git_repo.is_untracked() + + assert 
result == False + + +def test_GitRepo_is_modified_true(mocker): + repo = mocker.Mock() + repo.index.diff.return_value = VALID_MULTIPLE_COMMITS + + git_repo = GitRepo(repo) + result = git_repo.is_modified() + + repo.index.diff.assert_called_once_with(None) + assert result == True + + +def test_GitRepo_is_modified_false(mocker): + repo = mocker.Mock() + repo.index.diff.return_value = [] + + git_repo = GitRepo(repo) + result = git_repo.is_modified() + + repo.index.diff.assert_called_once_with(None) + assert result == False + + +def test_GitRepo_is_uncommitted_true(mocker): + repo = mocker.Mock() + repo.index.diff.return_value = VALID_MULTIPLE_COMMITS + + git_repo = GitRepo(repo) + result = git_repo.is_uncommitted() + + repo.index.diff.assert_called_once_with("HEAD") + assert result == True + + +def test_GitRepo_is_uncommitted_false(mocker): + repo = mocker.Mock() + repo.index.diff.return_value = [] + + git_repo = GitRepo(repo) + result = git_repo.is_uncommitted() + + repo.index.diff.assert_called_once_with("HEAD") + assert result == False + + +def test_GitRepo_is_unsynced_true(mocker): + repo = mocker.Mock() + repo.active_branch.name = VALID_BRANCH_NAME + repo.iter_commits.return_value = VALID_MULTIPLE_COMMITS + + git_repo = GitRepo(repo) + result = git_repo.is_unsynced() + + repo.iter_commits.assert_called_once_with('origin/{}..{}'.format(VALID_BRANCH_NAME, VALID_BRANCH_NAME)) + assert result == True + + +def test_GitRepo_is_unsynced_false(mocker): + repo = mocker.Mock() + repo.active_branch.name = VALID_BRANCH_NAME + repo.iter_commits.return_value = [] + + git_repo = GitRepo(repo) + result = git_repo.is_unsynced() + + repo.iter_commits.assert_called_once_with('origin/{}..{}'.format(VALID_BRANCH_NAME, VALID_BRANCH_NAME)) + assert result == False + def test_check_repo_success(mocker): def pass_test(self): From cb871e28ba11925cff0935650b94ecc2c0236fde Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Fri, 9 Oct 2020 10:20:27 -0700 Subject: [PATCH 34/45] Refactor GitRepo 
class into its single file --- hydra/cli.py | 2 - hydra/git_repo.py | 30 ------ hydra/utils.py | 31 ++++++ tests/test_git_repo.py | 208 ---------------------------------------- tests/test_utils.py | 210 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 241 insertions(+), 240 deletions(-) diff --git a/hydra/cli.py b/hydra/cli.py index f257a54..4ac5c70 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -1,6 +1,5 @@ import os import click -from hydra.git_repo import check_repo from hydra.utils import * from hydra.version import __version__ @@ -29,7 +28,6 @@ def train(model_path, cpu, memory, github_token, cloud, options): return 0 check_repo(github_token) - git_url = get_repo_url() commit_sha = get_commit_sha() diff --git a/hydra/git_repo.py b/hydra/git_repo.py index 23fb5a0..9a43e2c 100644 --- a/hydra/git_repo.py +++ b/hydra/git_repo.py @@ -1,7 +1,3 @@ -import os -import git -import warnings - class GitRepo(): def __init__(self, repo): @@ -23,29 +19,3 @@ def is_unsynced(self): branch_name = self.repo.active_branch.name count_unpushed_commits = len(list(self.repo.iter_commits('origin/{}..{}'.format(branch_name, branch_name)))) return count_unpushed_commits > 0 - - -def check_repo(github_token, repo=None): - if github_token == None: - raise Exception("GITHUB_TOKEN not found in environment variable or as argument.") - - if repo is None: - repo = git.Repo(os.getcwd()) - repo = GitRepo(repo) - - if repo.is_empty(): - raise Exception("Hydra is not being called in the root of a git repo.") - - if repo.is_untracked(): - warnings.warn("Some files are not tracked by git.", UserWarning) - - if repo.is_modified(): - raise Exception("Some modified files are not staged for commit.") - - if repo.is_uncommitted(): - raise Exception("Some staged files are not commited.") - - if repo.is_unsynced(): - raise Exception("Some commits are not pushed to the remote repo.") - - return 0 diff --git a/hydra/utils.py b/hydra/utils.py index 6b7f3a7..1d54994 100644 --- a/hydra/utils.py +++ 
b/hydra/utils.py @@ -1,6 +1,11 @@ import re +import os +import git import json +import warnings import subprocess +from hydra.git_repo import GitRepo + def json_to_string(packet): dic = json.loads(packet) @@ -21,3 +26,29 @@ def get_repo_url(): def get_commit_sha(): commit_sha = subprocess.check_output("git log --pretty=tformat:'%h' -n1 .", shell=True).decode("utf-8").strip() return commit_sha + + +def check_repo(github_token, repo=None): + if github_token == None: + raise Exception("GITHUB_TOKEN not found in environment variable or as argument.") + + if repo is None: + repo = git.Repo(os.getcwd()) + repo = GitRepo(repo) + + if repo.is_empty(): + raise Exception("Hydra is not being called in the root of a git repo.") + + if repo.is_untracked(): + warnings.warn("Some files are not tracked by git.", UserWarning) + + if repo.is_modified(): + raise Exception("Some modified files are not staged for commit.") + + if repo.is_uncommitted(): + raise Exception("Some staged files are not commited.") + + if repo.is_unsynced(): + raise Exception("Some commits are not pushed to the remote repo.") + + return 0 diff --git a/tests/test_git_repo.py b/tests/test_git_repo.py index 804a2e2..5a6198c 100644 --- a/tests/test_git_repo.py +++ b/tests/test_git_repo.py @@ -3,7 +3,6 @@ import pytest_mock from hydra.git_repo import * -VALID_GITHUB_TOKEN = "Georgian" VALID_MULTIPLE_FILES = ["shopify.inc", "clickup.tm"] VALID_MULTIPLE_COMMITS = ["m1rr0r1ng"] VALID_BRANCH_NAME = "bay3s1an" @@ -114,210 +113,3 @@ def test_GitRepo_is_unsynced_false(mocker): repo.iter_commits.assert_called_once_with('origin/{}..{}'.format(VALID_BRANCH_NAME, VALID_BRANCH_NAME)) assert result == False - - -def test_check_repo_success(mocker): - def pass_test(self): - return False - def fail_test(self): - return True - - mocker.patch( - "hydra.git_repo.GitRepo.is_empty", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_untracked", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_modified", - 
pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_uncommitted", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_unsynced", - pass_test - ) - - result = check_repo(VALID_GITHUB_TOKEN) - assert result == 0 - - -def test_check_repo_empty_token(): - with pytest.raises(Exception) as err: - check_repo(None) - assert "GITHUB_TOKEN" in str(err.value) - - -def test_check_repo_untracked(mocker): - def pass_test(self): - return False - def fail_test(self): - return True - - with pytest.raises(Exception) as err: - - mocker.patch( - "hydra.git_repo.GitRepo.is_empty", - fail_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_untracked", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_modified", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_uncommitted", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_unsynced", - pass_test - ) - - check_repo(VALID_GITHUB_TOKEN) - - assert "Hydra is not being called in the root of a git repo." == str(err.value) - - -def test_check_repo_modified(mocker): - def pass_test(self): - return False - def fail_test(self): - return True - - with pytest.raises(Exception) as err: - - mocker.patch( - "hydra.git_repo.GitRepo.is_empty", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_untracked", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_modified", - fail_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_uncommitted", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_unsynced", - pass_test - ) - - check_repo(VALID_GITHUB_TOKEN) - - assert "Some modified files are not staged for commit." 
== str(err.value) - - -def test_check_repo_uncommitted(mocker): - def pass_test(self): - return False - def fail_test(self): - return True - - with pytest.raises(Exception) as err: - - mocker.patch( - "hydra.git_repo.GitRepo.is_empty", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_untracked", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_modified", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_uncommitted", - fail_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_unsynced", - pass_test - ) - - check_repo(VALID_GITHUB_TOKEN) - - assert "Some staged files are not commited." == str(err.value) - - -def test_check_repo_unsynced(mocker): - def pass_test(self): - return False - def fail_test(self): - return True - - with pytest.raises(Exception) as err: - - mocker.patch( - "hydra.git_repo.GitRepo.is_empty", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_untracked", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_modified", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_uncommitted", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_unsynced", - fail_test - ) - - check_repo(VALID_GITHUB_TOKEN) - - assert "Some commits are not pushed to the remote repo."== str(err.value) - - -def test_check_repo_untracked(mocker): - def pass_test(self): - return False - def fail_test(self): - return True - - with pytest.warns(UserWarning) as record: - - mocker.patch( - "hydra.git_repo.GitRepo.is_empty", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_untracked", - fail_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_modified", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_uncommitted", - pass_test - ) - mocker.patch( - "hydra.git_repo.GitRepo.is_unsynced", - pass_test - ) - - check_repo(VALID_GITHUB_TOKEN) - - assert "Some files are not tracked by git." 
== record[0].message.args[0] diff --git a/tests/test_utils.py b/tests/test_utils.py index 2b99162..beda353 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,9 @@ import pytest +import pytest_mock from hydra.utils import * +VALID_GITHUB_TOKEN = "Georgian" + def test_json_to_string(): test_json = '{"depth":10, "epoch":100}' result = json_to_string(test_json) @@ -12,3 +15,210 @@ def test_empty_json_to_string(): result = json_to_string(test_json) assert result == "" + + +def test_check_repo_success(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + pass_test + ) + + result = check_repo(VALID_GITHUB_TOKEN) + assert result == 0 + + +def test_check_repo_empty_token(): + with pytest.raises(Exception) as err: + check_repo(None) + assert "GITHUB_TOKEN" in str(err.value) + + +def test_check_repo_untracked(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + with pytest.raises(Exception) as err: + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + fail_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + pass_test + ) + + check_repo(VALID_GITHUB_TOKEN) + + assert "Hydra is not being called in the root of a git repo." 
== str(err.value) + + +def test_check_repo_modified(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + with pytest.raises(Exception) as err: + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + fail_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + pass_test + ) + + check_repo(VALID_GITHUB_TOKEN) + + assert "Some modified files are not staged for commit." == str(err.value) + + +def test_check_repo_uncommitted(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + with pytest.raises(Exception) as err: + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + fail_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + pass_test + ) + + check_repo(VALID_GITHUB_TOKEN) + + assert "Some staged files are not commited." 
== str(err.value) + + +def test_check_repo_unsynced(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + with pytest.raises(Exception) as err: + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + fail_test + ) + + check_repo(VALID_GITHUB_TOKEN) + + assert "Some commits are not pushed to the remote repo."== str(err.value) + + +def test_check_repo_untracked(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + with pytest.warns(UserWarning) as record: + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + fail_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + pass_test + ) + + check_repo(VALID_GITHUB_TOKEN) + + assert "Some files are not tracked by git." 
== record[0].message.args[0] From d2308ca6709637c8d2c657180ba049909b9ba915 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Fri, 9 Oct 2020 11:38:29 -0700 Subject: [PATCH 35/45] Refactor out running procedures into classes --- hydra/cli.py | 5 ++++- hydra/cloud/__init__.py | 0 hydra/cloud/abstract_platform.py | 12 ++++++++++++ hydra/cloud/fast_local_platform.py | 13 +++++++++++++ hydra/cloud/google_cloud.py | 0 hydra/cloud/local_platform.py | 13 +++++++++++++ 6 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 hydra/cloud/__init__.py create mode 100644 hydra/cloud/abstract_platform.py create mode 100644 hydra/cloud/fast_local_platform.py create mode 100644 hydra/cloud/google_cloud.py create mode 100644 hydra/cloud/local_platform.py diff --git a/hydra/cli.py b/hydra/cli.py index 4ac5c70..ccadb68 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -1,6 +1,7 @@ import os import click from hydra.utils import * +from hydra.cloud.fast_local_platform import FastLocalPlatform from hydra.version import __version__ @click.group() @@ -24,7 +25,9 @@ def train(model_path, cpu, memory, github_token, cloud, options): prefix_params = json_to_string(options) if cloud == 'fast_local': - subprocess.run([prefix_params, 'python3', model_path]) + platform = FastLocalPlatform(model_path, prefix_params) + platform.train() + return 0 check_repo(github_token) diff --git a/hydra/cloud/__init__.py b/hydra/cloud/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/hydra/cloud/abstract_platform.py b/hydra/cloud/abstract_platform.py new file mode 100644 index 0000000..dc0be0d --- /dev/null +++ b/hydra/cloud/abstract_platform.py @@ -0,0 +1,12 @@ + + +class AbstractPlatform(): + def __init__(self, model_path, prefix_params=""): + self.model_path = model_path + self.prefix_params = prefix_params + + def train(self): + raise Exception("Not Implemented: Please implement this function in the subclass.") + + def serve(self): + raise Exception("Not Implemented: Please implement 
this function in the subclass.") diff --git a/hydra/cloud/fast_local_platform.py b/hydra/cloud/fast_local_platform.py new file mode 100644 index 0000000..e199624 --- /dev/null +++ b/hydra/cloud/fast_local_platform.py @@ -0,0 +1,13 @@ +import os +from hydra.cloud.abstract_platform import AbstractPlatform + +class FastLocalPlatform(AbstractPlatform): + def __init__(self, model_path, prefix_params): + super().__init__(model_path, prefix_params) + + def train(self): + os.system(" ".join([self.prefix_params, 'python3', self.model_path])) + return 0 + + def serve(self): + pass diff --git a/hydra/cloud/google_cloud.py b/hydra/cloud/google_cloud.py new file mode 100644 index 0000000..e69de29 diff --git a/hydra/cloud/local_platform.py b/hydra/cloud/local_platform.py new file mode 100644 index 0000000..3f033a6 --- /dev/null +++ b/hydra/cloud/local_platform.py @@ -0,0 +1,13 @@ +import subprocess +from hydra.cloud.abstract_platform import AbstractPlatform + +class LocalPlatform(AbstractPlatform): + def __init__(self, model_path, prefix_params="", ): + super().__init__(model_path, prefix_params) + + def train(self): + os.system(" ".join([self.prefix_params, 'python3', self.model_path])) + return 0 + + def serve(self): + pass From a85e221de1faf7df9f51d37bbcec3368c395aea0 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Fri, 9 Oct 2020 11:58:04 -0700 Subject: [PATCH 36/45] Add local platform training procedure --- hydra/cli.py | 7 +++---- hydra/cloud/abstract_platform.py | 2 +- hydra/cloud/local_platform.py | 12 ++++++++++-- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/hydra/cli.py b/hydra/cli.py index ccadb68..701a430 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -1,6 +1,7 @@ import os import click from hydra.utils import * +from hydra.cloud.local_platform import LocalPlatform from hydra.cloud.fast_local_platform import FastLocalPlatform from hydra.version import __version__ @@ -35,11 +36,9 @@ def train(model_path, cpu, memory, github_token, cloud, options): 
commit_sha = get_commit_sha() if cloud == 'local': - command = ['sh', - os.path.join(os.path.dirname(__file__), '../docker/local_execution.sh'), - git_url, commit_sha, github_token, model_path, prefix_params] + platform = LocalPlatform(model_path, prefix_params, git_url, commit_sha, github_token) + platform.train() - subprocess.run(command) return 0 raise Exception("Reached parts of Hydra that are not yet implemented.") diff --git a/hydra/cloud/abstract_platform.py b/hydra/cloud/abstract_platform.py index dc0be0d..ace6fb4 100644 --- a/hydra/cloud/abstract_platform.py +++ b/hydra/cloud/abstract_platform.py @@ -1,7 +1,7 @@ class AbstractPlatform(): - def __init__(self, model_path, prefix_params=""): + def __init__(self, model_path, prefix_params): self.model_path = model_path self.prefix_params = prefix_params diff --git a/hydra/cloud/local_platform.py b/hydra/cloud/local_platform.py index 3f033a6..5308c0a 100644 --- a/hydra/cloud/local_platform.py +++ b/hydra/cloud/local_platform.py @@ -1,12 +1,20 @@ +import os import subprocess from hydra.cloud.abstract_platform import AbstractPlatform class LocalPlatform(AbstractPlatform): - def __init__(self, model_path, prefix_params="", ): + def __init__(self, model_path, prefix_params, git_url, commit_sha, github_token): + self.git_url = git_url + self.commit_sha = commit_sha + self.github_token = github_token super().__init__(model_path, prefix_params) def train(self): - os.system(" ".join([self.prefix_params, 'python3', self.model_path])) + execution_script_path = os.path.join(os.path.dirname(__file__), '../../docker/local_execution.sh') + command = ['sh', execution_script_path, self.git_url, self.commit_sha, + self.github_token, self.model_path, self.prefix_params] + + subprocess.run(command) return 0 def serve(self): From 7adb47ec3aac63e1ccba81f235c64c3375bb1e91 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Fri, 9 Oct 2020 16:57:20 -0700 Subject: [PATCH 37/45] Add Google Cloud class basic outline --- 
hydra/cloud/google_cloud.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/hydra/cloud/google_cloud.py b/hydra/cloud/google_cloud.py index e69de29..b659cd0 100644 --- a/hydra/cloud/google_cloud.py +++ b/hydra/cloud/google_cloud.py @@ -0,0 +1,14 @@ +from hydra.cloud.abstract_platform import AbstractPlatform + +class GoogleCloud(AbstractPlatform): + def __init__(self, model_path, prefix_params, git_url, commit_sha, github_token): + self.git_url = git_url + self.commit_sha = commit_sha + self.github_token = github_token + super().__init__(model_path, prefix_params) + + def train(self): + pass + + def serve(self): + pass From 071be9d6ca9e57052ebc771bbacff8fb4c9fb538 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Fri, 9 Oct 2020 17:02:04 -0700 Subject: [PATCH 38/45] Order dict items for python 3.5 compat to avoid flaky tests --- hydra/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hydra/utils.py b/hydra/utils.py index 1d54994..f356520 100644 --- a/hydra/utils.py +++ b/hydra/utils.py @@ -4,11 +4,12 @@ import json import warnings import subprocess +import OrderedDict from hydra.git_repo import GitRepo def json_to_string(packet): - dic = json.loads(packet) + dic = json.loads(packet, object_pairs_hook=OrderedDict) params = "" for key, value in dic.items(): From 64820af22bd8c5329557af6cd5861dd16a1c1fb9 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Fri, 9 Oct 2020 17:04:34 -0700 Subject: [PATCH 39/45] Minor name change --- hydra/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hydra/utils.py b/hydra/utils.py index f356520..5d7109c 100644 --- a/hydra/utils.py +++ b/hydra/utils.py @@ -9,10 +9,10 @@ def json_to_string(packet): - dic = json.loads(packet, object_pairs_hook=OrderedDict) + od = json.loads(packet, object_pairs_hook=OrderedDict) params = "" - for key, value in dic.items(): + for key, value in od.items(): params += key + "=" + str(value) + " " return params.strip() From 
b74e4cd92cd34475c2fa06652243cc76b536dad8 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Fri, 9 Oct 2020 17:09:35 -0700 Subject: [PATCH 40/45] Fix OrderDict import --- hydra/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydra/utils.py b/hydra/utils.py index 5d7109c..d9cabc9 100644 --- a/hydra/utils.py +++ b/hydra/utils.py @@ -4,7 +4,7 @@ import json import warnings import subprocess -import OrderedDict +from collections import OrderedDict from hydra.git_repo import GitRepo From 81f6fd27cf38a53a47a327ba067626e1a41439c2 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Tue, 13 Oct 2020 07:48:34 -0700 Subject: [PATCH 41/45] Add test coverage to github workflow --- .github/workflows/python-package.yml | 1 + hydra/cli.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index f1abc2f..a622021 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -37,3 +37,4 @@ jobs: - name: Test with pytest run: | pytest + pytest --cov=. 
tests/ diff --git a/hydra/cli.py b/hydra/cli.py index 701a430..7a9f227 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -21,7 +21,7 @@ def hello(name): @click.option('-r', '--memory', default=8, type=click.IntRange(0, 128), help='GB of RAM required') @click.option('--cloud', default='local', required=True, type=click.Choice(['fast_local','local', 'aws', 'gcp', 'azure'], case_sensitive=False)) @click.option('--github_token', envvar='GITHUB_TOKEN') # Takes either an option or environment var -@click.option('-o', '--options', default='{}', type=str) +@click.option('-o', '--options', default='{}', type=str) #env #TODO Add help for options def train(model_path, cpu, memory, github_token, cloud, options): prefix_params = json_to_string(options) From 05d539709dcbb166f3b47f7fd99612dab42f5fc5 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Tue, 13 Oct 2020 08:00:16 -0700 Subject: [PATCH 42/45] Add help for option flag --- hydra/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydra/cli.py b/hydra/cli.py index 7a9f227..1afbc91 100644 --- a/hydra/cli.py +++ b/hydra/cli.py @@ -21,7 +21,7 @@ def hello(name): @click.option('-r', '--memory', default=8, type=click.IntRange(0, 128), help='GB of RAM required') @click.option('--cloud', default='local', required=True, type=click.Choice(['fast_local','local', 'aws', 'gcp', 'azure'], case_sensitive=False)) @click.option('--github_token', envvar='GITHUB_TOKEN') # Takes either an option or environment var -@click.option('-o', '--options', default='{}', type=str) #env #TODO Add help for options +@click.option('-o', '--options', default='{}', type=str, help='Environmental variables for the script') def train(model_path, cpu, memory, github_token, cloud, options): prefix_params = json_to_string(options) From 6270069a7780b58c21d8e96a9d78ddfbdaa43d43 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Tue, 13 Oct 2020 08:01:02 -0700 Subject: [PATCH 43/45] Revert breaking change to workflow --- 
.github/workflows/python-package.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index a622021..f1abc2f 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -37,4 +37,3 @@ jobs: - name: Test with pytest run: | pytest - pytest --cov=. tests/ From 6cb8fa2e6c6bf9793aee249777120eb4662f41f1 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Tue, 13 Oct 2020 08:06:35 -0700 Subject: [PATCH 44/45] Workflow add test coverage --- .github/workflows/python-package.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index f1abc2f..10cdb19 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -37,3 +37,6 @@ jobs: - name: Test with pytest run: | pytest + - name: Display test coverage + run: | + python -m pytest --cov=. tests/ From 6489642943e4f27c7a859b148304e34294a098a0 Mon Sep 17 00:00:00 2001 From: Tony Shen Date: Tue, 13 Oct 2020 08:12:50 -0700 Subject: [PATCH 45/45] Add dependency in github workflow --- .github/workflows/python-package.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 10cdb19..bda022f 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -27,6 +27,7 @@ jobs: run: | python -m pip install --upgrade pip pip install flake8 pytest + pip install pytest-cov if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Lint with flake8 run: | @@ -39,4 +40,4 @@ jobs: pytest - name: Display test coverage run: | - python -m pytest --cov=. tests/ + pytest --cov=. tests/