# Build the Hydra Docker image and run one training job inside it.
#
# Usage: local_execution.sh GIT_URL COMMIT_SHA OAUTH_TOKEN MODEL_PATH PREFIX_PARAMS
#
# The container output is teed into a timestamped log file, which is then
# moved to tmp/hydra/ below the directory this script was called from.

# Resolve the script directory from $0 rather than ${BASH_SOURCE[0]}:
# BASH_SOURCE is a bash-only array and is a "bad substitution" in POSIX
# shells such as dash when the script is started with `sh`.
DIR="$( dirname "$0" )"

# Timestamp used as the log file name
LOG_NAME=$(date +'%Y_%m_%d_%H_%M_%S')

cd "$DIR"
docker build -t hydra_image .

# Every argument is quoted so that a value containing spaces (for example
# several KEY=VALUE pairs in PREFIX_PARAMS) stays a single -e value instead
# of spilling into the image-name position.
docker run \
  -e GIT_URL="$1" \
  -e COMMIT_SHA="$2" \
  -e OAUTH_TOKEN="$3" \
  -e MODEL_PATH="$4" \
  -e PREFIX_PARAMS="$5" \
  hydra_image:latest 2>&1 | tee "${LOG_NAME}.log"

# Move Log file to where the program is being called
cd -
mkdir -p tmp/hydra
mv "${DIR}/${LOG_NAME}.log" tmp/hydra/
class AbstractPlatform:
    """Base class for the execution platforms Hydra can train or serve on.

    Subclasses are expected to override ``train`` and ``serve``; the base
    implementations only raise.
    """

    def __init__(self, model_path, prefix_params):
        """Store the settings shared by every platform.

        Args:
            model_path: Path of the model script to execute.
            prefix_params: String placed in front of the python command
                (the CLI builds it as space-separated ``KEY=VALUE`` pairs).
        """
        self.model_path = model_path
        self.prefix_params = prefix_params

    def train(self):
        """Run a training job on the platform.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        # NotImplementedError is still an Exception subclass, so callers that
        # catch Exception keep working while the intent becomes explicit.
        raise NotImplementedError("Not Implemented: Please implement this function in the subclass.")

    def serve(self):
        """Serve a model on the platform.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError("Not Implemented: Please implement this function in the subclass.")
class GitRepo:
    """Thin wrapper exposing yes/no state checks on a repository object.

    The wrapped object is expected to offer the attributes used below
    (``bare``, ``untracked_files``, ``index.diff``, ``active_branch.name``
    and ``iter_commits``) -- presumably a GitPython ``git.Repo``.
    """

    def __init__(self, repo):
        """Keep a reference to the repository object to inspect."""
        self.repo = repo

    def is_empty(self):
        """Return the repository's ``bare`` flag."""
        return self.repo.bare

    def is_untracked(self):
        """Return True when the repository lists at least one untracked file."""
        return bool(self.repo.untracked_files)

    def is_modified(self):
        """Return True when ``index.diff(None)`` reports any difference.

        NOTE(review): with GitPython this compares the index against the
        working tree, i.e. unstaged modifications -- confirm.
        """
        return bool(self.repo.index.diff(None))

    def is_uncommitted(self):
        """Return True when ``index.diff("HEAD")`` reports any difference."""
        return bool(self.repo.index.diff("HEAD"))

    def is_unsynced(self, remote='origin'):
        """Return True when the active branch has commits missing on *remote*.

        Args:
            remote: Name of the remote whose same-named branch is used as the
                comparison base. Defaults to ``'origin'``, the previously
                hard-coded value.
        """
        branch_name = self.repo.active_branch.name
        # "<remote>/<branch>..<branch>" selects commits reachable from the
        # local branch but not from its remote counterpart.
        rev_range = '{}/{}..{}'.format(remote, branch_name, branch_name)
        # Only existence matters, so stop at the first commit found.
        return any(True for _ in self.repo.iter_commits(rev_range))
def check_repo(github_token, repo=None):
    """Verify that the git repository is in a state Hydra can run remotely.

    Args:
        github_token: Token used to access the repository. ``None`` and the
            empty string are both treated as missing.
        repo: Optional repository object to inspect. When omitted, a
            ``git.Repo`` is opened on the current working directory.

    Returns:
        ``0`` when every check passes.

    Raises:
        Exception: If the token is missing, or if the repository is reported
            as empty, has unstaged modifications, has staged but uncommitted
            changes, or has commits that were not pushed.

    Warns:
        UserWarning: If some files are not tracked by git (non-fatal).
    """
    # A falsy check also rejects an empty GITHUB_TOKEN, which would otherwise
    # pass here and only fail later when the repository is cloned.
    if not github_token:
        raise Exception("GITHUB_TOKEN not found in environment variable or as argument.")

    if repo is None:
        repo = git.Repo(os.getcwd())
    repo = GitRepo(repo)

    if repo.is_empty():
        raise Exception("Hydra is not being called in the root of a git repo.")

    # Untracked files do not block a run; they are only reported.
    if repo.is_untracked():
        warnings.warn("Some files are not tracked by git.", UserWarning)

    if repo.is_modified():
        raise Exception("Some modified files are not staged for commit.")

    if repo.is_uncommitted():
        raise Exception("Some staged files are not commited.")

    if repo.is_unsynced():
        raise Exception("Some commits are not pushed to the remote repo.")

    return 0
+VALID_GITHUB_TOKEN = "Georgian" +VALID_PREFIX_PARAMS = "{'epoch': 88}" + +def test_hello_world(): + runner = CliRunner() + result = runner.invoke(hello, ['Peter']) + assert result.exit_code == 0 + assert result.output == 'Hello Peter!\n' + +def test_train_local(mocker): + def stub(dummy): + pass + + mocker.patch( + "hydra.cli.check_repo", + stub + ) + mocker.patch( + "hydra.cli.get_repo_url", + return_value=VALID_REPO_URL + ) + mocker.patch( + "hydra.cli.get_commit_sha", + return_value=VALID_COMMIT_SHA + ) + mocker.patch( + "hydra.cli.os.path.join", + return_value=VALID_FILE_PATH + ) + mocker.patch( + "hydra.cli.json_to_string", + return_value=VALID_PREFIX_PARAMS + ) + + mocker.patch( + 'hydra.cli.subprocess.run', + ) + + runner = CliRunner() + result = runner.invoke(train, ['--model_path', VALID_MODEL_PATH, '--cloud', 'local', '--github_token', VALID_GITHUB_TOKEN]) + + + subprocess.run.assert_called_once_with( + ['sh', VALID_FILE_PATH, + VALID_REPO_URL, VALID_COMMIT_SHA, VALID_GITHUB_TOKEN, + VALID_MODEL_PATH, VALID_PREFIX_PARAMS]) + + assert result.exit_code == 0 diff --git a/tests/test_dummy.py b/tests/test_dummy.py deleted file mode 100644 index e09c119..0000000 --- a/tests/test_dummy.py +++ /dev/null @@ -1,4 +0,0 @@ -# dummy tests - -def test_dummy(): - assert 1 == 1 diff --git a/tests/test_git_repo.py b/tests/test_git_repo.py new file mode 100644 index 0000000..5a6198c --- /dev/null +++ b/tests/test_git_repo.py @@ -0,0 +1,115 @@ +import pytest +import warnings +import pytest_mock +from hydra.git_repo import * + +VALID_MULTIPLE_FILES = ["shopify.inc", "clickup.tm"] +VALID_MULTIPLE_COMMITS = ["m1rr0r1ng"] +VALID_BRANCH_NAME = "bay3s1an" + +def test_GitRepo_is_empty_true(mocker): + repo = mocker.Mock() + repo.bare = True + + git_repo = GitRepo(repo) + result = git_repo.is_empty() + + assert result == True + + +def test_GitRepo_is_empty_false(mocker): + repo = mocker.Mock() + repo.bare = False + + git_repo = GitRepo(repo) + result = git_repo.is_empty() + + 
assert result == False + + +def test_GitRepo_is_untracked_true(mocker): + repo = mocker.Mock() + repo.untracked_files = VALID_MULTIPLE_FILES + + git_repo = GitRepo(repo) + result = git_repo.is_untracked() + + assert result == True + + +def test_GitRepo_is_untracked_false(mocker): + repo = mocker.Mock() + repo.untracked_files = [] + + git_repo = GitRepo(repo) + result = git_repo.is_untracked() + + assert result == False + + +def test_GitRepo_is_modified_true(mocker): + repo = mocker.Mock() + repo.index.diff.return_value = VALID_MULTIPLE_COMMITS + + git_repo = GitRepo(repo) + result = git_repo.is_modified() + + repo.index.diff.assert_called_once_with(None) + assert result == True + + +def test_GitRepo_is_modified_false(mocker): + repo = mocker.Mock() + repo.index.diff.return_value = [] + + git_repo = GitRepo(repo) + result = git_repo.is_modified() + + repo.index.diff.assert_called_once_with(None) + assert result == False + + +def test_GitRepo_is_uncommitted_true(mocker): + repo = mocker.Mock() + repo.index.diff.return_value = VALID_MULTIPLE_COMMITS + + git_repo = GitRepo(repo) + result = git_repo.is_uncommitted() + + repo.index.diff.assert_called_once_with("HEAD") + assert result == True + + +def test_GitRepo_is_uncommitted_false(mocker): + repo = mocker.Mock() + repo.index.diff.return_value = [] + + git_repo = GitRepo(repo) + result = git_repo.is_uncommitted() + + repo.index.diff.assert_called_once_with("HEAD") + assert result == False + + +def test_GitRepo_is_unsynced_true(mocker): + repo = mocker.Mock() + repo.active_branch.name = VALID_BRANCH_NAME + repo.iter_commits.return_value = VALID_MULTIPLE_COMMITS + + git_repo = GitRepo(repo) + result = git_repo.is_unsynced() + + repo.iter_commits.assert_called_once_with('origin/{}..{}'.format(VALID_BRANCH_NAME, VALID_BRANCH_NAME)) + assert result == True + + +def test_GitRepo_is_unsynced_false(mocker): + repo = mocker.Mock() + repo.active_branch.name = VALID_BRANCH_NAME + repo.iter_commits.return_value = [] + + git_repo = 
def test_check_repo_empty(mocker):
    """check_repo raises when GitRepo.is_empty() reports True.

    This test used to be named test_check_repo_untracked, the same name as
    the warning test defined later in the module, so the later definition
    replaced it and this case was never collected.
    """
    def pass_test(self):
        return False
    def fail_test(self):
        return True

    # Only the is_empty check fails; every other check passes.
    mocker.patch(
        "hydra.git_repo.GitRepo.is_empty",
        fail_test
    )
    mocker.patch(
        "hydra.git_repo.GitRepo.is_untracked",
        pass_test
    )
    mocker.patch(
        "hydra.git_repo.GitRepo.is_modified",
        pass_test
    )
    mocker.patch(
        "hydra.git_repo.GitRepo.is_uncommitted",
        pass_test
    )
    mocker.patch(
        "hydra.git_repo.GitRepo.is_unsynced",
        pass_test
    )

    with pytest.raises(Exception) as err:
        check_repo(VALID_GITHUB_TOKEN)

    assert "Hydra is not being called in the root of a git repo." == str(err.value)
== str(err.value) + + +def test_check_repo_unsynced(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + with pytest.raises(Exception) as err: + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + fail_test + ) + + check_repo(VALID_GITHUB_TOKEN) + + assert "Some commits are not pushed to the remote repo."== str(err.value) + + +def test_check_repo_untracked(mocker): + def pass_test(self): + return False + def fail_test(self): + return True + + with pytest.warns(UserWarning) as record: + + mocker.patch( + "hydra.git_repo.GitRepo.is_empty", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_untracked", + fail_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_modified", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_uncommitted", + pass_test + ) + mocker.patch( + "hydra.git_repo.GitRepo.is_unsynced", + pass_test + ) + + check_repo(VALID_GITHUB_TOKEN) + + assert "Some files are not tracked by git." == record[0].message.args[0]