diff --git a/Makefile b/Makefile index 050cc8dfa..4307ee1cc 100644 --- a/Makefile +++ b/Makefile @@ -26,13 +26,13 @@ requirements: ## Format the code using isort and black format: - isort --profile black ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}" - black ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}" - + isort --profile black ccds hooks tests docs/scripts + black ccds hooks tests docs/scripts + lint: - flake8 ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}" - isort --check --profile black ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}" - black --check ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}" + flake8 ccds hooks tests docs/scripts + isort --check --profile black ccds hooks tests docs/scripts + black --check ccds hooks tests docs/scripts ### DOCS diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py index 1385a89f7..e8975e747 100644 --- a/hooks/post_gen_project.py +++ b/hooks/post_gen_project.py @@ -15,7 +15,6 @@ "isort", "pip", "python-dotenv", - "setuptools", ] # {% if cookiecutter.dataset_storage.s3 %} @@ -26,6 +25,7 @@ packages += [ "typer", "loguru", + "tqdm", ] # {% endif %} diff --git a/tests/test_creation.py b/tests/test_creation.py index 06c1658a3..322c00ed3 100644 --- a/tests/test_creation.py +++ b/tests/test_creation.py @@ -29,6 +29,7 @@ def test_baking_configs(config, fast): with bake_project(config) as project_directory: verify_folders(project_directory, config) verify_files(project_directory, config) + lint(project_directory) if fast < 2: verify_makefile_commands(project_directory, config) @@ -54,10 +55,7 @@ def verify_folders(root, config): if config["include_code_scaffold"] == "Yes": expected_dirs += [ - f"{config['module_name']}/data", - f"{config['module_name']}/features", - f"{config['module_name']}/models", 
- f"{config['module_name']}/visualization", + f"{config['module_name']}/modeling", ] if config["docs"] == "mkdocs": @@ -104,15 +102,13 @@ def verify_files(root, config): if config["include_code_scaffold"] == "Yes": expected_files += [ - f"{config['module_name']}/data/__init__.py", - f"{config['module_name']}/data/make_dataset.py", - f"{config['module_name']}/features/__init__.py", - f"{config['module_name']}/features/build_features.py", - f"{config['module_name']}/models/__init__.py", - f"{config['module_name']}/models/train_model.py", - f"{config['module_name']}/models/predict_model.py", - f"{config['module_name']}/visualization/__init__.py", - f"{config['module_name']}/visualization/visualize.py", + f"{config['module_name']}/config.py", + f"{config['module_name']}/dataset.py", + f"{config['module_name']}/features.py", + f"{config['module_name']}/modeling/__init__.py", + f"{config['module_name']}/modeling/train.py", + f"{config['module_name']}/modeling/predict.py", + f"{config['module_name']}/plots.py", ] if config["docs"] == "mkdocs": @@ -183,3 +179,30 @@ def verify_makefile_commands(root, config): assert "clean Delete all compiled Python files" in stdout_output assert result_returncode == 0 + + +def lint(root): + """Run the linters on the project.""" + result = run( + ["make", "lint"], + cwd=root, + stderr=PIPE, + stdout=PIPE, + ) + result_returncode = result.returncode + + encoding = sys.stdout.encoding + + if encoding is None: + encoding = "utf-8" + + # normally hidden by pytest except in failure we want this displayed + print("PATH=", os.getenv("PATH")) + print("\n======================= STDOUT ======================") + stdout_output = result.stdout.decode(encoding) + print(stdout_output) + + print("\n======================= STDERR ======================") + print(result.stderr.decode(encoding)) + + assert result_returncode == 0 diff --git a/{{ cookiecutter.repo_name }}/Makefile b/{{ cookiecutter.repo_name }}/Makefile index 543c7035f..764ba81cc 100644 --- 
a/{{ cookiecutter.repo_name }}/Makefile +++ b/{{ cookiecutter.repo_name }}/Makefile @@ -15,7 +15,7 @@ PYTHON_INTERPRETER = python .PHONY: requirements requirements: {% if "requirements.txt" == cookiecutter.dependency_file -%} - $(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel + $(PYTHON_INTERPRETER) -m pip install -U pip $(PYTHON_INTERPRETER) -m pip install -r requirements.txt {% elif "environment.yml" == cookiecutter.dependency_file -%} conda env update --name $(PROJECT_NAME) --file environment.yml --prune @@ -35,9 +35,9 @@ clean: .PHONY: lint lint: flake8 {{ cookiecutter.module_name }} + isort --check --diff --profile black {{ cookiecutter.module_name }} black --check --config pyproject.toml {{ cookiecutter.module_name }} - ## Format source code with black .PHONY: format format: @@ -119,4 +119,4 @@ endef export PRINT_HELP_PYSCRIPT help: - @python -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST) \ No newline at end of file + @python -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST) diff --git a/{{ cookiecutter.repo_name }}/pyproject.toml b/{{ cookiecutter.repo_name }}/pyproject.toml index b80ef391b..5758a5781 100644 --- a/{{ cookiecutter.repo_name }}/pyproject.toml +++ b/{{ cookiecutter.repo_name }}/pyproject.toml @@ -26,3 +26,7 @@ exclude = ''' | \.venv )/ ''' + +[tool.ruff.lint.isort] +known_first_party = ["{{ cookiecutter.module_name }}"] +force_sort_within_sections = true diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/__init__.py index e69de29bb..9680e9410 100644 --- a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/__init__.py +++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/__init__.py @@ -0,0 +1 @@ +from {{ cookiecutter.module_name }} import config # noqa: F401 diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/config.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/config.py new file mode 100644 index 
000000000..8d77c2297 --- /dev/null +++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/config.py @@ -0,0 +1,34 @@ +from pathlib import Path + +from dotenv import load_dotenv +from loguru import logger + +# Load environment variables from .env file if it exists +load_dotenv() + +# Paths +PROJ_ROOT = Path(__file__).resolve().parents[1] +logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}") + +DATA_DIR = PROJ_ROOT / "data" +RAW_DATA_DIR = DATA_DIR / "raw" +INTERIM_DATA_DIR = DATA_DIR / "interim" +PROCESSED_DATA_DIR = DATA_DIR / "processed" +EXTERNAL_DATA_DIR = DATA_DIR / "external" + +MODELS_DIR = PROJ_ROOT / "models" + +REPORTS_DIR = PROJ_ROOT / "reports" +FIGURES_DIR = REPORTS_DIR / "figures" + +# If tqdm is installed, configure loguru with tqdm.write +# https://github.com/Delgan/loguru/issues/135 +# ValueError means the default handler (id 0) was already removed, e.g. on +# module reload; the existing sinks are then left untouched +try: + from tqdm import tqdm + + logger.remove(0) + logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True) +except (ModuleNotFoundError, ValueError): + pass diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/data/make_dataset.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/data/make_dataset.py deleted file mode 100644 index ec4b3912e..000000000 --- a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/data/make_dataset.py +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding: utf-8 -*- -import logging -from pathlib import Path - -import click -from dotenv import find_dotenv, load_dotenv - - -@click.command() -@click.argument("input_filepath", type=click.Path(exists=True)) -@click.argument("output_filepath", type=click.Path()) -def main(input_filepath, output_filepath): - """Runs data processing scripts to turn raw data from (../raw) into - cleaned data ready to be analyzed (saved in ../processed). 
- """ - logger = logging.getLogger(__name__) - logger.info("making final data set from raw data") - - -if __name__ == "__main__": - log_fmt = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - logging.basicConfig(level=logging.INFO, format=log_fmt) - - # not used in this stub but often useful for finding various files - project_dir = Path(__file__).resolve().parents[2] - - # find .env automagically by walking up directories until it's found, then - # load up the .env entries as environment variables - load_dotenv(find_dotenv()) - - main() diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/dataset.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/dataset.py new file mode 100644 index 000000000..04f3ed6e3 --- /dev/null +++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/dataset.py @@ -0,0 +1,29 @@ +from pathlib import Path + +import typer +from loguru import logger +from tqdm import tqdm + +from {{ cookiecutter.module_name }}.config import PROCESSED_DATA_DIR, RAW_DATA_DIR + +app = typer.Typer() + + +@app.command() +def main( + # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ---- + input_path: Path = RAW_DATA_DIR / "dataset.csv", + output_path: Path = PROCESSED_DATA_DIR / "dataset.csv", + # ---------------------------------------------- +): + # ---- REPLACE THIS WITH YOUR OWN CODE ---- + logger.info("Processing dataset...") + for i in tqdm(range(10), total=10): + if i == 5: + logger.info("Something happened for iteration 5.") + logger.success("Processing dataset complete.") + # ----------------------------------------- + + +if __name__ == "__main__": + app() diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features.py new file mode 100644 index 000000000..20da88b85 --- /dev/null +++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features.py @@ -0,0 +1,29 @@ +from pathlib import Path + +import typer +from 
loguru import logger +from tqdm import tqdm + +from {{ cookiecutter.module_name }}.config import PROCESSED_DATA_DIR + +app = typer.Typer() + + +@app.command() +def main( + # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ---- + input_path: Path = PROCESSED_DATA_DIR / "dataset.csv", + output_path: Path = PROCESSED_DATA_DIR / "features.csv", + # ----------------------------------------- +): + # ---- REPLACE THIS WITH YOUR OWN CODE ---- + logger.info("Generating features from dataset...") + for i in tqdm(range(10), total=10): + if i == 5: + logger.info("Something happened for iteration 5.") + logger.success("Features generation complete.") + # ----------------------------------------- + + +if __name__ == "__main__": + app() diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features/build_features.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features/build_features.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/data/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/__init__.py similarity index 100% rename from {{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/data/__init__.py rename to {{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/__init__.py diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/predict.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/predict.py new file mode 100644 index 000000000..e0efdd430 --- /dev/null +++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/predict.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import typer +from loguru import logger +from tqdm import 
tqdm + +from {{ cookiecutter.module_name }}.config import MODELS_DIR, PROCESSED_DATA_DIR + +app = typer.Typer() + + +@app.command() +def main( + # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ---- + features_path: Path = PROCESSED_DATA_DIR / "test_features.csv", + model_path: Path = MODELS_DIR / "model.pkl", + predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv", + # ----------------------------------------- +): + # ---- REPLACE THIS WITH YOUR OWN CODE ---- + logger.info("Performing inference for model...") + for i in tqdm(range(10), total=10): + if i == 5: + logger.info("Something happened for iteration 5.") + logger.success("Inference complete.") + # ----------------------------------------- + + +if __name__ == "__main__": + app() diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/train.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/train.py new file mode 100644 index 000000000..4c6ee8f4b --- /dev/null +++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/train.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import typer +from loguru import logger +from tqdm import tqdm + +from {{ cookiecutter.module_name }}.config import MODELS_DIR, PROCESSED_DATA_DIR + +app = typer.Typer() + + +@app.command() +def main( + # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ---- + features_path: Path = PROCESSED_DATA_DIR / "features.csv", + labels_path: Path = PROCESSED_DATA_DIR / "labels.csv", + model_path: Path = MODELS_DIR / "model.pkl", + # ----------------------------------------- +): + # ---- REPLACE THIS WITH YOUR OWN CODE ---- + logger.info("Training some model...") + for i in tqdm(range(10), total=10): + if i == 5: + logger.info("Something happened for iteration 5.") + logger.success("Modeling training complete.") + # ----------------------------------------- + + +if __name__ == "__main__": + app() diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name 
}}/models/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/predict_model.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/predict_model.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/train_model.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/train_model.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/plots.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/plots.py new file mode 100644 index 000000000..dad5c0cea --- /dev/null +++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/plots.py @@ -0,0 +1,29 @@ +from pathlib import Path + +import typer +from loguru import logger +from tqdm import tqdm + +from {{ cookiecutter.module_name }}.config import FIGURES_DIR, PROCESSED_DATA_DIR + +app = typer.Typer() + + +@app.command() +def main( + # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ---- + input_path: Path = PROCESSED_DATA_DIR / "dataset.csv", + output_path: Path = FIGURES_DIR / "plot.png", + # ----------------------------------------- +): + # ---- REPLACE THIS WITH YOUR OWN CODE ---- + logger.info("Generating plot from data...") + for i in tqdm(range(10), total=10): + if i == 5: + logger.info("Something happened for iteration 5.") + logger.success("Plot generation complete.") + # ----------------------------------------- + + +if __name__ == "__main__": + app() diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/visualization/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/visualization/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/{{ cookiecutter.repo_name }}/{{ 
cookiecutter.module_name }}/visualization/visualize.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/visualization/visualize.py deleted file mode 100644 index e69de29bb..000000000