Merge pull request #40 from ocean-perception/main
Fix #39: Revert to plain old versioning
cappelletto committed Jun 18, 2023
2 parents 2e5b0c5 + 8162dba commit 54ed479
Showing 14 changed files with 170 additions and 153 deletions.
5 changes: 0 additions & 5 deletions .flake8

This file was deleted.

17 changes: 17 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,17 @@
repos:
  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort
        args: ["--profile", "black"]
  - repo: https://github.com/python/black
    rev: 22.3.0
    hooks:
      - id: black
        pass_filenames: true
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
    rev: v0.0.270
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
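
For context, a hedged illustration (not part of this commit) of the import layout the isort "black" profile above enforces: imports split into standard-library, third-party, and first-party groups, each alphabetized, with black-compatible wrapping. The module names are borrowed from files elsewhere in this diff.

import os
from datetime import datetime

import numpy as np
import pandas as pd

from bnn_inference.tools.console import Console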
27 changes: 0 additions & 27 deletions .pre-commit-config.yaml.disabled

This file was deleted.

38 changes: 38 additions & 0 deletions .ruff.toml
@@ -0,0 +1,38 @@
# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
select = ["E", "F"]
ignore = []

# Allow autofix for all enabled rules (when `--fix` is provided).
fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
unfixable = []

# Exclude a variety of commonly ignored directories.
exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".git-rewrite",
    ".hg",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "venv",
]

# Allow lines to be as long as 150 characters.
line-length = 150

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
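
A minimal sketch (not from this commit) of what the dummy-variable regex permits: ruff's F841 check ("local variable assigned but never used") skips names that match the pattern, so an underscore-prefixed throwaway assignment lints clean while a plainly named one would be flagged.

def first_line(path: str) -> str:
    with open(path) as f:
        line = f.readline()
        _rest = f.read()  # intentionally unused; matches dummy-variable-rgx, so F841 stays quiet
    return line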
23 changes: 10 additions & 13 deletions setup.py
@@ -7,31 +7,28 @@
# package source folder: src/
# dependencies: numpy, scipy, torch, pandas, scikit-learn, blitz

from distutils.util import convert_path

import git
from setuptools import find_packages, setup

main_ns = {}
ver_path = convert_path("src/bnn_inference/version.py")
with open(ver_path) as ver_file:
exec(ver_file.read(), main_ns)


def run_setup():
# get the long description from the README file
# TODO: Merge with short/specific description provided during setup() call
# print warning message about future deprecation of setup.py approach
print ("WARNING: setup.py will be deprecated in the future. Please refer to the most recent documentation for changes in installation instructions")
print(
"WARNING: setup.py will be deprecated in the future. Please refer to the most "
"recent documentation for changes in installation instructions"
)
with open("README.md") as f:
long_description = f.read()
if long_description is None:
long_description = "Small ML pipeline to infer image classes from latent representations of low resolution priors usin Bayesian NN"
long_description = "Small ML pipeline to infer image classes from latent "
"representations of low resolution priors usin Bayesian NN"

setup(
name="bnn_inference",
version=main_ns["__version__"],
description="Bayesian NN training/inference engine to learn mappings between latent representations of low resolution maps and high resolution maps",
version="0.0.9",
description="Bayesian NN training/inference engine to learn mappings between "
"latent representations of low resolution maps and high resolution maps",
author="Jose Cappelletto",
author_email="j.cappelletto@soton.ac.uk",
url="https://github.com/cappelletto/bayesian_inference",
@@ -55,7 +52,7 @@ def run_setup():
"Pillow>=9.1.1",
"scipy>=1.5.0",
"typer>=0.7.0",
"gitpython>=3.1.14",
"pyyaml",
],
)

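With the commit pinning version="0.0.9" in setup.py instead of exec'ing src/bnn_inference/version.py, here is a hedged sketch (not part of this commit) of how the installed version could still be recovered at runtime using only the standard library:

from importlib.metadata import PackageNotFoundError, version

try:
    __version__ = version("bnn_inference")  # reads the installed package metadata
except PackageNotFoundError:
    __version__ = "0.0.9"  # fallback when running from an uninstalled checkout
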
1 change: 0 additions & 1 deletion src/bnn_inference/__init__.py
@@ -3,4 +3,3 @@
from bnn_inference.tools.console import Console # noqa: F401
from bnn_inference.tools.dataloader import CustomDataloader # noqa: F401
from bnn_inference.tools.predictor import PredictiveEngine # noqa: F401
from bnn_inference.version import __version__ # noqa: F401
77 changes: 48 additions & 29 deletions src/bnn_inference/cli.py
@@ -38,33 +38,42 @@ def config_cb(ctx: typer.Context, param: typer.CallbackParam, value: str):
def train(
config: str = typer.Option(
"",
help="[future] Path to a YAML configuration file. You can use the file exclusively or overwrite any arguments via CLI.",
help="Path to a YAML configuration file. You can use the file exclusively or "
"overwrite any arguments via CLI.",
callback=config_cb,
is_eager=True,
),
latent_csv: str = typer.Option(
...,
help="Path to CSV containing the latent representation vector for each input entry (image). The 'UUID' is used to match against the target file entries",
help="Path to CSV containing the latent representation vector for each input "
"entry (image). The 'UUID' is used to match against the target file entries",
),
latent_key: str = typer.Option(
"latent_",
help="Name of the key used for the columns containing the latent vector. For example, a h=8 vector should be read as 'latent_0,latent_1,...,latent_7'",
help="Name of the key used for the columns containing the latent vector. For "
"example, a h=8 vector should be read as 'latent_0,latent_1,...,latent_7'",
),
target_csv: str = typer.Option(
...,
help="Path to CSV containing the target entries to be used for training/validation. The 'UUID' is used to match against the input file entries",
help="Path to CSV containing the target entries to be used for "
"training/validation. The 'UUID' is used to match against the input file entries",
),
target_key: str = typer.Option(
...,
help="Keyword that defines the field to be learnt/predicted. It must match the column name in the target file",
help="Keyword that defines the field to be learnt/predicted. It must match the "
"column name in the target file",
),
uuid_key: str = typer.Option(
"relative_path",
help="Unique identifier string used as key for input/target example matching. The UUID string must match for both the input (latent) file and the target file column identifier",
help="Unique identifier string used as key for input/target example matching. "
"The UUID string must match for both the input (latent) file and the target "
"file column identifier",
),
output_csv: str = typer.Option(
"",
help="Generated file containing the expected and predicted value for each input entry. It preserves the input file columns and appends the predicted columns",
help="Generated file containing the expected and predicted value for each "
"input entry. It preserves the input file columns and appends the predicted "
"columns",
),
output_network_filename: str = typer.Option(
"",
@@ -76,26 +85,24 @@ def train(
),
log_filename: str = typer.Option(
"",
help="Output path to the logfile with the training / validation error for each epoch. It can be used to monitor the training process",
),
num_epochs: int = typer.Option(
100,
help="Number of training epochs"
help="Output path to the logfile with the training / validation error for "
"each epoch. It can be used to monitor the training process",
),
num_epochs: int = typer.Option(100, help="Number of training epochs"),
num_samples: int = typer.Option(
10,
help="Number of Monte Carlo samples for ELBO based posterior estimation",
),
xratio: float = typer.Option(
0.9,
help="Ratio of dataset samples to be used for training (T). The validation (V) is calculated as V = 1 - T",
help="Ratio of dataset samples to be used for training (T). The validation "
"(V) is calculated as V = 1 - T",
),
scale_factor: float = typer.Option(
1.0, help="Scaling factor to apply to the output target. Default: 1.0 (no scaling))"
),
learning_rate: float = typer.Option(
1e-3, help="Optimizer learning rate"
1.0,
help="Scaling factor to apply to the output target. Default: 1.0 (no scaling))",
),
learning_rate: float = typer.Option(1e-3, help="Optimizer learning rate"),
lambda_loss: float = typer.Option(
1.0, help="Cross-entropy or MSE loss lambda value (hyperparameter)"
),
@@ -108,7 +115,8 @@
gpu_index: int = typer.Option(0, help="Index of CUDA device to be used."),
cpu_only: bool = typer.Option(
False,
help="If set, the training will be performed on the CPU. This is useful for debugging purposes and low-spec computers.",
help="If set, the training will be performed on the CPU. This is useful for "
"debugging purposes and low-spec computers.",
),
):
Console.info("Training")
@@ -141,25 +149,30 @@
def predict(
config: str = typer.Option(
"",
help="[future] Path to a YAML configuration file. You can use the file exclusively or overwrite any arguments via CLI.",
help="[future] Path to a YAML configuration file. You can use the file "
"exclusively or overwrite any arguments via CLI.",
callback=config_cb,
is_eager=True,
),
latent_csv: str = typer.Option(
...,
help="Path to CSV containing the latent representation vector for each input entry (image). The 'UUID' is used to match against the target file entries",
help="Path to CSV containing the latent representation vector for each input "
"entry (image). The 'UUID' is used to match against the target file entries",
),
latent_key: str = typer.Option(
"latent_",
help="Name of the key used for the columns containing the latent vector. For example, a h=8 vector should be read as 'latent_0,latent_1,...,latent_7'",
help="Name of the key used for the columns containing the latent vector. For "
"example, a h=8 vector should be read as 'latent_0,latent_1,...,latent_7'",
),
target_key: str = typer.Option(
...,
help="Keyword that defines the field to be learnt/predicted. It must match the column name in the target file",
help="Keyword that defines the field to be learnt/predicted. It must match "
"the column name in the target file",
),
output_csv: str = typer.Option(
"",
help="File containing the expected and inferred value for each input entry. It preserves the input file columns and appends the corresponding prediction",
help="File containing the expected and inferred value for each input entry. It "
"preserves the input file columns and appends the corresponding prediction",
),
output_network_filename: str = typer.Option(
..., help="Trained Bayesian Neural Network in PyTorch compatible format."
@@ -174,7 +187,8 @@ def predict(
gpu_index: int = typer.Option(0, help="Index of CUDA device to be used."),
cpu_only: bool = typer.Option(
False,
help="If set, the training will be performed on the CPU. This is useful for debugging purposes.",
help="If set, the training will be performed on the CPU. This is useful for "
"debugging purposes.",
),
):
Console.info("Predicting")
@@ -197,25 +211,30 @@
def join_predictions(
config: str = typer.Option(
"",
help="Path to a YAML configuration file. You can use the file exclusively or overwrite any arguments via CLI.",
help="Path to a YAML configuration file. You can use the file exclusively or "
"overwrite any arguments via CLI.",
callback=config_cb,
is_eager=True,
),
latent_csv: str = typer.Option(
...,
help="Path to CSV containing the latent representation vector for each input entry (image). The 'UUID' is used to match against the target file entries",
help="Path to CSV containing the latent representation vector for each input "
"entry (image). The 'UUID' is used to match against the target file entries",
),
target_csv: str = typer.Option(
...,
help="Path to CSV containing the target entries to be used for training/validation. The 'UUID' is used to match against the input file entries",
help="Path to CSV containing the target entries to be used for "
"training/validation. The 'UUID' is used to match against the input file entries",
),
target_key: str = typer.Option(
...,
help="Keyword that defines the field to be learnt/predicted. It must match the column name in the target file",
help="Keyword that defines the field to be learnt/predicted. It must match the "
"column name in the target file",
),
output_csv: str = typer.Option(
"",
help="File containing the expected and inferred value for each input entry. It preserves the input file columns and appends the corresponding prediction",
help="File containing the expected and inferred value for each input entry. It "
"preserves the input file columns and appends the corresponding prediction",
),
):
Console.info("Joining predictions")
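A hypothetical invocation sketch for the typer commands defined above, using typer's bundled test runner. The `app` object name and the file paths are assumptions for illustration, not taken from this diff:

from typer.testing import CliRunner

from bnn_inference.cli import app  # assumed name of the typer.Typer() instance

runner = CliRunner()
result = runner.invoke(
    app,
    [
        "train",
        "--latent-csv", "latents.csv",  # hypothetical inputs
        "--target-csv", "targets.csv",
        "--target-key", "landability",
        "--num-epochs", "100",
    ],
)
print(result.output)
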
16 changes: 11 additions & 5 deletions src/bnn_inference/join_predictions.py
@@ -7,7 +7,8 @@

def join_predictions_impl(latent_csv, target_csv, target_key, output_csv):
Console.info(
"Postprocessing tool for predictions generated with BNN. Merges predicted entries with target values by key (uuid) and export as a single file"
"Postprocessing tool for predictions generated with BNN. Merges predicted "
"entries with target values by key (uuid) and export as a single file"
)

if os.path.isfile(target_csv):
@@ -69,17 +70,22 @@ def join_predictions_impl(latent_csv, target_csv, target_key, output_csv):
# Typical name/header for target (ground truth file)
# Name: M3_direct_r020_TR_ALL.csv
# Header:[empty] | uuid | northing [m] | easting [m] | landability
# Data format is pretty clean, northing/easting is expected to be uuid-format compatible (no trailing decimals)
# Data format is pretty clean, northing/easting is expected to be uuid-format
# compatible (no trailing decimals)

# Typical header format for predicted values (exhaustive list format)
# Name: all_dM3h1631.csv
# Header:
# uuid valid_ratio northing [m] easting [m] depth [m] latitude [deg] longitude [deg] altitude [m] roll [deg] pitch [deg] heading [deg] timestamp [s] recon_loss landability uncertainty
# uuid valid_ratio northing [m] easting [m] depth [m] latitude [deg]
# longitude [deg] altitude [m] roll [deg] pitch [deg] heading [deg]
# timestamp [s] recon_loss landability uncertainty

# Columns we need for the output join
# [index/empty] | uuid | northing [m] from target | easting [m] from target | [score: measurability/landability] | [predicted score]
# [index/empty] | uuid | northing [m] from target | easting [m] from target
# | [score: measurability/landability] | [predicted score]

# We trim the prediction dataframe, we only need 'uuid' and the prediction + uncertainty columns
# We trim the prediction dataframe, we only need 'uuid' and the prediction
# + uncertainty columns
dfx = df2[["uuid", index_key, "uncertainty"]]
merged_df = pd.merge(df1, dfx, on="uuid", how="inner")
Console.info("Exporting merged dataframes to ", output_csv)
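A minimal worked example (with made-up values) of the inner join performed above: the prediction frame is trimmed to 'uuid' plus the prediction and uncertainty columns, then matched against the target frame on 'uuid', dropping rows without a counterpart.

import pandas as pd

df1 = pd.DataFrame({"uuid": ["a", "b", "c"], "northing [m]": [10.0, 11.0, 12.0]})
df2 = pd.DataFrame({"uuid": ["b", "a"], "landability": [0.2, 0.9], "uncertainty": [0.05, 0.10]})

dfx = df2[["uuid", "landability", "uncertainty"]]  # keep only the key + prediction columns
merged_df = pd.merge(df1, dfx, on="uuid", how="inner")  # 'c' has no prediction, so it drops out
print(merged_df)
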
4 changes: 2 additions & 2 deletions src/bnn_inference/predict.py
@@ -8,11 +8,11 @@
# Author: Jose Cappelletto (j.cappelletto@soton.ac.uk)

import os
from datetime import datetime

import numpy as np
import pandas as pd
import torch
from datetime import datetime

from bnn_inference.tools.bnn_model import BayesianRegressor
from bnn_inference.tools.console import Console
@@ -57,7 +57,7 @@ def predict_impl(

if output_csv == "":
date_str = datetime.strftime(datetime.now(), "%Y%m%d_%H%M%S")
output_csv = date_str+ "_bnn_predictions.csv"
output_csv = date_str + "_bnn_predictions.csv"

# if output file exists, warn user
if os.path.isfile(output_csv):
3 changes: 0 additions & 3 deletions src/bnn_inference/tools/__init__.py
@@ -1,6 +1,3 @@
import math
import statistics

# from bnn_configuration import *
# from bnn_model import *
# from console import *
