Merge pull request #40 from ocean-perception/main
Fix #39: Revert to plain old versioning
cappelletto committed Jun 18, 2023
2 parents 2e5b0c5 + 8162dba commit 54ed479
Showing 14 changed files with 170 additions and 153 deletions.
5 changes: 0 additions & 5 deletions .flake8

This file was deleted.

17 changes: 17 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,17 @@
repos:
  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort
        args: ["--profile", "black"]
  - repo: https://github.com/python/black
    rev: 22.3.0
    hooks:
      - id: black
        pass_filenames: true
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
    rev: v0.0.270
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
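
For context, a hedged illustration (not part of this commit) of the import layout the isort "black" profile above enforces: imports split into standard-library, third-party, and first-party groups, each alphabetized, with black-compatible wrapping. The module names are borrowed from files elsewhere in this diff.

import os
from datetime import datetime

import numpy as np
import pandas as pd

from bnn_inference.tools.console import Console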
27 changes: 0 additions & 27 deletions .pre-commit-config.yaml.disabled

This file was deleted.

38 changes: 38 additions & 0 deletions .ruff.toml
@@ -0,0 +1,38 @@
# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
select = ["E", "F"]
ignore = []

# Allow autofix for all enabled rules (when `--fix` is provided).
fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
unfixable = []

# Exclude a variety of commonly ignored directories.
exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".git-rewrite",
    ".hg",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "venv",
]

# Allow lines to be as long as 150 characters.
line-length = 150

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
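
A minimal sketch (not from this commit) of what the dummy-variable regex permits: ruff's F841 check ("local variable assigned but never used") skips names that match the pattern, so an underscore-prefixed throwaway assignment lints clean while a plainly named one would be flagged.

def first_line(path: str) -> str:
    with open(path) as f:
        line = f.readline()
        _rest = f.read()  # intentionally unused; matches dummy-variable-rgx, so F841 stays quiet
    return line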
23 changes: 10 additions & 13 deletions setup.py
@@ -7,31 +7,28 @@
# package source folder: src/
# dependencies: numpy, scipy, torch, pandas, scikit-learn, blitz

from distutils.util import convert_path

import git
from setuptools import find_packages, setup

main_ns = {}
ver_path = convert_path("src/bnn_inference/version.py")
with open(ver_path) as ver_file:
exec(ver_file.read(), main_ns)


def run_setup():
# get the long description from the README file
# TODO: Merge with short/specific description provided during setup() call
# print warning message about future deprecation of setup.py approach
print ("WARNING: setup.py will be deprecated in the future. Please refer to the most recent documentation for changes in installation instructions")
print(
"WARNING: setup.py will be deprecated in the future. Please refer to the most "
"recent documentation for changes in installation instructions"
)
with open("README.md") as f:
long_description = f.read()
if long_description is None:
long_description = "Small ML pipeline to infer image classes from latent representations of low resolution priors usin Bayesian NN"
long_description = "Small ML pipeline to infer image classes from latent "
"representations of low resolution priors usin Bayesian NN"

setup(
name="bnn_inference",
version=main_ns["__version__"],
description="Bayesian NN training/inference engine to learn mappings between latent representations of low resolution maps and high resolution maps",
version="0.0.9",
description="Bayesian NN training/inference engine to learn mappings between "
"latent representations of low resolution maps and high resolution maps",
author="Jose Cappelletto",
author_email="j.cappelletto@soton.ac.uk",
url="https://github.com/cappelletto/bayesian_inference",
@@ -55,7 +52,7 @@ def run_setup():
"Pillow>=9.1.1",
"scipy>=1.5.0",
"typer>=0.7.0",
"gitpython>=3.1.14",
"pyyaml",
],
)

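With the commit pinning version="0.0.9" in setup.py instead of exec'ing src/bnn_inference/version.py, here is a hedged sketch (not part of this commit) of how the installed version could still be recovered at runtime using only the standard library:

from importlib.metadata import PackageNotFoundError, version

try:
    __version__ = version("bnn_inference")  # reads the installed package metadata
except PackageNotFoundError:
    __version__ = "0.0.9"  # fallback when running from an uninstalled checkout
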
1 change: 0 additions & 1 deletion src/bnn_inference/__init__.py
@@ -3,4 +3,3 @@
from bnn_inference.tools.console import Console # noqa: F401
from bnn_inference.tools.dataloader import CustomDataloader # noqa: F401
from bnn_inference.tools.predictor import PredictiveEngine # noqa: F401
from bnn_inference.version import __version__ # noqa: F401
77 changes: 48 additions & 29 deletions src/bnn_inference/cli.py
@@ -38,33 +38,42 @@ def config_cb(ctx: typer.Context, param: typer.CallbackParam, value: str):
def train(
config: str = typer.Option(
"",
help="[future] Path to a YAML configuration file. You can use the file exclusively or overwrite any arguments via CLI.",
help="Path to a YAML configuration file. You can use the file exclusively or "
"overwrite any arguments via CLI.",
callback=config_cb,
is_eager=True,
),
latent_csv: str = typer.Option(
...,
help="Path to CSV containing the latent representation vector for each input entry (image). The 'UUID' is used to match against the target file entries",
help="Path to CSV containing the latent representation vector for each input "
"entry (image). The 'UUID' is used to match against the target file entries",
),
latent_key: str = typer.Option(
"latent_",
help="Name of the key used for the columns containing the latent vector. For example, a h=8 vector should be read as 'latent_0,latent_1,...,latent_7'",
help="Name of the key used for the columns containing the latent vector. For "
"example, a h=8 vector should be read as 'latent_0,latent_1,...,latent_7'",
),
target_csv: str = typer.Option(
...,
help="Path to CSV containing the target entries to be used for training/validation. The 'UUID' is used to match against the input file entries",
help="Path to CSV containing the target entries to be used for "
"training/validation. The 'UUID' is used to match against the input file entries",
),
target_key: str = typer.Option(
...,
help="Keyword that defines the field to be learnt/predicted. It must match the column name in the target file",
help="Keyword that defines the field to be learnt/predicted. It must match the "
"column name in the target file",
),
uuid_key: str = typer.Option(
"relative_path",
help="Unique identifier string used as key for input/target example matching. The UUID string must match for both the input (latent) file and the target file column identifier",
help="Unique identifier string used as key for input/target example matching. "
"The UUID string must match for both the input (latent) file and the target "
"file column identifier",
),
output_csv: str = typer.Option(
"",
help="Generated file containing the expected and predicted value for each input entry. It preserves the input file columns and appends the predicted columns",
help="Generated file containing the expected and predicted value for each "
"input entry. It preserves the input file columns and appends the predicted "
"columns",
),
output_network_filename: str = typer.Option(
"",
@@ -76,26 +85,24 @@ def train(
),
log_filename: str = typer.Option(
"",
help="Output path to the logfile with the training / validation error for each epoch. It can be used to monitor the training process",
),
num_epochs: int = typer.Option(
100,
help="Number of training epochs"
help="Output path to the logfile with the training / validation error for "
"each epoch. It can be used to monitor the training process",
),
num_epochs: int = typer.Option(100, help="Number of training epochs"),
num_samples: int = typer.Option(
10,
help="Number of Monte Carlo samples for ELBO based posterior estimation",
),
xratio: float = typer.Option(
0.9,
help="Ratio of dataset samples to be used for training (T). The validation (V) is calculated as V = 1 - T",
help="Ratio of dataset samples to be used for training (T). The validation "
"(V) is calculated as V = 1 - T",
),
scale_factor: float = typer.Option(
1.0, help="Scaling factor to apply to the output target. Default: 1.0 (no scaling))"
),
learning_rate: float = typer.Option(
1e-3, help="Optimizer learning rate"
1.0,
help="Scaling factor to apply to the output target. Default: 1.0 (no scaling))",
),
learning_rate: float = typer.Option(1e-3, help="Optimizer learning rate"),
lambda_loss: float = typer.Option(
1.0, help="Cross-entropy or MSE loss lambda value (hyperparameter)"
),
@@ -108,7 +115,8 @@
gpu_index: int = typer.Option(0, help="Index of CUDA device to be used."),
cpu_only: bool = typer.Option(
False,
help="If set, the training will be performed on the CPU. This is useful for debugging purposes and low-spec computers.",
help="If set, the training will be performed on the CPU. This is useful for "
"debugging purposes and low-spec computers.",
),
):
Console.info("Training")
@@ -141,25 +149,30 @@
def predict(
config: str = typer.Option(
"",
help="[future] Path to a YAML configuration file. You can use the file exclusively or overwrite any arguments via CLI.",
help="[future] Path to a YAML configuration file. You can use the file "
"exclusively or overwrite any arguments via CLI.",
callback=config_cb,
is_eager=True,
),
latent_csv: str = typer.Option(
...,
help="Path to CSV containing the latent representation vector for each input entry (image). The 'UUID' is used to match against the target file entries",
help="Path to CSV containing the latent representation vector for each input "
"entry (image). The 'UUID' is used to match against the target file entries",
),
latent_key: str = typer.Option(
"latent_",
help="Name of the key used for the columns containing the latent vector. For example, a h=8 vector should be read as 'latent_0,latent_1,...,latent_7'",
help="Name of the key used for the columns containing the latent vector. For "
"example, a h=8 vector should be read as 'latent_0,latent_1,...,latent_7'",
),
target_key: str = typer.Option(
...,
help="Keyword that defines the field to be learnt/predicted. It must match the column name in the target file",
help="Keyword that defines the field to be learnt/predicted. It must match "
"the column name in the target file",
),
output_csv: str = typer.Option(
"",
help="File containing the expected and inferred value for each input entry. It preserves the input file columns and appends the corresponding prediction",
help="File containing the expected and inferred value for each input entry. It "
"preserves the input file columns and appends the corresponding prediction",
),
output_network_filename: str = typer.Option(
..., help="Trained Bayesian Neural Network in PyTorch compatible format."
@@ -174,7 +187,8 @@ def predict(
gpu_index: int = typer.Option(0, help="Index of CUDA device to be used."),
cpu_only: bool = typer.Option(
False,
help="If set, the training will be performed on the CPU. This is useful for debugging purposes.",
help="If set, the training will be performed on the CPU. This is useful for "
"debugging purposes.",
),
):
Console.info("Predicting")
@@ -197,25 +211,30 @@
def join_predictions(
config: str = typer.Option(
"",
help="Path to a YAML configuration file. You can use the file exclusively or overwrite any arguments via CLI.",
help="Path to a YAML configuration file. You can use the file exclusively or "
"overwrite any arguments via CLI.",
callback=config_cb,
is_eager=True,
),
latent_csv: str = typer.Option(
...,
help="Path to CSV containing the latent representation vector for each input entry (image). The 'UUID' is used to match against the target file entries",
help="Path to CSV containing the latent representation vector for each input "
"entry (image). The 'UUID' is used to match against the target file entries",
),
target_csv: str = typer.Option(
...,
help="Path to CSV containing the target entries to be used for training/validation. The 'UUID' is used to match against the input file entries",
help="Path to CSV containing the target entries to be used for "
"training/validation. The 'UUID' is used to match against the input file entries",
),
target_key: str = typer.Option(
...,
help="Keyword that defines the field to be learnt/predicted. It must match the column name in the target file",
help="Keyword that defines the field to be learnt/predicted. It must match the "
"column name in the target file",
),
output_csv: str = typer.Option(
"",
help="File containing the expected and inferred value for each input entry. It preserves the input file columns and appends the corresponding prediction",
help="File containing the expected and inferred value for each input entry. It "
"preserves the input file columns and appends the corresponding prediction",
),
):
Console.info("Joining predictions")
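A hypothetical invocation sketch for the typer commands defined above, using typer's bundled test runner. The `app` object name and the file paths are assumptions for illustration, not taken from this diff:

from typer.testing import CliRunner

from bnn_inference.cli import app  # assumed name of the typer.Typer() instance

runner = CliRunner()
result = runner.invoke(
    app,
    [
        "train",
        "--latent-csv", "latents.csv",  # hypothetical inputs
        "--target-csv", "targets.csv",
        "--target-key", "landability",
        "--num-epochs", "100",
    ],
)
print(result.output)
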
16 changes: 11 additions & 5 deletions src/bnn_inference/join_predictions.py
@@ -7,7 +7,8 @@

def join_predictions_impl(latent_csv, target_csv, target_key, output_csv):
Console.info(
"Postprocessing tool for predictions generated with BNN. Merges predicted entries with target values by key (uuid) and export as a single file"
"Postprocessing tool for predictions generated with BNN. Merges predicted "
"entries with target values by key (uuid) and export as a single file"
)

if os.path.isfile(target_csv):
@@ -69,17 +70,22 @@ def join_predictions_impl(latent_csv, target_csv, target_key, output_csv):
# Typical name/header for target (ground truth file)
# Name: M3_direct_r020_TR_ALL.csv
# Header:[empty] | uuid | northing [m] | easting [m] | landability
# Data format is pretty clean, northing/easting is expected to be uuid-format compatible (no trailing decimals)
# Data format is pretty clean, northing/easting is expected to be uuid-format
# compatible (no trailing decimals)

# Typical header format for predicted values (exhaustive list format)
# Name: all_dM3h1631.csv
# Header:
# uuid valid_ratio northing [m] easting [m] depth [m] latitude [deg] longitude [deg] altitude [m] roll [deg] pitch [deg] heading [deg] timestamp [s] recon_loss landability uncertainty
# uuid valid_ratio northing [m] easting [m] depth [m] latitude [deg]
# longitude [deg] altitude [m] roll [deg] pitch [deg] heading [deg]
# timestamp [s] recon_loss landability uncertainty

# Columns we need for the output join
# [index/empty] | uuid | northing [m] from target | easting [m] from target | [score: measurability/landability] | [predicted score]
# [index/empty] | uuid | northing [m] from target | easting [m] from target
# | [score: measurability/landability] | [predicted score]

# We trim the prediction dataframe, we only need 'uuid' and the prediction + uncertainty columns
# We trim the prediction dataframe, we only need 'uuid' and the prediction
# + uncertainty columns
dfx = df2[["uuid", index_key, "uncertainty"]]
merged_df = pd.merge(df1, dfx, on="uuid", how="inner")
Console.info("Exporting merged dataframes to ", output_csv)
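A minimal worked example (with made-up values) of the inner join performed above: the prediction frame is trimmed to 'uuid' plus the prediction and uncertainty columns, then matched against the target frame on 'uuid', dropping rows without a counterpart.

import pandas as pd

df1 = pd.DataFrame({"uuid": ["a", "b", "c"], "northing [m]": [10.0, 11.0, 12.0]})
df2 = pd.DataFrame({"uuid": ["b", "a"], "landability": [0.2, 0.9], "uncertainty": [0.05, 0.10]})

dfx = df2[["uuid", "landability", "uncertainty"]]  # keep only the key + prediction columns
merged_df = pd.merge(df1, dfx, on="uuid", how="inner")  # 'c' has no prediction, so it drops out
print(merged_df)
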
4 changes: 2 additions & 2 deletions src/bnn_inference/predict.py
@@ -8,11 +8,11 @@
# Author: Jose Cappelletto (j.cappelletto@soton.ac.uk)

import os
from datetime import datetime

import numpy as np
import pandas as pd
import torch
from datetime import datetime

from bnn_inference.tools.bnn_model import BayesianRegressor
from bnn_inference.tools.console import Console
@@ -57,7 +57,7 @@ def predict_impl(

if output_csv == "":
date_str = datetime.strftime(datetime.now(), "%Y%m%d_%H%M%S")
output_csv = date_str+ "_bnn_predictions.csv"
output_csv = date_str + "_bnn_predictions.csv"

# if output file exists, warn user
if os.path.isfile(output_csv):
3 changes: 0 additions & 3 deletions src/bnn_inference/tools/__init__.py
@@ -1,6 +1,3 @@
import math
import statistics

# from bnn_configuration import *
# from bnn_model import *
# from console import *
