diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 525d861..b42b801 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python -name: Python application +name: CI pipeline on: push: diff --git a/.github/workflows/static.yml b/.github/workflows/static.yml new file mode 100644 index 0000000..113baae --- /dev/null +++ b/.github/workflows/static.yml @@ -0,0 +1,79 @@ +# Simple workflow for deploying static content to GitHub Pages +name: Deploy Documentation on Pages + +on: + # Runs on pushes targeting the default branch + push: + branches: ["main"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + # Single deploy job since we're just deploying + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install -U sphinx + pip install furo + + - name: Build documentation + run: | + cd docs + + + sphinx-apidoc -e -M --force -o . .. 
+          make html
+      - name: Upload build data
+        uses: actions/upload-artifact@v3
+        with:
+          name: documentation
+          path: ./docs/_build/html
+
+  deploy:
+    needs: build
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Setup Pages
+        uses: actions/configure-pages@v3
+      - name: Download built directory
+        uses: actions/download-artifact@v3
+        with:
+          name: documentation
+          path: ./site
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v1
+        with:
+          path: './site'
+      - name: Deploy to GitHub Pages
+        id: deployment
+        # deploy-pages@v1 has no 'folder' input; it deploys the Pages
+        # artifact uploaded by the upload-pages-artifact step above.
+        uses: actions/deploy-pages@v1
diff --git a/.gitignore b/.gitignore
index e69de29..f9b1cf3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,139 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ +/archive/ +/archive_resized/ +/checkpoints/ +/exported_models/ +archive.lnk + +colab_notebook.ipynb +/.vscode/ + + diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..bf5e2eb --- /dev/null +++ b/.pylintrc @@ -0,0 +1,790 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Load and enable all available extensions. 
Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold to be exceeded before program exits with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore= + CVS, + migrations, + tests.py, + admin.py + + +# Add files or directories matching the regex patterns to the ignore-list. The +# regex matches against paths and can be in Posix or Windows format. +ignore-paths= + +# Files or directories matching the regex patterns are skipped. The regex +# matches against base names, not paths. 
The default value ignores Emacs file +# locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.9 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. 
You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". 
+disable = all + +; disable=raw-checker-failed, +; bad-inline-option, +; locally-disabled, +; file-ignored, +; suppressed-message, +; useless-suppression, +; deprecated-pragma, +; use-symbolic-message-instead + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. + +enable = + blacklisted-name, + line-too-long, + + abstract-class-instantiated, + abstract-method, + access-member-before-definition, + anomalous-backslash-in-string, + anomalous-unicode-escape-in-string, + arguments-differ, + assert-on-tuple, + assigning-non-slot, + assignment-from-no-return, + assignment-from-none, + attribute-defined-outside-init, + bad-except-order, + bad-format-character, + bad-format-string-key, + bad-format-string, + bad-open-mode, + bad-reversed-sequence, + bad-staticmethod-argument, + bad-str-strip-call, + bad-super-call, + binary-op-exception, + boolean-datetime, + catching-non-exception, + cell-var-from-loop, + confusing-with-statement, + continue-in-finally, + dangerous-default-value, + duplicate-argument-name, + duplicate-bases, + duplicate-except, + duplicate-key, + expression-not-assigned, + format-combined-specification, + format-needs-mapping, + function-redefined, + global-variable-undefined, + import-error, + import-self, + inconsistent-mro, + inherit-non-class, + init-is-generator, + invalid-all-object, + invalid-format-index, + invalid-length-returned, + invalid-sequence-index, + invalid-slice-index, + invalid-slots-object, + invalid-slots, + invalid-unary-operand-type, + logging-too-few-args, + logging-too-many-args, + logging-unsupported-format, + lost-exception, + method-hidden, + misplaced-bare-raise, + misplaced-future, + missing-format-argument-key, + missing-format-attribute, + 
missing-format-string-key, + + no-method-argument, + no-name-in-module, + no-self-argument, + no-value-for-parameter, + non-iterator-returned, + nonexistent-operator, + not-a-mapping, + not-an-iterable, + not-callable, + not-context-manager, + not-in-loop, + pointless-statement, + pointless-string-statement, + raising-bad-type, + raising-non-exception, + redefined-builtin, + redefined-outer-name, + redundant-keyword-arg, + repeated-keyword, + return-arg-in-generator, + return-in-init, + return-outside-function, + signature-differs, + super-init-not-called, + syntax-error, + too-few-format-args, + too-many-format-args, + too-many-function-args, + truncated-format-string, + undefined-all-variable, + undefined-loop-variable, + undefined-variable, + unexpected-keyword-arg, + unexpected-special-method-signature, + unpacking-non-sequence, + unreachable, + unsubscriptable-object, + unsupported-binary-operation, + unsupported-membership-test, + unused-format-string-argument, + unused-format-string-key, + used-before-assignment, + using-constant-test, + yield-outside-function, + + astroid-error, + fatal, + method-check-failed, + parse-error, + raw-checker-failed, + + unused-argument, + unused-import, + unused-variable, + + eval-used, + exec-used, + + bad-classmethod-argument, + bad-mcs-classmethod-argument, + bad-mcs-method-argument, + bare-except, + broad-except, + consider-iterating-dictionary, + consider-using-enumerate, + global-at-module-level, + global-variable-not-assigned, + logging-format-interpolation, + logging-not-lazy, + multiple-imports, + multiple-statements, + no-classmethod-decorator, + no-staticmethod-decorator, + protected-access, + redundant-unittest-assert, + reimported, + simplifiable-if-statement, + singleton-comparison, + superfluous-parens, + unidiomatic-typecheck, + unnecessary-lambda, + unnecessary-pass, + unnecessary-semicolon, + unneeded-not, + useless-else-on-loop, + + deprecated-method, + deprecated-module, + + too-many-boolean-expressions, + 
too-many-nested-blocks, + + wildcard-import, + wrong-import-order, + wrong-import-position, + + missing-final-newline, + mixed-line-endings, + trailing-newlines, + trailing-whitespace, + unexpected-line-ending-format, + + bad-inline-option, + bad-option-value, + deprecated-pragma, + unrecognized-inline-option, + useless-suppression, + +; enable=c-extension-no-member + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=BaseException, + Exception + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the 'python-enchant' package. 
+spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. 
If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. 
+module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). 
+max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. 
+allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. 
+ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. 
+max-module-lines=1000
+
+# Allow the body of a class to be on the same line as the declaration if body
+# contains single statement.
+single-line-class-stmt=no
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=no
+
+
+[SIMILARITIES]
+
+# Comments are removed from the similarity computation
+ignore-comments=yes
+
+# Docstrings are removed from the similarity computation
+ignore-docstrings=yes
+
+# Imports are removed from the similarity computation
+ignore-imports=yes
+
+# Signatures are removed from the similarity computation
+ignore-signatures=yes
+
+# Minimum lines number of a similarity.
+min-similarity-lines=4
diff --git a/README.md b/README.md
index e69de29..948c251 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,19 @@
+![example branch parameter](https://github.com/guorbit/utilities/actions/workflows/python-app.yml/badge.svg?branch=migrating_segmentation_utils)
+
+Note before installation: none of these commands have been properly tested. Make sure you install the package in a virtual environment.
+
+To install the utilities repo as a package, use the following command in the terminal:
+Note: you need a working SSH key to access GitHub from your current machine.
+
+```
+pip install git+ssh://git@github.com/guorbit/utilities.git
+
+```
+
+
+Alternatively, the following command can be used to install a git repo AFTER cloning it:
+Note: the path below has to be modified to point to the package directory.
+```
+pip install git+file:///path/to/your/package#egg=package-name
+
+```
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d4bb2cb
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..ef96845 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,44 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + + +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + + + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + + + +project = 'GU Orbit Software Utilities' +copyright = '2023, GU Orbit Software Team' +author = 'GU Orbit Software Team' +release = '0.1.0' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration +add_module_names = False +extensions = ['sphinx.ext.todo', 'sphinx.ext.viewcode', 'sphinx.ext.autodoc'] + +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +#insegel theme +#furo theme + +html_theme = 'furo' + + +html_static_path = ['style'] +html_css_files = ['custom.css'] diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..930e21c --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,20 @@ +.. 
GU Orbit Software Utilities documentation master file, created by + sphinx-quickstart on Thu Mar 16 16:56:02 2023. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to GU Orbit Software Utilities's documentation! +======================================================= + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + modules + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..32bb245 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/style/custom.css b/docs/style/custom.css new file mode 100644 index 0000000..74826ff --- /dev/null +++ b/docs/style/custom.css @@ -0,0 +1,3 @@ +dl.py .field-list dt { + text-transform: none !important; +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a5d8dbe --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +tensorflow==2.10 +numpy==1.24.1 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..cd279af --- /dev/null +++ b/setup.py @@ -0,0 +1,38 @@ +from setuptools import find_packages, setup + +with open("README.md", "r") as f: + long_description = f.read() + + +setup( + name="guorbit_utils", + version="0.1.0", + author="GU Orbit Software Team", + author_email="", + description="A package containing utilities for GU Orbit Software", + long_description=long_description, + long_description_content_type="text/markdown", + url="", + packages=find_packages(where="utilities"), + package_dir={"": "utilities"}, + license="MIT", + classifiers=[ + "Programming Language :: Python :: 3.10", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + python_requires=">=3.10", + install_requires=[ + "numpy >= 1.24.0", + "rasterio >= 1.3.6", + "Pillow >= 9.4.0", + "tensorflow >= 2.10", + ], + extras_require={ + "dev": [ + "pytest >= 7.2.2", + "pytest-cov >= 4.0.0", + "twine >= 4.0.0", + ] + }, +) \ No newline at end of file diff --git a/tests/segmentation_utils_tests.py/flow_reader_test.py b/tests/segmentation_utils_tests.py/flow_reader_test.py new file mode 100644 index 0000000..5c918c6 --- /dev/null +++ b/tests/segmentation_utils_tests.py/flow_reader_test.py @@ 
-0,0 +1,201 @@ +import os +import numpy as np +import tensorflow as tf +from keras.preprocessing.image import ImageDataGenerator +from pytest import MonkeyPatch +from utilities.segmentation_utils import ImagePreprocessor +from utilities.segmentation_utils.flowreader import FlowGenerator + + +# mock implementations +def flow_from_directory_mock(*args, **kwargs): + channels = 3 + if "color_mode" in kwargs and kwargs["color_mode"] == "grayscale": + channels = 1 + + batch = np.zeros((2, kwargs["target_size"][0], kwargs["target_size"][1], channels)) + return batch + + +generator_args = { + "image_path": "tests/segmentation_utils_tests/flow_reader_test", + "mask_path": "tests/segmentation_utils_tests/flow_reader_test", + "image_size": (512, 512), + "output_size": (256 * 256, 1), + "num_classes": 7, + "shuffle": True, + "batch_size": 2, + "seed": 909, +} + +mock_onehot_fn = lambda x, y, z: np.rollaxis(np.array([x for i in range(z)]), 0, 3) +mock_augmentation_fn = lambda x, y, z, a, b: (x, y) + +# tests +def test_makes_flow_generator() -> None: + patch = MonkeyPatch() + # mock an imagedatagenerator from keras + patch.setattr( + ImageDataGenerator, + "flow_from_directory", + flow_from_directory_mock, + ) + patch.setattr(FlowGenerator, "preprocess", lambda self, x, *args, **kwargs: x) + # create a flow generator + FlowGenerator(**generator_args) + + +def test_makes_flow_generator_with_queue() -> None: + patch = MonkeyPatch() + # mock an imagedatagenerator from keras + patch.setattr( + ImageDataGenerator, + "flow_from_directory", + flow_from_directory_mock, + ) + patch.setattr(FlowGenerator, "preprocess", lambda self, x, *args, **kwargs: x) + + # create dummy queues + image_queue = ImagePreprocessor.PreprocessingQueue( + [lambda x, y, seed: x], [{"y": 1}] + ) + mask_queue = ImagePreprocessor.PreprocessingQueue( + [lambda x, y, seed: x], [{"y": 1}] + ) + + # create a copy of the generator args + new_generator_args = generator_args.copy() + 
new_generator_args["preprocessing_queue_image"] = image_queue + new_generator_args["preprocessing_queue_mask"] = mask_queue + + # create a flow generator + FlowGenerator(**new_generator_args) + + +def test_makes_flow_generator_wrong_shape() -> None: + try: + patch = MonkeyPatch() + # mock an imagedatagenerator from keras + patch.setattr( + ImageDataGenerator, + "flow_from_directory", + flow_from_directory_mock, + ) + patch.setattr(FlowGenerator, "preprocess", lambda self, x, *args, **kwargs: x) + + fail_generator = generator_args.copy() + # create a flow generator + fail_generator["output_size"] = (256, 256, 256) + FlowGenerator(**fail_generator) + assert False + except ValueError: + assert True + + +def test_makes_flow_generator_wrong_dimension() -> None: + try: + patch = MonkeyPatch() + # mock an imagedatagenerator from keras + patch.setattr( + ImageDataGenerator, + "flow_from_directory", + flow_from_directory_mock, + ) + patch.setattr(FlowGenerator, "preprocess", lambda self, x, *args, **kwargs: x) + + fail_generator = generator_args.copy() + # create a flow generator + fail_generator["output_size"] = (256 * 256, 2) + FlowGenerator(**fail_generator) + assert False + except ValueError: + assert True + + +def test_flow_generator_with_preprocess() -> None: + patch = MonkeyPatch() + # mock an imagedatagenerator from keras + patch.setattr( + ImageDataGenerator, + "flow_from_directory", + flow_from_directory_mock, + ) + + # mock external dependencies + patch.setattr(ImagePreprocessor, "augmentation_pipeline", mock_augmentation_fn) + patch.setattr( + ImagePreprocessor, + "onehot_encode", + mock_onehot_fn, + ) + + # create a flow generator + FlowGenerator(**generator_args) + patch.undo() + patch.undo() + + +def test_get_dataset_size() -> None: + patch = MonkeyPatch() + patch.setattr(os, "listdir", lambda x: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + # mock an imagedatagenerator from keras + patch.setattr( + ImageDataGenerator, + "flow_from_directory", + 
flow_from_directory_mock, + ) + # mock external dependencies + patch.setattr(ImagePreprocessor, "augmentation_pipeline", mock_augmentation_fn) + patch.setattr( + ImagePreprocessor, + "onehot_encode", + mock_onehot_fn, + ) + # create a flow generator + flow_generator = FlowGenerator(**generator_args) + size = flow_generator.get_dataset_size() + assert size == 10 + patch.undo() + patch.undo() + patch.undo() + + +def test_get_generator() -> None: + patch = MonkeyPatch() + + # mock external dependencies + patch.setattr(ImagePreprocessor, "augmentation_pipeline", mock_augmentation_fn) + patch.setattr( + ImagePreprocessor, + "onehot_encode", + mock_onehot_fn, + ) + + # create a flow generator + flow_generator = FlowGenerator(**generator_args) + generator = flow_generator.get_generator() + + assert generator != None + patch.undo() + patch.undo() + + +def test_reader_error_raised() -> None: + try: + # predifining input variables + image = np.zeros((512, 512, 3)) + mask = np.zeros((256, 256, 1)) + image = tf.convert_to_tensor(image) + mask = tf.convert_to_tensor(mask) + # + # createing dummy queues + image_queue = ImagePreprocessor.PreprocessingQueue( + queue=[lambda x, y, seed: x], arguments=[{"y": 1}] + ) + new_generator_args = generator_args.copy() + new_generator_args["preprocessing_queue_image"] = image_queue + # create a flow generator + FlowGenerator(**new_generator_args) + assert False + except ValueError: + assert True diff --git a/tests/segmentation_utils_tests.py/image_preprocessor_test.py b/tests/segmentation_utils_tests.py/image_preprocessor_test.py new file mode 100644 index 0000000..83c3e2f --- /dev/null +++ b/tests/segmentation_utils_tests.py/image_preprocessor_test.py @@ -0,0 +1,127 @@ +import numpy as np +import tensorflow as tf +from utilities.segmentation_utils import ImagePreprocessor + + + +def test_image_onehot_encoder() -> None: + # predifining input variables + n_classes = 2 + batch_size = 1 + image_size = (512, 512) + output_size = (256, 256) + + # 
creating a mask with 2 classes + mask = np.zeros((batch_size, output_size[0] * output_size[1])) + mask[:, ::2] = 1 + + # creating a onehot mask to compare with the output of the function + onehot_test = np.zeros((batch_size, output_size[0] * output_size[1], n_classes)) + onehot_test[:, ::2, 1] = 1 + onehot_test[:, 1::2, 0] = 1 + + one_hot_image = ImagePreprocessor.onehot_encode(mask, output_size, n_classes) + + assert one_hot_image.shape == ( + 1, + image_size[0] // 2 * image_size[1] // 2, + n_classes, + ) + assert np.array_equal(one_hot_image, onehot_test) + + +def test_image_augmentation_pipeline_column() -> None: + # predifining input variables + image = np.zeros((512, 512, 3)) + mask = np.zeros((256 * 256, 1)) + image = tf.convert_to_tensor(image) + mask = tf.convert_to_tensor(mask) + + input_size = (512, 512) + output_size = (256 * 256, 1) + output_reshape = (256, 256) + + # createing dummy queues + image_queue = ImagePreprocessor.PreprocessingQueue( + queue=[lambda x, y, seed: x], arguments=[{"y": 1}] + ) + mask_queue = ImagePreprocessor.PreprocessingQueue( + queue=[lambda x, y, seed: x], arguments=[{"y": 1}] + ) + + image_new, mask_new = ImagePreprocessor.augmentation_pipeline( + image, mask, input_size, output_size, output_reshape, image_queue, mask_queue + ) + image_new = image_new.numpy() + mask_new = mask_new.numpy() + + assert np.array(image_new).shape == (512, 512, 3) + assert np.array(mask_new).shape == (256 * 256, 1, 1) + + +def test_image_augmentation_pipeline_squarematrix() -> None: + # predifining input variables + image = np.zeros((512, 512, 3)) + mask = np.zeros((256, 256, 1)) + image = tf.convert_to_tensor(image) + mask = tf.convert_to_tensor(mask) + + input_size = (512, 512) + output_size = (256, 256) + + # createing dummy queues + image_queue = ImagePreprocessor.PreprocessingQueue( + queue=[lambda x, y, seed: x], arguments=[{"y": 1}] + ) + mask_queue = ImagePreprocessor.PreprocessingQueue( + queue=[lambda x, y, seed: x], arguments=[{"y": 1}] 
+ ) + + image_new, mask_new = ImagePreprocessor.augmentation_pipeline( + image, + mask, + input_size, + output_size, + image_queue=image_queue, + mask_queue=mask_queue, + ) + image_new = image_new.numpy() + mask_new = mask_new.numpy() + + assert image_new.shape == (512, 512, 3) + assert mask_new.shape == (256, 256, 1) + + +def test_processing_queue() -> None: + # createing dummy queues + image_queue = ImagePreprocessor.PreprocessingQueue( + queue=[lambda seed: seed], arguments=[dict(seed=1)] + ) + + # changing the seed + new_seed = 5 + image_queue.update_seed(new_seed) + + assert image_queue.arguments[0]["seed"] == new_seed + + +def test_generate_default_queue() -> None: + # createing default queues + image_queue, mask_queue = ImagePreprocessor.generate_default_queue() + + # changing the seed + new_seed = 5 + image_queue.update_seed(new_seed) + + assert image_queue.arguments[0]["seed"] == new_seed + assert image_queue.get_queue_length() == 6 + assert mask_queue.get_queue_length() == 2 + + +def test_flatten() -> None: + image = np.zeros((512, 512, 3)) + image = tf.convert_to_tensor(image) + image = ImagePreprocessor.flatten(image, (512, 512), 3) + image = image.numpy() + assert image.shape == (512 * 512, 1, 3) + diff --git a/utilities/image_cutting.py b/utilities/image_cutting.py index 99a1b0f..e84d6df 100644 --- a/utilities/image_cutting.py +++ b/utilities/image_cutting.py @@ -2,7 +2,6 @@ from numpy.typing import NDArray import os import rasterio -from matplotlib import pyplot as plt import pathlib from PIL import Image from typing import Any diff --git a/utilities/segmentation_utils/ImagePreprocessor.py b/utilities/segmentation_utils/ImagePreprocessor.py new file mode 100644 index 0000000..843843c --- /dev/null +++ b/utilities/segmentation_utils/ImagePreprocessor.py @@ -0,0 +1,193 @@ +from dataclasses import dataclass +from typing import Callable, Dict +import numpy as np +import tensorflow as tf + + + +@dataclass +class PreprocessingQueue: + """ + object to 
initialize a preprocessing queue + + Parameters + ---------- + :queue list: list of functions to be applied + :arguments list[dict]: list of arguments to be passed to the functions + """ + + queue: list[Callable] + arguments: list[Dict] + + def update_seed(self, seed): + """ + Changes the seed of the queue + + Parameters + ---------- + :seed int: seed to be changed to + """ + for i in self.arguments: + i["seed"] = seed + + def get_queue_length(self): + """ + Returns the length of the queue + + Returns + ------- + :return: length of the queue + :rtype: int + """ + return len(self.queue) + + +def generate_default_queue(seed=0): + """ + Generates the default processing queue + + Keyword Arguments + ----------------- + :seed int: seed to be used for the random functions + + Returns + ------- + :return PreprocessingQueue: default queue + """ + image_queue = PreprocessingQueue( + queue=[ + tf.image.random_flip_left_right, + tf.image.random_flip_up_down, + tf.image.random_brightness, + tf.image.random_contrast, + tf.image.random_saturation, + tf.image.random_hue, + ], + arguments=[ + {"seed": seed}, + {"seed": seed}, + {"max_delta": 0.2, "seed": seed}, + {"lower": 0.8, "upper": 1.2, "seed": seed}, + {"lower": 0.8, "upper": 1.2, "seed": seed}, + {"max_delta": 0.2, "seed": seed}, + ], + ) + mask_queue = PreprocessingQueue( + queue=[ + tf.image.random_flip_left_right, + tf.image.random_flip_up_down, + ], + arguments=[ + {"seed": seed}, + {"seed": seed}, + ], + ) + return image_queue, mask_queue + + +def onehot_encode(masks, output_size, num_classes): + """ + Function that one-hot encodes masks + + :batch(tf.Tensor) masks: Masks to be encoded + :tuple(int, int) output_size: Output size of the masks + :int num_classes: Number of classes in the masks + + Returns + ------- + :return: Encoded masks + :rtype: batch(tf.Tensor) + """ + encoded = np.zeros((masks.shape[0], output_size[0] * output_size[1], num_classes)) + for i in range(num_classes): + encoded[:, :, i] = 
tf.squeeze((masks == i).astype(int)) + return encoded + + +def augmentation_pipeline( + image, + mask, + input_size: tuple[int, int], + output_size: tuple[int, int], + output_reshape: tuple[int, int] = None, + image_queue: PreprocessingQueue = None, + mask_queue: PreprocessingQueue = None, + channels: int = 3, + seed: int = 0, +): + """ + Function that can execute a set of predifined augmentation functions + stored in a PreprocessingQueue object. It augments both the image and the mask + with the same functions and arguments. + + Parameters + ---------- + :tf.Tensor image: The image to be processed + :tf.Tensor mask: The mask to be processed + :tuple(int, int) input_size: Input size of the image + :tuple(int, int) output_size: Output size of the image + + + Keyword Arguments + ----------------- + :tuple(int, int), optional output_reshape: In case the image is a column vector, + this is the shape it should be reshaped to. Defaults to None. + + :PreprocessingQueue, optional mask_queue image_queue: + Augmentation processing queue for images, defaults to None + + :PreprocessingQueue, optional mask_queue: Augmentation processing queue + for masks, defaults to None + + :int, optional channels: Number of bands in the image, defaults to 3 + :int, optional seed: The seed to be used in the pipeline, defaults to 0 + + Raises + ------ + :raises ValueError: If only one of the queues is passed + + Returns + ------- + :return: tuple of the processed image and mask + :rtype: tuple(tf.Tensor, tf.Tensor) + """ + + # reshapes masks, such that transforamtions work properly + if output_size[1] == 1: + mask = tf.reshape(mask, (output_reshape[0], output_reshape[1], 1)) + + image_queue.update_seed(seed) + mask_queue.update_seed(seed) + + for i, fun in enumerate(image_queue.queue): + image = fun(image, **image_queue.arguments[i]) + + for i, fun in enumerate(mask_queue.queue): + mask = fun(mask, **mask_queue.arguments[i]) + + # flattens masks out to the correct output shape + if 
output_size[1] == 1: + mask = flatten(mask, output_size, channels=1) + return image, mask + + +def flatten(image, input_size, channels=1): + """flatten + Function that flattens an image preserving the number of channels + + Parameters + ---------- + :tf.Tensor image: image to be flattened + :tuple(int, int) input_size: input size of the image + + Keyword Arguments + ----------------- + :int, optional channels: number of chanels to preserve, defaults to 1 + + Returns + ------- + :return: flattened image + :rtype: tf.Tensor + """ + # the 1 is required to preserve the shape similar to the original + return tf.reshape(image, (input_size[0] * input_size[1], 1, channels)) diff --git a/utilities/segmentation_utils/__init__.py b/utilities/segmentation_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utilities/segmentation_utils/flowreader.py b/utilities/segmentation_utils/flowreader.py new file mode 100644 index 0000000..f3e94d5 --- /dev/null +++ b/utilities/segmentation_utils/flowreader.py @@ -0,0 +1,195 @@ +""" +FlowGenerator is a wrapper around the keras ImageDataGenerator class. +""" + +import os +import math +import numpy as np +from keras.preprocessing.image import ImageDataGenerator +from utilities.segmentation_utils import ImagePreprocessor + + +class FlowGenerator: + """ + FlowGenerator is a wrapper around the keras ImageDataGenerator class. + It can be used to read in images for semantic segmentation. + Additionally, the reader can apply augmentation on the images, + and one-hot encode them on the fly. 
+ """ + + def __init__( + self, + image_path: str, + mask_path: str, + image_size: tuple[int, int], + output_size: tuple[int, int], + num_classes: int, + shuffle: bool = True, + batch_size: int = 2, + preprocessing_enabled: bool = True, + seed: int = 909, + preprocessing_seed: int = None, + preprocessing_queue_image: ImagePreprocessor.PreprocessingQueue = None, + preprocessing_queue_mask: ImagePreprocessor.PreprocessingQueue = None, + ): + """ + Initializes the flow generator object, + which can be used to read in images for semantic segmentation. + Additionally, the reader can apply augmentation on the images, + and one-hot encode them on the fly. + + Parameters + ---------- + :string image: path to the image directory + :string mask: path to the mask directory + :int batch_size: batch size + :tuple image_size: image size + :tuple output_size: output size + #! Note: in case the output is a column vector it has to be in the shape (x, 1) + + :int num_classes: number of classes + + Keyword Arguments + ----------------- + :bool shuffle: whether to shuffle the dataset or not + :int batch_size: batch size + :bool preprocessing_enabled: whether to apply preprocessing or not + :int seed: seed for flow from directory + :int preprocessing_seed: seed for preprocessing, defaults to None + :PreprocessingQueue preprocessing_queue_image: preprocessing queue for images + :PreprocessingQueue preprocessing_queue_mask: preprocessing queue for masks + + Raises + ------ + ValueError: if the output size is not a tuple of length 2 + ValueError: if the output size is not a square matrix or a column vector + """ + + if len(output_size) != 2: + raise ValueError("The output size has to be a tuple of length 2") + if output_size[1] != 1 and output_size[0] != output_size[1]: + raise ValueError( + "The output size has to be a square matrix or a column vector" + ) + + self.image_path = image_path + self.mask_path = mask_path + self.batch_size = batch_size + self.image_size = image_size + 
self.output_size = output_size + self.num_classes = num_classes + self.shuffle = shuffle + self.seed = seed + self.preprocessing_enabled = preprocessing_enabled + self.preprocessing_queue_image = preprocessing_queue_image + self.preprocessing_queue_mask = preprocessing_queue_mask + self.preprocessing_seed = preprocessing_seed + self.__make_generator() + print("Reading images from: ", self.image_path) + + def get_dataset_size(self): + """ + Returns the length of the dataset + + Returns + ------- + :returns: length of the dataset + :rtype: int + + """ + + return len(os.listdir(os.path.join(self.image_path, "img"))) + + def __make_generator(self): + """ + Creates the generator + """ + + image_datagen = ImageDataGenerator() + mask_datagen = ImageDataGenerator() + + if self.output_size[1] == 1: + # only enters if the output is a column vector + # such no need to define it otherwise + dimension = math.sqrt(self.output_size[0]) + self.output_reshape = (int(dimension), int(dimension)) + + image_generator = image_datagen.flow_from_directory( + self.image_path, + class_mode=None, + seed=self.seed, + batch_size=self.batch_size, + target_size=self.image_size, + ) + + mask_generator = mask_datagen.flow_from_directory( + self.mask_path, + class_mode=None, + seed=self.seed, + batch_size=self.batch_size, + target_size=self.output_size, + color_mode="grayscale", + ) + if self.preprocessing_queue_image is None and self.preprocessing_queue_mask is None: + #!Possibly in the wrong place as it has to be regenerated every time + ( + self.preprocessing_queue_image, + self.preprocessing_queue_mask, + ) = ImagePreprocessor.generate_default_queue() + elif self.preprocessing_queue_image is None or self.preprocessing_queue_mask is None: + raise ValueError("Both queues must be passed or none") + + self.train_generator = zip(image_generator, mask_generator) + self.train_generator = self.preprocess(self.train_generator) + + def get_generator(self): + """ + Returns the generator object + + Returns 
+ ------- + :return: generator object + :rtype: generator + + """ + return self.train_generator + + def preprocess(self, generator_zip): + """ + Preprocessor function encapsulates both the image, and mask generator objects. + Augments the images and masks and onehot encodes the masks + + Parameters + ---------- + :tuple generator_zip: tuple of image and mask generator + :int, optional state: random state for reproducibility, defaults to None + + Returns + ------- + :return: generator batch of image and mask + :rtype: batch(tuple) + """ + for (img, mask) in generator_zip: + if self.preprocessing_enabled: + for i_image,i_mask in zip(img, mask): + # random state for reproducibility + if self.preprocessing_seed is None: + image_seed = np.random.randint(0, 100000) + else: + state = np.random.RandomState(state) + image_seed = state.randint(0, 100000) + + i_image, i_mask = ImagePreprocessor.augmentation_pipeline( + i_image, + i_mask, + self.image_size, + self.output_size, + self.output_reshape, + seed=image_seed, + image_queue=self.preprocessing_queue_image, + mask_queue=self.preprocessing_queue_mask, + ) + mask = ImagePreprocessor.onehot_encode( + mask, self.output_size, self.num_classes + ) + yield (img, mask)