Merge pull request #384 from Wikidata/depencencies-update

Dependencies update. See #384 (comment) for important details
Wikidata · Apr 1, 2020 · 020a792 · 020a792
2 parents 1aabe54 + 23afc3e
commit 020a792
Show file tree

Hide file tree

Showing 11 changed files with 239 additions and 118 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -2,7 +2,7 @@ dist: xenial
 language: python
 python: 3.7
 script:
-    - isort -y -rc soweego
+    - isort -y -rc -s soweego/cli.py soweego
     - autoflake -i -r --remove-all-unused-imports --remove-unused-variables soweego
     - black -S -l 80 soweego
     - |

diff --git a/requirements.in b/requirements.in
@@ -0,0 +1,27 @@
+click
+iso8601
+joblib
+keras
+lxml
+mlens
+pymysql
+pywikibot
+recordlinkage
+regex
+sqlalchemy
+tensorflow
+tqdm
+# Dev
+autoflake
+autopep8
+black
+flake8
+ipdb
+ipython
+isort
+mypy
+pip-tools
+pylint
+sphinx
+sphinx-autodoc-typehints
+sphinx-click
diff --git a/requirements.txt b/requirements.txt
@@ -1,93 +1,115 @@
-absl-py==0.7.1
-alabaster==0.7.12
-appdirs==1.4.3
-appnope==0.1.0
-astor==0.8.0
-astroid==2.2.5
-attrs==19.1.0
-autoflake==1.3
-autopep8==1.4.4
-Babel==2.7.0
-backcall==0.1.0
-black==19.3b0
-certifi==2019.6.16
-chardet==3.0.4
-Click==7.0
-decorator==4.4.0
-docutils==0.14
-entrypoints==0.3
-flake8==3.7.7
-gast==0.2.2
-google-pasta==0.1.7
-grpcio==1.22.0rc1
-h5py==2.9.0
-idna==2.8
-imagesize==1.1.0
-ipdb==0.12
-ipython==7.5.0
-ipython-genutils==0.2.0
-iso8601==0.1.12
-isort==4.3.20
-jedi==0.14.0
-jellyfish==0.7.2
-Jinja2==2.10.1
-joblib==0.13.2
-Keras==2.2.4
-Keras-Applications==1.0.8
-Keras-Preprocessing==1.1.0
-lazy-object-proxy==1.4.1
-lxml==4.3.4
-Markdown==3.1.1
-MarkupSafe==1.1.1
-mccabe==0.6.1
-numpy==1.16.4
-packaging==19.0
-pandas==0.24.2
-parso==0.5.0
-pbr==5.3.1
-pexpect==4.7.0
-pickleshare==0.7.5
-prompt-toolkit==2.0.9
-protobuf==3.8.0
-ptyprocess==0.6.0
-pycodestyle==2.5.0
-pyflakes==2.1.1
-Pygments==2.4.2
-pylint==2.3.1
-PyMySQL==0.9.3
-pyparsing==2.4.0
-python-dateutil==2.8.0
-pytz==2019.1
-pywikibot==3.0.20190430
-PyYAML==5.1.1
-recordlinkage==0.13.2
-regex==2019.6.8
-requests==2.22.0
-rope==0.14.0
-scikit-learn==0.21.2
-scipy==1.3.0
-six==1.12.0
-snowballstemmer==1.2.1
-Sphinx==2.1.2
-sphinx-autodoc-typehints==1.6.0
-sphinx-click==2.2.0
-sphinxcontrib-applehelp==1.0.1
-sphinxcontrib-devhelp==1.0.1
-sphinxcontrib-htmlhelp==1.0.2
-sphinxcontrib-jsmath==1.0.1
-sphinxcontrib-qthelp==1.0.2
-sphinxcontrib-serializinghtml==1.1.3
-SQLAlchemy==1.3.5
-tensorboard==1.13.1
-tensorflow==1.13.1
-tensorflow-estimator==1.13.0
-termcolor==1.1.0
-toml==0.10.0
-tqdm==4.32.2
-traitlets==4.3.2
-typed-ast==1.4.0
-urllib3==1.25.3
-wcwidth==0.1.7
-Werkzeug==0.15.4
-wrapt==1.11.2
-mlens==0.2.3
+#
+# This file is autogenerated by pip-compile
+# To update, run:
+#
+#    pip-compile
+#
+absl-py==0.9.0            # via tensorboard, tensorflow
+alabaster==0.7.12         # via sphinx
+appdirs==1.4.3            # via black
+astor==0.8.1              # via tensorflow
+astroid==2.3.3            # via pylint
+attrs==19.3.0             # via black
+autoflake==1.3.1          # via -r requirements.in
+autopep8==1.5             # via -r requirements.in
+babel==2.8.0              # via sphinx
+backcall==0.1.0           # via ipython
+black==19.10b0            # via -r requirements.in
+cachetools==4.0.0         # via google-auth
+certifi==2019.11.28       # via requests
+chardet==3.0.4            # via requests
+click==7.1.1              # via -r requirements.in, black, pip-tools
+decorator==4.4.2          # via ipython, traitlets
+docutils==0.16            # via sphinx
+entrypoints==0.3          # via flake8
+flake8==3.7.9             # via -r requirements.in
+gast==0.2.2               # via tensorflow
+google-auth-oauthlib==0.4.1  # via tensorboard
+google-auth==1.12.0       # via google-auth-oauthlib, tensorboard
+google-pasta==0.2.0       # via tensorflow
+grpcio==1.27.2            # via tensorboard, tensorflow
+h5py==2.10.0              # via keras, keras-applications
+idna==2.9                 # via requests
+imagesize==1.2.0          # via sphinx
+ipdb==0.13.2              # via -r requirements.in
+ipython-genutils==0.2.0   # via traitlets
+ipython==7.13.0           # via -r requirements.in, ipdb
+iso8601==0.1.12           # via -r requirements.in
+isort==4.3.21             # via -r requirements.in, pylint
+jedi==0.16.0              # via ipython
+jellyfish==0.7.2          # via recordlinkage
+jinja2==2.11.1            # via sphinx
+joblib==0.14.1            # via -r requirements.in, recordlinkage, scikit-learn
+keras-applications==1.0.8  # via keras, tensorflow
+keras-preprocessing==1.1.0  # via keras, tensorflow
+keras==2.3.1              # via -r requirements.in
+lazy-object-proxy==1.4.3  # via astroid
+lxml==4.5.0               # via -r requirements.in
+markdown==3.2.1           # via tensorboard
+markupsafe==1.1.1         # via jinja2
+mccabe==0.6.1             # via flake8, pylint
+mlens==0.2.3              # via -r requirements.in
+mypy-extensions==0.4.3    # via mypy
+mypy==0.770               # via -r requirements.in
+numpy==1.18.2             # via h5py, keras, keras-applications, keras-preprocessing, mlens, opt-einsum, pandas, recordlinkage, scikit-learn, scipy, tensorboard, tensorflow
+oauthlib==3.1.0           # via requests-oauthlib
+opt-einsum==3.2.0         # via tensorflow
+packaging==20.3           # via sphinx
+pandas==1.0.3             # via recordlinkage
+parso==0.6.2              # via jedi
+pathspec==0.7.0           # via black
+pbr==5.4.4                # via sphinx-click
+pexpect==4.8.0            # via ipython
+pickleshare==0.7.5        # via ipython
+pip-tools==4.5.1          # via -r requirements.in
+prompt-toolkit==3.0.5     # via ipython
+protobuf==3.11.3          # via tensorboard, tensorflow
+ptyprocess==0.6.0         # via pexpect
+pyasn1-modules==0.2.8     # via google-auth
+pyasn1==0.4.8             # via pyasn1-modules, rsa
+pycodestyle==2.5.0        # via autopep8, flake8
+pyflakes==2.1.1           # via autoflake, flake8
+pygments==2.6.1           # via ipython, sphinx
+pylint==2.4.4             # via -r requirements.in
+pymysql==0.9.3            # via -r requirements.in
+pyparsing==2.4.6          # via packaging
+python-dateutil==2.8.1    # via pandas
+pytz==2019.3              # via babel, pandas
+pywikibot==3.0.20200326   # via -r requirements.in
+pyyaml==5.3.1             # via keras
+recordlinkage==0.14       # via -r requirements.in
+regex==2020.2.20          # via -r requirements.in, black
+requests-oauthlib==1.3.0  # via google-auth-oauthlib
+requests==2.23.0          # via pywikibot, requests-oauthlib, sphinx, tensorboard
+rsa==4.0                  # via google-auth
+scikit-learn==0.22.2.post1  # via recordlinkage
+scipy==1.4.1              # via keras, mlens, recordlinkage, scikit-learn, tensorflow
+six==1.14.0               # via absl-py, astroid, google-auth, google-pasta, grpcio, h5py, keras, keras-preprocessing, packaging, pip-tools, protobuf, python-dateutil, tensorboard, tensorflow, traitlets
+snowballstemmer==2.0.0    # via sphinx
+sphinx-autodoc-typehints==1.10.3  # via -r requirements.in
+sphinx-click==2.3.1       # via -r requirements.in
+sphinx==2.4.4             # via -r requirements.in, sphinx-autodoc-typehints, sphinx-click
+sphinxcontrib-applehelp==1.0.2  # via sphinx
+sphinxcontrib-devhelp==1.0.2  # via sphinx
+sphinxcontrib-htmlhelp==1.0.3  # via sphinx
+sphinxcontrib-jsmath==1.0.1  # via sphinx
+sphinxcontrib-qthelp==1.0.3  # via sphinx
+sphinxcontrib-serializinghtml==1.1.4  # via sphinx
+sqlalchemy==1.3.15        # via -r requirements.in
+tensorboard==2.1.1        # via tensorflow
+tensorflow-estimator==2.1.0  # via tensorflow
+tensorflow==2.1.0         # via -r requirements.in
+termcolor==1.1.0          # via tensorflow
+toml==0.10.0              # via black
+tqdm==4.43.0              # via -r requirements.in
+traitlets==4.3.3          # via ipython
+typed-ast==1.4.1          # via astroid, black, mypy
+typing-extensions==3.7.4.1  # via mypy
+urllib3==1.25.8           # via requests
+wcwidth==0.1.9            # via prompt-toolkit
+werkzeug==1.0.0           # via tensorboard
+wheel==0.34.2             # via tensorboard, tensorflow
+wrapt==1.11.2             # via astroid, tensorflow
+
+# The following packages are considered to be unsafe in a requirements file:
+# setuptools
diff --git a/scripts/enforce_style.sh b/scripts/enforce_style.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+# This script contains a set of commands to enforce
+# standard style guidelines and design patterns for Python code.
+#
+# Each command writes a report file: it is recommended to inspect
+# one file at a time and fix everything that seems reasonable.
+# File names start with a number that tells you the order of inspection.
+
+USAGE="Usage: $(basename "$0") PYTHON_FILE_OR_DIR"
+if [[ $# -ne 1 ]]; then
+    echo "$USAGE"
+    exit 1
+fi
+
+WD=$(pwd)
+OUTDIR="$WD/style_reports"
+
+# No-op when the directory already exists
+mkdir -p "$OUTDIR"
+
+# sphinx warnings: they go to stderr, so capture it too. The subshell
+# keeps the working directory unchanged even when the build fails
+echo "Step 1: sphinx documentation warnings ..."
+(cd "$WD/docs" && make html) > "$OUTDIR/01_sphinx_warnings" 2>&1
+
+# pylint errors
+echo "Step 2: pylint errors ..."
+pylint -j 0 -E "$1" > "$OUTDIR/02_pylint_errors"
+
+# pylint warnings
+echo "Step 3: pylint warnings ..."
+pylint -j 0 -d all -e W "$1" > "$OUTDIR/03_pylint_warnings"
+
+# mccabe complexity
+echo "Step 4: mccabe complexity ..."
+flake8 --select C90 --max-complexity 10 --output-file "$OUTDIR/04_mccabe_complexity" "$1"
+
+# pylint refactoring suggestions
+echo "Step 5: pylint refactoring ..."
+pylint -j 0 -d all -e R "$1" > "$OUTDIR/05_pylint_refactoring"
+
+# Type hints (AKA annotations) consistency
+echo "Step 6: mypy type hints ..."
+mypy --ignore-missing-imports "$1" > "$OUTDIR/06_mypy_type_hints"
+
diff --git a/soweego/cli.py b/soweego/cli.py
@@ -10,9 +10,21 @@
 __copyright__ = 'Copyleft 2018, Hjfocs'
 
 import logging
+import os
+
+# Silence requests log up to INFO
+logging.getLogger('requests').setLevel(logging.WARNING)
+
+# Silence tensorflow, see https://tinyurl.com/qnud7j8
+# Python log up to WARNING
+logging.getLogger('tensorflow').setLevel(logging.ERROR)
+# C++ log up to E(rror): level 3 filters out INFO, WARNING, and ERROR messages
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+# Silence ML-Ensemble, see http://ml-ensemble.com/docs/config.html
+os.environ['MLENS_VERBOSE'] = '0'
 
 import click
-import tensorflow as tf
 
 from soweego import commons
 from soweego import pipeline as pipeline_cli
@@ -21,10 +33,6 @@
 from soweego.linker import cli as linker_cli
 from soweego.validator import cli as validator_cli
 
-# set env variable to ignore tensorflow warnings
-# (only errors are printed)
-tf.logging.set_verbosity(tf.logging.ERROR)
-
 
 CLI_COMMANDS = {
     'importer': importer_cli.cli,
@@ -34,9 +42,6 @@
     'run': pipeline_cli.cli,
 }
 
-# Avoid verbose requests logging
-logging.getLogger('requests').setLevel(logging.WARNING)
-
 
 @click.group(commands=CLI_COMMANDS)
 @click.option(

diff --git a/soweego/commons/utils.py b/soweego/commons/utils.py
@@ -67,8 +67,7 @@ def prepare_stratified_k_fold(k, dataset, positive_samples_index):
 
 def init_model(classifier: str, num_features: int, **kwargs):
     if classifier is keys.NAIVE_BAYES:
-        # add `binarize` threshold if not already specified
-
+        # Add `binarize` threshold if not already specified
         kwargs = {**constants.NAIVE_BAYES_PARAMS, **kwargs}
         model = rl.NaiveBayesClassifier(**kwargs)
 

diff --git a/soweego/linker/classifiers.py b/soweego/linker/classifiers.py
@@ -115,7 +115,7 @@ def _fit(
             validation_split=validation_split,
             batch_size=batch_size,
             epochs=epochs,
-            verbose=0,
+            verbose=1,
             callbacks=[
                 EarlyStopping(
                     monitor='val_loss',

diff --git a/soweego/linker/evaluate.py b/soweego/linker/evaluate.py
@@ -158,7 +158,15 @@ def _run_average(
 ):
     LOGGER.info('Starting average evaluation over %d folds ...', k_folds)
 
-    predictions, p_mean, p_std, r_mean, r_std, fscore_mean, fscore_std = _average_k_fold(
+    (
+        predictions,
+        p_mean,
+        p_std,
+        r_mean,
+        r_std,
+        fscore_mean,
+        fscore_std,
+    ) = _average_k_fold(
         constants.CLASSIFIERS[classifier],
         catalog,
         entity,

diff --git a/soweego/linker/workflow.py b/soweego/linker/workflow.py
@@ -23,7 +23,7 @@
 import recordlinkage as rl
 from numpy import nan
 from pandas import read_sql
-from pandas.io.json.json import JsonReader
+from pandas.io.json._json import JsonReader
 from sqlalchemy.orm import Query
 
 from soweego.commons import (