Skip to content

Commit

Permalink
Merge pull request #384 from Wikidata/depencencies-update
Browse files Browse the repository at this point in the history
Dependencies update. See #384 (comment) for important details
  • Loading branch information
marfox committed Apr 1, 2020
2 parents 1aabe54 + 23afc3e commit 020a792
Show file tree
Hide file tree
Showing 11 changed files with 239 additions and 118 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ dist: xenial
language: python
python: 3.7
script:
- isort -y -rc soweego
- isort -y -rc -s soweego/cli.py soweego
- autoflake -i -r --remove-all-unused-imports --remove-unused-variables soweego
- black -S -l 80 soweego
- |
Expand Down
27 changes: 27 additions & 0 deletions requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
click
iso8601
joblib
keras
lxml
mlens
pymysql
pywikibot
recordlinkage
regex
sqlalchemy
tensorflow
tqdm
# Dev
autoflake
autopep8
black
flake8
ipdb
ipython
isort
mypy
pip-tools
pylint
sphinx
sphinx-autodoc-typehints
sphinx-click
208 changes: 115 additions & 93 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,93 +1,115 @@
absl-py==0.7.1
alabaster==0.7.12
appdirs==1.4.3
appnope==0.1.0
astor==0.8.0
astroid==2.2.5
attrs==19.1.0
autoflake==1.3
autopep8==1.4.4
Babel==2.7.0
backcall==0.1.0
black==19.3b0
certifi==2019.6.16
chardet==3.0.4
Click==7.0
decorator==4.4.0
docutils==0.14
entrypoints==0.3
flake8==3.7.7
gast==0.2.2
google-pasta==0.1.7
grpcio==1.22.0rc1
h5py==2.9.0
idna==2.8
imagesize==1.1.0
ipdb==0.12
ipython==7.5.0
ipython-genutils==0.2.0
iso8601==0.1.12
isort==4.3.20
jedi==0.14.0
jellyfish==0.7.2
Jinja2==2.10.1
joblib==0.13.2
Keras==2.2.4
Keras-Applications==1.0.8
Keras-Preprocessing==1.1.0
lazy-object-proxy==1.4.1
lxml==4.3.4
Markdown==3.1.1
MarkupSafe==1.1.1
mccabe==0.6.1
numpy==1.16.4
packaging==19.0
pandas==0.24.2
parso==0.5.0
pbr==5.3.1
pexpect==4.7.0
pickleshare==0.7.5
prompt-toolkit==2.0.9
protobuf==3.8.0
ptyprocess==0.6.0
pycodestyle==2.5.0
pyflakes==2.1.1
Pygments==2.4.2
pylint==2.3.1
PyMySQL==0.9.3
pyparsing==2.4.0
python-dateutil==2.8.0
pytz==2019.1
pywikibot==3.0.20190430
PyYAML==5.1.1
recordlinkage==0.13.2
regex==2019.6.8
requests==2.22.0
rope==0.14.0
scikit-learn==0.21.2
scipy==1.3.0
six==1.12.0
snowballstemmer==1.2.1
Sphinx==2.1.2
sphinx-autodoc-typehints==1.6.0
sphinx-click==2.2.0
sphinxcontrib-applehelp==1.0.1
sphinxcontrib-devhelp==1.0.1
sphinxcontrib-htmlhelp==1.0.2
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.2
sphinxcontrib-serializinghtml==1.1.3
SQLAlchemy==1.3.5
tensorboard==1.13.1
tensorflow==1.13.1
tensorflow-estimator==1.13.0
termcolor==1.1.0
toml==0.10.0
tqdm==4.32.2
traitlets==4.3.2
typed-ast==1.4.0
urllib3==1.25.3
wcwidth==0.1.7
Werkzeug==0.15.4
wrapt==1.11.2
mlens==0.2.3
#
# This file is autogenerated by pip-compile
# To update, run:
#
# pip-compile
#
absl-py==0.9.0 # via tensorboard, tensorflow
alabaster==0.7.12 # via sphinx
appdirs==1.4.3 # via black
astor==0.8.1 # via tensorflow
astroid==2.3.3 # via pylint
attrs==19.3.0 # via black
autoflake==1.3.1 # via -r requirements.in
autopep8==1.5 # via -r requirements.in
babel==2.8.0 # via sphinx
backcall==0.1.0 # via ipython
black==19.10b0 # via -r requirements.in
cachetools==4.0.0 # via google-auth
certifi==2019.11.28 # via requests
chardet==3.0.4 # via requests
click==7.1.1 # via -r requirements.in, black, pip-tools
decorator==4.4.2 # via ipython, traitlets
docutils==0.16 # via sphinx
entrypoints==0.3 # via flake8
flake8==3.7.9 # via -r requirements.in
gast==0.2.2 # via tensorflow
google-auth-oauthlib==0.4.1 # via tensorboard
google-auth==1.12.0 # via google-auth-oauthlib, tensorboard
google-pasta==0.2.0 # via tensorflow
grpcio==1.27.2 # via tensorboard, tensorflow
h5py==2.10.0 # via keras, keras-applications
idna==2.9 # via requests
imagesize==1.2.0 # via sphinx
ipdb==0.13.2 # via -r requirements.in
ipython-genutils==0.2.0 # via traitlets
ipython==7.13.0 # via -r requirements.in, ipdb
iso8601==0.1.12 # via -r requirements.in
isort==4.3.21 # via -r requirements.in, pylint
jedi==0.16.0 # via ipython
jellyfish==0.7.2 # via recordlinkage
jinja2==2.11.1 # via sphinx
joblib==0.14.1 # via -r requirements.in, recordlinkage, scikit-learn
keras-applications==1.0.8 # via keras, tensorflow
keras-preprocessing==1.1.0 # via keras, tensorflow
keras==2.3.1 # via -r requirements.in
lazy-object-proxy==1.4.3 # via astroid
lxml==4.5.0 # via -r requirements.in
markdown==3.2.1 # via tensorboard
markupsafe==1.1.1 # via jinja2
mccabe==0.6.1 # via flake8, pylint
mlens==0.2.3 # via -r requirements.in
mypy-extensions==0.4.3 # via mypy
mypy==0.770 # via -r requirements.in
numpy==1.18.2 # via h5py, keras, keras-applications, keras-preprocessing, mlens, opt-einsum, pandas, recordlinkage, scikit-learn, scipy, tensorboard, tensorflow
oauthlib==3.1.0 # via requests-oauthlib
opt-einsum==3.2.0 # via tensorflow
packaging==20.3 # via sphinx
pandas==1.0.3 # via recordlinkage
parso==0.6.2 # via jedi
pathspec==0.7.0 # via black
pbr==5.4.4 # via sphinx-click
pexpect==4.8.0 # via ipython
pickleshare==0.7.5 # via ipython
pip-tools==4.5.1 # via -r requirements.in
prompt-toolkit==3.0.5 # via ipython
protobuf==3.11.3 # via tensorboard, tensorflow
ptyprocess==0.6.0 # via pexpect
pyasn1-modules==0.2.8 # via google-auth
pyasn1==0.4.8 # via pyasn1-modules, rsa
pycodestyle==2.5.0 # via autopep8, flake8
pyflakes==2.1.1 # via autoflake, flake8
pygments==2.6.1 # via ipython, sphinx
pylint==2.4.4 # via -r requirements.in
pymysql==0.9.3 # via -r requirements.in
pyparsing==2.4.6 # via packaging
python-dateutil==2.8.1 # via pandas
pytz==2019.3 # via babel, pandas
pywikibot==3.0.20200326 # via -r requirements.in
pyyaml==5.3.1 # via keras
recordlinkage==0.14 # via -r requirements.in
regex==2020.2.20 # via -r requirements.in, black
requests-oauthlib==1.3.0 # via google-auth-oauthlib
requests==2.23.0 # via pywikibot, requests-oauthlib, sphinx, tensorboard
rsa==4.0 # via google-auth
scikit-learn==0.22.2.post1 # via recordlinkage
scipy==1.4.1 # via keras, mlens, recordlinkage, scikit-learn, tensorflow
six==1.14.0 # via absl-py, astroid, google-auth, google-pasta, grpcio, h5py, keras, keras-preprocessing, packaging, pip-tools, protobuf, python-dateutil, tensorboard, tensorflow, traitlets
snowballstemmer==2.0.0 # via sphinx
sphinx-autodoc-typehints==1.10.3 # via -r requirements.in
sphinx-click==2.3.1 # via -r requirements.in
sphinx==2.4.4 # via -r requirements.in, sphinx-autodoc-typehints, sphinx-click
sphinxcontrib-applehelp==1.0.2 # via sphinx
sphinxcontrib-devhelp==1.0.2 # via sphinx
sphinxcontrib-htmlhelp==1.0.3 # via sphinx
sphinxcontrib-jsmath==1.0.1 # via sphinx
sphinxcontrib-qthelp==1.0.3 # via sphinx
sphinxcontrib-serializinghtml==1.1.4 # via sphinx
sqlalchemy==1.3.15 # via -r requirements.in
tensorboard==2.1.1 # via tensorflow
tensorflow-estimator==2.1.0 # via tensorflow
tensorflow==2.1.0 # via -r requirements.in
termcolor==1.1.0 # via tensorflow
toml==0.10.0 # via black
tqdm==4.43.0 # via -r requirements.in
traitlets==4.3.3 # via ipython
typed-ast==1.4.1 # via astroid, black, mypy
typing-extensions==3.7.4.1 # via mypy
urllib3==1.25.8 # via requests
wcwidth==0.1.9 # via prompt-toolkit
werkzeug==1.0.0 # via tensorboard
wheel==0.34.2 # via tensorboard, tensorflow
wrapt==1.11.2 # via astroid, tensorflow

# The following packages are considered to be unsafe in a requirements file:
# setuptools
46 changes: 46 additions & 0 deletions scripts/enforce_style.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env bash

# This script runs a set of commands to enforce
# standard style guidelines and design patterns for Python code.
#
# Each command writes a report file under ./style_reports: it is
# recommended to inspect one file at a time and fix everything that
# seems reasonable.
# File names start with a number that tells you the order of inspection.
#
# Usage: enforce_style.sh PYTHON_FILE_OR_DIR
#
# Step 1 builds the documentation from ./docs relative to the current
# working directory, so run this script from the directory that contains
# the docs folder.

USAGE="Usage: $(basename "$0") PYTHON_FILE_OR_DIR"
if [[ $# -ne 1 ]]; then
    # Usage errors belong on stderr, so they never end up in piped output
    echo "$USAGE" >&2
    exit 1
fi

TARGET="$1"
WD=$(pwd)
OUTDIR="$WD/style_reports"

# -p creates the directory only when it is missing
mkdir -p "$OUTDIR"

# NOTE: `set -e` is deliberately not used. Linters typically exit with
# a non-zero status when they report findings, and every step must still run.

# Sphinx warnings
echo "Step 1: sphinx documentation warnings ..."
# Run in a subshell, so the working directory of this script is left
# untouched even when the build fails: the following steps may receive
# a relative path and must resolve it against the original directory.
# Sphinx emits warnings on stderr: capture it together with stdout.
(cd "$WD/docs" && make html) > "$OUTDIR/01_sphinx_warnings" 2>&1

# pylint errors
echo "Step 2: pylint errors ..."
pylint -j 0 -E "$TARGET" > "$OUTDIR/02_pylint_errors"

# pylint warnings
echo "Step 3: pylint warnings ..."
pylint -j 0 -d all -e W "$TARGET" > "$OUTDIR/03_pylint_warnings"

# mccabe complexity
echo "Step 4: mccabe complexity ..."
flake8 --select C90 --max-complexity 10 --output-file "$OUTDIR/04_mccabe_complexity" "$TARGET"

# pylint refactoring suggestions
echo "Step 5: pylint refactoring ..."
pylint -j 0 -d all -e R "$TARGET" > "$OUTDIR/05_pylint_refactoring"

# Type hints (AKA annotations) consistency
echo "Step 6: mypy type hints ..."
mypy --ignore-missing-imports "$TARGET" > "$OUTDIR/06_mypy_type_hints"

21 changes: 13 additions & 8 deletions soweego/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,21 @@
__copyright__ = 'Copyleft 2018, Hjfocs'

import logging
import os

# Silence requests log up to INFO
logging.getLogger('requests').setLevel(logging.WARNING)

# Silence tensorflow, see https://tinyurl.com/qnud7j8
# Python log up to WARNING
logging.getLogger('tensorflow').setLevel(logging.ERROR)
# C++ log up to W(arning)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Silence ML-Ensemble, see http://ml-ensemble.com/docs/config.html
os.environ['MLENS_VERBOSE'] = '0'

import click
import tensorflow as tf

from soweego import commons
from soweego import pipeline as pipeline_cli
Expand All @@ -21,10 +33,6 @@
from soweego.linker import cli as linker_cli
from soweego.validator import cli as validator_cli

# set env variable to ignore tensorflow warnings
# (only errors are printed)
tf.logging.set_verbosity(tf.logging.ERROR)


CLI_COMMANDS = {
'importer': importer_cli.cli,
Expand All @@ -34,9 +42,6 @@
'run': pipeline_cli.cli,
}

# Avoid verbose requests logging
logging.getLogger('requests').setLevel(logging.WARNING)


@click.group(commands=CLI_COMMANDS)
@click.option(
Expand Down
3 changes: 1 addition & 2 deletions soweego/commons/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ def prepare_stratified_k_fold(k, dataset, positive_samples_index):

def init_model(classifier: str, num_features: int, **kwargs):
if classifier is keys.NAIVE_BAYES:
# add `binarize` threshold if not already specified

# Add `binarize` threshold if not already specified
kwargs = {**constants.NAIVE_BAYES_PARAMS, **kwargs}
model = rl.NaiveBayesClassifier(**kwargs)

Expand Down
2 changes: 1 addition & 1 deletion soweego/linker/classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def _fit(
validation_split=validation_split,
batch_size=batch_size,
epochs=epochs,
verbose=0,
verbose=1,
callbacks=[
EarlyStopping(
monitor='val_loss',
Expand Down
10 changes: 9 additions & 1 deletion soweego/linker/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,15 @@ def _run_average(
):
LOGGER.info('Starting average evaluation over %d folds ...', k_folds)

predictions, p_mean, p_std, r_mean, r_std, fscore_mean, fscore_std = _average_k_fold(
(
predictions,
p_mean,
p_std,
r_mean,
r_std,
fscore_mean,
fscore_std,
) = _average_k_fold(
constants.CLASSIFIERS[classifier],
catalog,
entity,
Expand Down
2 changes: 1 addition & 1 deletion soweego/linker/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import recordlinkage as rl
from numpy import nan
from pandas import read_sql
from pandas.io.json.json import JsonReader
from pandas.io.json._json import JsonReader
from sqlalchemy.orm import Query

from soweego.commons import (
Expand Down

0 comments on commit 020a792

Please sign in to comment.