Skip to content

Commit

Permalink
flake8
Browse files Browse the repository at this point in the history
  • Loading branch information
edublancas committed Jun 26, 2021
1 parent dcb90d2 commit ff394cb
Show file tree
Hide file tree
Showing 12 changed files with 94 additions and 45 deletions.
4 changes: 2 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,11 +220,11 @@
# A dictionary with options for the search language support, empty by default.
# 'ja' uses this config value.
# 'zh' users can customize the `jieba` dictionary path.
#html_search_options = {'type': 'default'}
# html_search_options = {'type': 'default'}

# The name of a javascript file (relative to the configuration directory) that
# implements a search results scorer. If empty, the default will be used.
#html_search_scorer = 'scorer.js'
# html_search_scorer = 'scorer.js'

# Output file base name for HTML help builder.
htmlhelp_basename = 'sklearn-evaluationdoc'
Expand Down
5 changes: 4 additions & 1 deletion docs/source/nbs/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@
list(d['feature_names'])
# -

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X,
y,
test_size=0.33,
random_state=42)

model = class_(**params)

Expand Down
2 changes: 1 addition & 1 deletion examples/roc.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@
y_true = y_test

plot.roc(y_true, y_score)
plt.show()
plt.show()
12 changes: 9 additions & 3 deletions examples/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,24 @@
import matplotlib.pyplot as plt

from matplotlib import style

style.use('seaborn-dark')

# Import some data to play with
data = datasets.make_classification(1000, 10, n_informative=5, class_sep=0.7, n_classes=8)
data = datasets.make_classification(1000,
10,
n_informative=5,
class_sep=0.7,
n_classes=8)
X = data[0]
y = data[1]

# shuffle and split training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
X_train, X_test, y_train, y_test = train_test_split(X,
y,
test_size=.5,
random_state=0)


est = RandomForestClassifier()
est.fit(X_train, y_train)
y_pred = est.predict(X_test)
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
description-file = README.md

[flake8]
exclude = .nox, .git, build
exclude = .nox, .git, build, .ipynb_checkpoints, docs/source/conf.py
2 changes: 1 addition & 1 deletion src/sklearn_evaluation/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def feature_importances(data, top_n=None, feature_names=None):
sub_imp = np.array([e.feature_importances_ for e in data.estimators_])
# calculate std
std = np.std(sub_imp, axis=0)
except AttributeError:
except Exception:
std = None

# get the number of features
Expand Down
2 changes: 1 addition & 1 deletion src/sklearn_evaluation/nb/NotebookCollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class NotebookCollection(Mapping):
"""Compare output from a collection of notebooks
To access output, notebooks must tag the cells (one tag per cell). For
instructions on tagging cells, `see this <https://jupyterbook.org/advanced/advanced.html#how-should-i-add-cell-tags-and-metadata-to-my-notebooks>`_.
instructions on tagging cells, `see this <https://jupyterbook.org/advanced/advanced.html#how-should-i-add-cell-tags-and-metadata-to-my-notebooks>`_. # noqa
:doc:`Click here <../user_guide/NotebookCollection>` to see the user guide.
Expand Down
2 changes: 1 addition & 1 deletion src/sklearn_evaluation/nb/NotebookIntrospector.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class NotebookIntrospector(Mapping):
"""Retrieve output from a notebook file with tagged cells.
For instructions on tagging cells,
`see this <https://jupyterbook.org/advanced/advanced.html#how-should-i-add-cell-tags-and-metadata-to-my-notebooks>`_.
`see this <https://jupyterbook.org/advanced/advanced.html#how-should-i-add-cell-tags-and-metadata-to-my-notebooks>`_. # noqa
Notes
-----
Expand Down
34 changes: 23 additions & 11 deletions src/sklearn_evaluation/plot/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,12 @@
from ..util import is_column_vector, is_row_vector, default_heatmap


def confusion_matrix(y_true, y_pred, target_names=None, normalize=False,
cmap=None, ax=None):
def confusion_matrix(y_true,
y_pred,
target_names=None,
normalize=False,
cmap=None,
ax=None):
"""
Plot confusion matrix.
Expand Down Expand Up @@ -57,10 +61,10 @@ def confusion_matrix(y_true, y_pred, target_names=None, normalize=False,
expected_len = len(values)

if target_names and (expected_len != len(target_names)):
raise ValueError(('Data cointains {} different values, but target'
' names contains {} values.'.format(expected_len,
len(target_names)
)))
raise ValueError(
('Data cointains {} different values, but target'
' names contains {} values.'.format(expected_len,
len(target_names))))

# if the user didn't pass target_names, create generic ones
if not target_names:
Expand All @@ -86,9 +90,12 @@ def confusion_matrix(y_true, y_pred, target_names=None, normalize=False,
for (y, x), v in np.ndenumerate(cm):
try:
label = '{:.2}'.format(v)
except:
except Exception:
label = v
ax.text(x, y, label, horizontalalignment='center',
ax.text(x,
y,
label,
horizontalalignment='center',
verticalalignment='center')

if cmap is None:
Expand Down Expand Up @@ -117,8 +124,11 @@ def confusion_matrix(y_true, y_pred, target_names=None, normalize=False,


# http://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_importances.html
def feature_importances(data, top_n=None, feature_names=None,
orientation='horizontal', ax=None):
def feature_importances(data,
top_n=None,
feature_names=None,
orientation='horizontal',
ax=None):
"""
Get and order feature importances from a scikit-learn model
or from an array-like structure. If data is a scikit-learn model with
Expand Down Expand Up @@ -156,7 +166,9 @@ def feature_importances(data, top_n=None, feature_names=None,
# If no feature_names is provided, assign numbers
res = compute.feature_importances(data, top_n, feature_names)

ax = bar.plot(res.importance, orientation, res.feature_name,
ax = bar.plot(res.importance,
orientation,
res.feature_name,
error=None if not hasattr(res, 'std_') else res.std_)
ax.set_title("Feature importances")
return ax
Expand Down
2 changes: 1 addition & 1 deletion src/sklearn_evaluation/training/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
"""
from sklearn_evaluation.training.selector import DataSelector

__all__ = ['DataSelector']
__all__ = ['DataSelector']
4 changes: 2 additions & 2 deletions src/sklearn_evaluation/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ def _mapping_to_tuple_pairs(d):
return tuple(product(*t))


def _flatten_list(l):
return [item for sublist in l for item in sublist]
def _flatten_list(elements):
return [item for sublist in elements for item in sublist]


def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
Expand Down
68 changes: 48 additions & 20 deletions tests/test_metrics.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
from unittest import TestCase
from sklearn_evaluation.metrics import (precision_at, labels_at,
tp_at, fp_at)
from sklearn_evaluation.metrics import (precision_at, labels_at, tp_at, fp_at)

import numpy as np
from numpy import nan


class Test_precision_at(TestCase):

def test_perfect_precision(self):
labels = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
scores = np.array([100, 90, 80, 70, 60, 50, 40, 30, 20, 10])
Expand All @@ -18,8 +16,10 @@ def test_perfect_precision(self):
def test_perfect_precision_with_nas(self):
labels = np.array([1, nan, 1, 1, 1, nan, 0, 0, 0, 0])
scores = np.array([100, 90, 80, 70, 60, 50, 40, 30, 20, 10])
prec, cutoff = precision_at(
labels, scores, top_proportion=0.10, ignore_nas=True)
prec, cutoff = precision_at(labels,
scores,
top_proportion=0.10,
ignore_nas=True)
self.assertEqual(prec, 1.0)
self.assertEqual(cutoff, 100)

Expand All @@ -33,42 +33,57 @@ def test_baseline_precision(self):
def test_baseline_precision_with_nas(self):
labels = np.array([nan, 1, nan, 1, 1, nan, nan, 0, 0, 0])
scores = np.array([100, 90, 80, 70, 60, 50, 40, 30, 20, 10])
prec, cutoff = precision_at(
labels, scores, top_proportion=1.0, ignore_nas=True)
prec, cutoff = precision_at(labels,
scores,
top_proportion=1.0,
ignore_nas=True)
self.assertEqual(prec, 0.5)
self.assertEqual(cutoff, 10)

def test_proportion_less_than_zero(self):
self.assertRaises(ValueError, precision_at, [1], [0], -0.1)

def test_proportion_more_than_one(self):
self.assertRaises(ValueError, precision_at, [1], [0], top_proportion=1.1)
self.assertRaises(ValueError,
precision_at, [1], [0],
top_proportion=1.1)


class Test_labels_at(TestCase):

def test_no_labels_at_1(self):
y_true = np.array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])
y_score = np.random.rand(1, 10)
labels = labels_at(y_true, y_score, top_proportion=0.01, normalize=False)
labels = labels_at(y_true,
y_score,
top_proportion=0.01,
normalize=False)
self.assertEqual(labels, 0)

def test_no_labels_at_50(self):
y_true = np.array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])
y_score = np.random.rand(1, 10)
labels = labels_at(y_true, y_score, top_proportion=0.5, normalize=False)
labels = labels_at(y_true,
y_score,
top_proportion=0.5,
normalize=False)
self.assertEqual(labels, 0)

def test_no_labels_at_100(self):
y_true = np.array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])
y_score = np.random.rand(1, 10)
labels = labels_at(y_true, y_score, top_proportion=1.0, normalize=False)
labels = labels_at(y_true,
y_score,
top_proportion=1.0,
normalize=False)
self.assertEqual(labels, 0)

def test_one_label_at_10(self):
y_true = np.array([1, nan, nan, nan, nan, nan, nan, nan, nan, nan])
y_score = np.array([1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1])
labels = labels_at(y_true, y_score, top_proportion=0.1, normalize=False)
labels = labels_at(y_true,
y_score,
top_proportion=0.1,
normalize=False)
self.assertEqual(labels, 1)

def test_one_label_at_10_norm(self):
Expand All @@ -80,19 +95,28 @@ def test_one_label_at_10_norm(self):
def test_one_label_at_50(self):
y_true = np.array([1, nan, nan, nan, nan, nan, nan, nan, nan, nan])
y_score = np.array([1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1])
labels = labels_at(y_true, y_score, top_proportion=0.5, normalize=False)
labels = labels_at(y_true,
y_score,
top_proportion=0.5,
normalize=False)
self.assertEqual(labels, 1)

def test_one_label_at_100(self):
y_true = np.array([1, nan, nan, nan, nan, nan, nan, nan, nan, nan])
y_score = np.array([1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1])
labels = labels_at(y_true, y_score, top_proportion=1.0, normalize=False)
labels = labels_at(y_true,
y_score,
top_proportion=1.0,
normalize=False)
self.assertEqual(labels, 1)

def test_60_labels_at_60(self):
y_true = np.array([1, 1, 1, 1, 1, 1, nan, nan, nan, nan])
y_score = np.array([1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1])
labels = labels_at(y_true, y_score, top_proportion=0.6, normalize=False)
labels = labels_at(y_true,
y_score,
top_proportion=0.6,
normalize=False)
self.assertEqual(labels, 6)

def test_60_labels_at_60_norm(self):
Expand All @@ -104,7 +128,10 @@ def test_60_labels_at_60_norm(self):
def test_60_labels_at_60_mixed_values(self):
y_true = np.array([1, 0, 0, 1, 0, 1, nan, nan, nan, nan])
y_score = np.array([1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1])
labels = labels_at(y_true, y_score, top_proportion=0.6, normalize=False)
labels = labels_at(y_true,
y_score,
top_proportion=0.6,
normalize=False)
self.assertEqual(labels, 6)

def test_60_labels_at_60_norm_mixed_values(self):
Expand All @@ -116,7 +143,10 @@ def test_60_labels_at_60_norm_mixed_values(self):
def test_60_labels_at_30(self):
y_true = np.array([1, 1, 1, 1, 1, 1, nan, nan, nan, nan])
y_score = np.array([1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1])
labels = labels_at(y_true, y_score, top_proportion=0.3, normalize=False)
labels = labels_at(y_true,
y_score,
top_proportion=0.3,
normalize=False)
self.assertEqual(labels, 3)

def test_60_labels_at_30_norm(self):
Expand All @@ -133,7 +163,6 @@ def test_proportion_more_than_one(self):


class Test_tp_at(TestCase):

def test_with_nas(self):
y_true = np.array([1, nan, 1, 1, 1, 1, 1, 1, 1, nan])
y_score = np.array([1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1])
Expand Down Expand Up @@ -196,7 +225,6 @@ def test_proportion_more_than_one(self):


class Test_fp_at(TestCase):

def test_with_nas(self):
y_true = np.array([0, nan, 1, 1, 1, 1, 1, 1, 1, nan])
y_score = np.array([1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1])
Expand Down

0 comments on commit ff394cb

Please sign in to comment.