Skip to content

Commit

Permalink
Merge pull request #58 from ealcobaca/add-references
Browse files Browse the repository at this point in the history
Metafeature references & more pydoc updates
  • Loading branch information
ealcobaca committed Dec 11, 2019
2 parents a1f5d32 + 2a92946 commit 1db5792
Show file tree
Hide file tree
Showing 12 changed files with 1,141 additions and 350 deletions.
2 changes: 1 addition & 1 deletion pymfe/_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1456,7 +1456,7 @@ def check_score(score: str, groups: t.Tuple[str, ...]):
"""Checks if a given score is valid.
Args:
score (:obj: `str`): the score metrics name.
score (:obj:`str`): the score metrics name.
groups (:obj:`Tuple` of :obj:`str`): a tuple of feature group names.
Expand Down
62 changes: 51 additions & 11 deletions pymfe/clustering.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
"""A module dedicated to the extraction of Clustering Metafeatures.
"""A module dedicated to the extraction of clustering metafeatures.
"""
import typing as t
import itertools
import collections

import numpy as np
import scipy.spatial.distance
# import statsmodels.tools.eval_measures
import sklearn.metrics
import sklearn.neighbors

Expand Down Expand Up @@ -75,7 +74,6 @@ def precompute_clustering_class(cls,
Returns
-------
:obj:`dict`
The following precomputed items are returned:
* ``classes`` (:obj:`np.ndarray`): distinct classes of
``y``, if ``y`` is not :obj:`NoneType`.
Expand Down Expand Up @@ -125,7 +123,6 @@ def precompute_group_distances(cls,
Returns
-------
:obj:`dict`
The following precomputed items are returned:
* ``pairwise_norm_interclass_dist`` (:obj:`np.ndarray`):
normalized distance between each distinct pair of
Expand Down Expand Up @@ -206,9 +203,7 @@ def precompute_nearest_neighbors(cls,
Returns
-------
:obj:`dict`
The following precomputed items are returned:
* ``pairwise_intraclass_dists`` (:obj:`np.ndarray`):
distance between each distinct pair of instances of
the same class.
Expand Down Expand Up @@ -242,7 +237,7 @@ def precompute_class_representatives(
representative: str = "mean",
classes: t.Optional[np.ndarray] = None,
**kwargs) -> t.Dict[str, t.Any]:
"""
"""Precomputations related to cluster representative instances.
Parameters
----------
Expand Down Expand Up @@ -292,9 +287,7 @@ class (effectively holding the same result as if the argument
Returns
-------
:obj:`dict`
The following precomputed items are returned:
* ``pairwise_intraclass_dists`` (:obj:`np.ndarray`):
distance between each distinct pair of instances of
the same class.
Expand Down Expand Up @@ -506,6 +499,11 @@ def ft_vdu(
:obj:`float`
Dunn index for given parameters.
References
----------
.. [1] J.C. Dunn, Well-separated clusters and optimal fuzzy
partitions, J. Cybern. 4 (1) (1974) 95–104.
Notes
-----
.. _scipydoc: :obj:`scipy.spatial.distance` documentation.
Expand All @@ -532,6 +530,11 @@ def ft_vdb(cls, N: np.ndarray, y: np.ndarray) -> float:
Check `dbindex`_ for more information.
References
----------
.. [1] D.L. Davies, D.W. Bouldin, A cluster separation measure,
IEEE Trans. Pattern Anal. Mach. Intell. 1 (2) (1979) 224–227.
Notes
-----
.. _dbindex: :obj:`sklearn.metrics.davies_bouldin_score`
Expand Down Expand Up @@ -572,6 +575,12 @@ def ft_int(
:obj:`float`
INT index.
References
----------
.. [1] Bezdek, J. C.; Pal, N. R. (1998a). Some new indexes of
cluster validity. IEEE Transactions on Systems, Man, and
Cybernetics, Part B, v.28, n.3, p.301–315.
Notes
-----
.. _scipydoc: :obj:`scipy.spatial.distance` documentation.
Expand Down Expand Up @@ -626,6 +635,12 @@ def ft_sil(cls,
:obj:`float`
Mean Silhouette value.
References
----------
.. [1] P.J. Rousseeuw, Silhouettes: a graphical aid to the
interpretation and validation of cluster analysis, J.
Comput. Appl. Math. 20 (1987) 53–65.
Notes
-----
.. _silhouette: :obj:`sklearn.metrics.silhouette_score`
Expand Down Expand Up @@ -654,7 +669,7 @@ def ft_pb(
y: np.ndarray,
dist_metric: str = "euclidean",
) -> float:
"""Pearson Correlation between class matching and instance distances.
"""Pearson correlation between class matching and instance distances.
The measure ranges from -1 to +1 (inclusive).
Expand All @@ -668,7 +683,12 @@ def ft_pb(
Returns
-------
:obj:`float`
Point Bisseral coefficient.
Point Biserial coefficient.
References
----------
.. [1] J. Lev, "The Point Biserial Coefficient of Correlation", Ann.
Math. Statist., Vol. 20, no. 1, pp. 125–126, 1949.
Notes
-----
Expand All @@ -689,13 +709,19 @@ def ft_pb(
@classmethod
def ft_ch(cls, N: np.ndarray, y: np.ndarray) -> float:
"""Calinski and Harabasz index.
Check `cahascore`_ for more information.
Returns
-------
:obj:`float`
Calinski-Harabasz index.
References
----------
.. [1] T. Calinski, J. Harabasz, A dendrite method for cluster
analysis, Commun. Stat. Theory Methods 3 (1) (1974) 1–27.
Notes
-----
.. _cahascore: ``sklearn.metrics.calinski_harabasz_score``
Expand All @@ -722,6 +748,13 @@ def ft_nre(
-------
:obj:`float`
Entropy of relative class frequencies.
References
----------
.. [1] Bruno Almeida Pimentel, André C.P.L.F. de Carvalho.
A new data characterization for selecting clustering algorithms
using meta-learning. Information Sciences, Volume 477, 2019,
Pages 203-219.
"""
if class_freqs is None:
_, class_freqs = np.unique(y, return_counts=True)
Expand Down Expand Up @@ -755,6 +788,13 @@ def ft_sc(cls,
Number of classes with less than ``size`` instances if
``normalize`` is False, proportion of classes with less
than ``size`` instances otherwise.
References
----------
.. [1] Bruno Almeida Pimentel, André C.P.L.F. de Carvalho.
A new data characterization for selecting clustering algorithms
using meta-learning. Information Sciences, Volume 477, 2019,
Pages 203-219.
"""
if class_freqs is None:
_, class_freqs = np.unique(y, return_counts=True)
Expand Down

0 comments on commit 1db5792

Please sign in to comment.