Skip to content

Commit

Permalink
Merge pull request #114 from jhosoume/dictionary-result
Browse files Browse the repository at this point in the history
Improving output structure
  • Loading branch information
FelSiq committed Mar 17, 2021
2 parents b454f8a + ef7f984 commit 907da86
Showing 1 changed file with 155 additions and 19 deletions.
174 changes: 155 additions & 19 deletions pymfe/mfe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import texttable
import numpy as np
import pandas as pd
import sklearn.utils
import sklearn.exceptions
import tqdm.auto
Expand All @@ -18,6 +19,11 @@
]
"""Type annotation for a sequence of TypeExtMtdTuple objects."""

_TypeExtract = t.Union[
t.Tuple[t.List, ...], t.Dict[str, t.List], pd.DataFrame
]
"""Type annotation for the possible output types of the extract."""


class MFE:
"""Core class for metafeature extraction.
Expand Down Expand Up @@ -1175,8 +1181,9 @@ def extract(
verbose: int = 0,
enable_parallel: bool = False,
suppress_warnings: bool = False,
out_type: t.Any = tuple,
**kwargs,
) -> t.Tuple[t.List, ...]:
) -> _TypeExtract:
"""Extracts metafeatures from the previously fitted dataset.
Parameters
Expand Down Expand Up @@ -1212,10 +1219,16 @@ def extract(
For more information see Examples.
out_type: :obj:`Any`, optional
If tuple, then the returned value is a tuple. If dict, then the
returned value is a dictionary. If pd.DataFrame, then the returned
value is a pandas.DataFrame. Otherwise, a TypeError is raised.
Returns
-------
:obj:`tuple`(:obj:`list`, :obj:`list`)
A tuple containing two lists (if ``measure_time`` is None.)
A tuple containing two lists (if ``measure_time`` is None).
The first field is the identifiers of each summarized value in the
form ``feature_name.summary_mtd_name`` (i.e., the feature
Expand All @@ -1228,12 +1241,51 @@ def extract(
(i.e., the value at index ``i`` in the second list has its
identifier at the same index in the first list and vice-versa).
:obj:`dict`(:obj:`str`, :obj:`list`)
A dictionary containing two fields (if ``measure_time`` is None).
The fields are `mtf_names` and `mtf_vals` (if ``measure_time`` is
given, there is also a third field, `mtf_time`).
The first field is the identifiers of each summarized value in the
form ``feature_name.summary_mtd_name`` (i.e., the feature
extraction name concatenated by the summary method name, separated
by a dot).
The second field is the summarized values.
Both lists of each field have a 1-1 correspondence by the index of
each element (i.e., the value at index ``i`` in the second list has
its identifier at the same index in the first list and vice-versa).
:obj:`pandas.core.frame.DataFrame`
A pandas DataFrame instance.
Each column is a summarized value. The column is identified by the
name of the meta-feature in the form
``feature_name.summary_mtd_name`` (i.e., the feature extraction name
concatenated by the summary method name, separated by a dot).
The rows store the summarized values (if ``measure_time`` is given,
there is an additional row with the time taken to calculate each value).
Example:
([``attr_ent.mean``, ``attr_ent.sd``], [``0.983``, ``0.344``])
is the return value for the feature ``attr_ent`` summarized by
both ``mean`` and ``sd`` (standard deviation), giving the values
``0.983`` and ``0.344``, respectively.
{
``mtf_names``: [``attr_ent.mean``, ``attr_ent.sd``],
``mtf_vals``: [``0.983``, ``0.344``]
}
is the return value when ``out_type`` is set to `dict`.
[pandas.core.DataFrame]
``attr_ent.mean`` ``attr_ent.sd``
0 ``0.983`` ``0.344``
is the return value when ``out_type`` is set to `pd.DataFrame`.
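If ``measure_time`` is given during the model instantiation, the time
spent during the calculations for the corresponding (by index)
metafeature is also returned: as a third list when ``out_type`` is
`tuple`, as the `mtf_time` field when it is `dict`, or as an extra
``time`` row when it is `pd.DataFrame`.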
Expand All @@ -1243,6 +1295,9 @@ def extract(
TypeError
If calling ``extract`` method before ``fit`` method.
TypeError
If calling ``extract`` method with invalid ``out_type``.
Examples
--------
Using kwargs. Option 1 to pass ft. extraction custom arguments:
Expand Down Expand Up @@ -1336,10 +1391,35 @@ def extract(
sep="\n",
)

if self.timeopt:
return res_names, res_vals, res_times
_deal_types = {
tuple: lambda names, vals, times = []:
(names, vals, times) if self.timeopt else (names, vals),
dict: lambda names, vals, times = []:
{
"mtf_names": names,
"mtf_vals": vals,
"mtf_time": times
} if self.timeopt else
{
"mtf_names": names,
"mtf_vals": vals
},
pd.DataFrame: lambda names, vals, times = []:
pd.DataFrame(
data=(vals, times),
columns=names,
index=("values", "time")
) if self.timeopt else
pd.DataFrame(
data=(vals,),
columns=names
)
}

return res_names, res_vals
try:
return _deal_types[out_type](res_names, res_vals, res_times)
except KeyError as out_not_defined:
raise TypeError("Output type not supported.") from out_not_defined

def extract_metafeature_names(
self, supervised: bool = True
Expand All @@ -1360,7 +1440,8 @@ def extract_metafeature_names(
Returns
-------
tuple
Tuple with meta-feature names to be extracted as values.
Tuple with meta-feature names to be extracted as values.
"""
if self.X is not None:
custom_args_ft = self._custom_args_ft
Expand Down Expand Up @@ -1480,17 +1561,41 @@ def _extract_with_bootstrap(

def _handle_extract_ret(
res: t.Tuple[np.ndarray, ...],
args: t.Tuple[t.List, ...],
args: t.Union[t.Tuple[t.List, ...], t.Dict[str, t.Any]],
it_num: int,
) -> t.Tuple[np.ndarray, ...]:
"""Handle each .extraction method return value."""
mtf_names, mtf_vals, mtf_time = res

if not self.timeopt:
cur_mtf_names, cur_mtf_vals = args
_handle_output = {
tuple: lambda args: args,
dict: lambda args:
(
args["mtf_names"],
args["mtf_vals"],
args["mtf_time"]
) if self.timeopt else
(
args["mtf_names"],
args["mtf_vals"],
),
pd.DataFrame: lambda args:
(
list(args.columns),
args.values[0],
args.values[1]
) if self.timeopt else
(
list(args.columns),
args.values[0],
)
}

if self.timeopt:
cur_mtf_names, cur_mtf_vals, cur_mtf_time = \
_handle_output[type(args)](args)
else:
cur_mtf_names, cur_mtf_vals, cur_mtf_time = args
cur_mtf_names, cur_mtf_vals = _handle_output[type(args)](args)

if mtf_names.size:
mtf_vals[:, it_num] = cur_mtf_vals
Expand Down Expand Up @@ -1571,7 +1676,7 @@ def extract_with_confidence(
arguments_fit: t.Optional[t.Dict[str, t.Any]] = None,
arguments_extract: t.Optional[t.Dict[str, t.Any]] = None,
verbose: int = 0,
) -> t.Tuple[np.ndarray, ...]:
) -> _TypeExtract:
"""Extract metafeatures with confidence intervals.
To build the confidence intervals, each metafeature is extracted
Expand Down Expand Up @@ -1711,7 +1816,7 @@ def extract_with_confidence(
verbose=verbose,
arguments_fit=arguments_fit,
arguments_extract=arguments_extract,
)
) # Returns a t.Tuple[t.List,...]

if verbose > 0:
print("Finished metafeature extract with _confidence interval.")
Expand All @@ -1727,21 +1832,50 @@ def extract_with_confidence(
if return_avg_val:
mtf_vals = np.nanmean(mtf_vals, axis=1)

if self.timeopt:
if return_avg_val:
mtf_time /= sample_num
if self.timeopt and return_avg_val:
mtf_time /= sample_num

_deal_types = {
tuple: lambda names, vals, conf, times = []:
(names, vals, times, conf) if self.timeopt
else (names, vals, conf),
dict: lambda names, vals, conf, times = []:
{
"mtf_names": names,
"mtf_vals": vals,
"confidence": conf,
"mtf_time": times
} if self.timeopt else
{
"mtf_names": names,
"mtf_vals": vals,
"confidence": conf
},
}

return mtf_names, mtf_vals, mtf_time, mtf_conf_int
# Check if the type was defined previously
if "out_type" in arguments_extract:
out_type = arguments_extract["out_type"]
else:
out_type = tuple

return mtf_names, mtf_vals, mtf_conf_int
try:
return _deal_types[out_type](
mtf_names,
mtf_vals,
mtf_conf_int,
mtf_time
)
except KeyError as out_not_defined:
raise TypeError("Unknown output type.") from out_not_defined

def extract_from_model(
self,
model: t.Any,
arguments_fit: t.Optional[t.Dict[str, t.Any]] = None,
arguments_extract: t.Optional[t.Dict[str, t.Any]] = None,
verbose: int = 0,
) -> t.Tuple[t.List, ...]:
) -> _TypeExtract:
"""Extract model-based metafeatures from given model.
The random seed used by the new internal model is the same random
Expand Down Expand Up @@ -1775,7 +1909,9 @@ def extract_from_model(
Returns
-------
:obj:`tuple`(:obj:`list`, :obj:`list`)
:obj:`tuple`(:obj:`list`, :obj:`list`) or
:obj:`dict`(:obj:`str`, :obj:`any`) or
:obj:`pandas.core.DataFrame`
See `.extract` method return value for more information.
Notes
Expand Down

0 comments on commit 907da86

Please sign in to comment.