This repository has been archived by the owner on Jan 9, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Data Exporter Before Training the Estimator (#180)
* Setting a step to export the processed data before feeding it to the estimator
- Loading branch information
Showing
9 changed files
with
147 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
"""SmartSummarizer for FeatureExporterMapper step.""" | ||
from foreshadow.concrete.internals import NoTransform | ||
from foreshadow.smart.smart import SmartTransformer | ||
|
||
|
||
class DataExporter(SmartTransformer):
    """Smart transformer used by the data exporter step.

    Whatever columns it is given, it always picks ``NoTransform``.
    """

    def __init__(self, check_wrapped=True, **kwargs):
        """Forward every argument to the SmartTransformer initializer.

        Args:
            check_wrapped: passed through unchanged to SmartTransformer.
            **kwargs: remaining keyword arguments for SmartTransformer.

        """
        super().__init__(check_wrapped=check_wrapped, **kwargs)

    def pick_transformer(self, X, y=None, **fit_params):
        """Select the transformer for a given set of columns.

        Args:
            X: input DataFrame
            y: input labels
            **fit_params: fit_params

        Returns:
            A new NoTransform instance, regardless of the inputs.

        """
        chosen = NoTransform()
        return chosen
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
"""PrepareStep that exports the processed data before sending to Estimator.""" | ||
from foreshadow.logging import logging | ||
from foreshadow.smart import DataExporter | ||
from foreshadow.utils import ConfigKey, DefaultConfig | ||
|
||
from .autointentmap import AutoIntentMixin | ||
from .preparerstep import PreparerStep | ||
|
||
|
||
class DataExporterMapper(PreparerStep, AutoIntentMixin):
    """PreparerStep that exports the processed data to a csv file.

    Args:
        **kwargs: kwargs to PreparerStep initializer.

    """

    def __init__(self, **kwargs):
        """Initialize the step by delegating to PreparerStep.

        Args:
            **kwargs: kwargs to PreparerStep initializer.

        """
        super().__init__(**kwargs)

    def get_mapping(self, X):  # noqa
        """Pair every column of X with its own DataExporter.

        Args:
            X: input DataFrame

        Returns:
            Whatever ``separate_cols`` returns for one single-transformer
            list per column of X.

        """
        per_column = [
            [DataExporter(cache_manager=self.cache_manager)] for _ in X
        ]
        return self.separate_cols(transformers=per_column, cols=X.columns)

    def fit_transform(self, X, y=None, **fit_params):
        """Fit then transform this PreparerStep.

        Side-effect: the transformed dataframe is written to disk as a csv
        file. The destination is the configured
        ``ConfigKey.PROCESSED_DATA_EXPORT_PATH`` when that key is present
        in the cache manager's config, otherwise
        ``DefaultConfig.PROCESSED_DATA_EXPORT_PATH``.

        Args:
            X: input DataFrame
            y: input labels
            **fit_params: kwarg params to fit

        Returns:
            The result of the parent ``fit_transform``, passed through.

        """
        transformed = super().fit_transform(X, y, **fit_params)

        config = self.cache_manager["config"]
        path_key = ConfigKey.PROCESSED_DATA_EXPORT_PATH
        if path_key in config:
            export_path = config[path_key]
        else:
            export_path = DefaultConfig.PROCESSED_DATA_EXPORT_PATH

        # The index is left out so the file holds only the data columns.
        transformed.to_csv(export_path, index=False)
        logging.info("Exported processed data to {}".format(export_path))
        return transformed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
"""Test Data Exporter""" | ||
|
||
from foreshadow.cachemanager import CacheManager | ||
from foreshadow.steps import DataExporterMapper | ||
from foreshadow.utils import ConfigKey | ||
|
||
|
||
def test_data_exporter_fit_transform():
    """Check that fit_transform passes data through and exports it as csv.

    The export path points into a temporary directory so the test leaves
    no ``data_export.csv`` behind in the current working directory.
    """
    import os
    import tempfile

    import pandas as pd
    from sklearn.datasets import load_breast_cancer

    cancer = load_breast_cancer()
    cancerX_df = pd.DataFrame(cancer.data, columns=cancer.feature_names)

    # The directory and the exported file are removed when the block exits,
    # even if one of the assertions fails.
    with tempfile.TemporaryDirectory() as export_dir:
        export_path = os.path.join(export_dir, "data_export.csv")

        cache_manager = CacheManager()
        config = cache_manager["config"]
        config[ConfigKey.PROCESSED_DATA_EXPORT_PATH] = export_path

        exporter = DataExporterMapper(cache_manager=cache_manager)
        processed_df = exporter.fit_transform(X=cancerX_df)

        # The step must hand the data through untouched ...
        pd.testing.assert_frame_equal(processed_df, cancerX_df)

        # ... and the file written to disk must hold the same data.
        exported_df = pd.read_csv(export_path)
        pd.testing.assert_frame_equal(processed_df, exported_df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters