@@ -65,6 +65,8 @@ class FeatureSelectionPlacementOptions(str, Enum):
6565 NONE = "none"
6666 NUMERIC = "numeric"
6767 CATEGORICAL = "categorical"
68+ TEXT = "text"
69+ DATE = "date"
6870 ALL_FEATURES = "all_features"
6971
7072
@@ -645,11 +647,6 @@ def _add_pipeline_numeric(
645647 # Check if distribution-aware encoding is enabled
646648 if self .use_distribution_aware :
647649 logger .info (f"Using distribution-aware encoding for { feature_name } " )
648- # Cast to float32 before distribution-aware encoding
649- preprocessor .add_processing_step (
650- layer_creator = PreprocessorLayerFactory .cast_to_float32_layer ,
651- name = f"pre_dist_cast_to_float_{ feature_name } " ,
652- )
653650 # Check if manually specified distribution is provided
654651 _prefered_distribution = _feature .kwargs .get ("prefered_distribution" )
655652 if _prefered_distribution is not None :
@@ -920,6 +917,22 @@ def _add_pipeline_text(self, feature_name: str, input_layer, stats: dict) -> Non
920917 )
921918 # Process the feature
922919 _output_pipeline = preprocessor .chain (input_layer = input_layer )
920+
921+ # Apply feature selection if enabled for text features
922+ if (
923+ self .feature_selection_placement == FeatureSelectionPlacementOptions .TEXT
924+ or self .feature_selection_placement
925+ == FeatureSelectionPlacementOptions .ALL_FEATURES
926+ ):
927+ feature_selector = PreprocessorLayerFactory .variable_selection_layer (
928+ name = f"{ feature_name } _feature_selection" ,
929+ nr_features = 1 , # Single feature for now
930+ units = self .feature_selection_units ,
931+ dropout_rate = self .feature_selection_dropout ,
932+ )
933+ _output_pipeline , feature_weights = feature_selector ([_output_pipeline ])
934+ self .processed_features [f"{ feature_name } _weights" ] = feature_weights
935+
923936 self .processed_features [feature_name ] = _output_pipeline
924937
925938 @_monitor_performance
@@ -981,6 +994,22 @@ def _add_pipeline_date(self, feature_name: str, input_layer) -> None:
981994
982995 # Process the feature
983996 _output_pipeline = preprocessor .chain (input_layer = input_layer )
997+
998+ # Apply feature selection if enabled for date features
999+ if (
1000+ self .feature_selection_placement == FeatureSelectionPlacementOptions .DATE
1001+ or self .feature_selection_placement
1002+ == FeatureSelectionPlacementOptions .ALL_FEATURES
1003+ ):
1004+ feature_selector = PreprocessorLayerFactory .variable_selection_layer (
1005+ name = f"{ feature_name } _feature_selection" ,
1006+ nr_features = 1 , # Single feature for now
1007+ units = self .feature_selection_units ,
1008+ dropout_rate = self .feature_selection_dropout ,
1009+ )
1010+ _output_pipeline , feature_weights = feature_selector ([_output_pipeline ])
1011+ self .processed_features [f"{ feature_name } _weights" ] = feature_weights
1012+
9841013 self .processed_features [feature_name ] = _output_pipeline
9851014
9861015 @_monitor_performance
0 commit comments