/
regressor.py
142 lines (114 loc) · 5.4 KB
/
regressor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""
Copyright 2019 Marco Lattuada
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy

import custom_logger
import data_preparation.column_selection
import data_preparation.ernest
import data_preparation.inversion
import data_preparation.onehot_encoding
import data_preparation.product
import data_preparation.rename_columns
import regression_inputs
class Regressor:
    """
    Regressor: it includes the preprocessing steps plus the actual regressor

    Attributes
    ----------
    _campaign_configuration: dict of dict
        The set of options specified during the generation of this regressor

    _regressor
        The actual object performing the regression

    _x_columns
        The columns used by the regressor

    _scalers
        The scalers used for the normalization of each column, indexed by column name

    _logger
        The internal logger

    Methods
    -------
    predict()
        Predict the target column

    get_regressor()
        Return the regressor associated with this experiment configuration
    """

    def __init__(self, campaign_configuration, regressor, x_columns, scalers):
        """
        Parameters
        ----------
        campaign_configuration: dict of dict
            The set of options specified during the generation of this regressor

        regressor
            The wrapped regressor

        x_columns
            The columns passed to the wrapped regressor

        scalers
            The scalers to be applied to the columns, indexed by column name
        """
        # NOTE(review): assert is stripped under `python -O`; kept as is so that callers
        # still get the same AssertionError they get today.
        assert regressor
        self._campaign_configuration = campaign_configuration
        self._regressor = regressor
        self._x_columns = x_columns
        self._scalers = scalers
        self._logger = custom_logger.getLogger(__name__)

    def _preprocess(self, inputs):
        """
        Wrap the inputs and run the configured data preparation steps on them

        The steps are applied in this fixed order: column renaming, column selection, onehot encoding, inversion, product, ernest features. All of them but onehot encoding are executed only if enabled in the 'DataPreparation' section of the campaign configuration.

        Parameters
        ----------
        inputs
            The data to be preprocessed

        Returns
        -------
            The object returned by the last executed preprocessing step
        """
        column_names = inputs.columns.values.tolist()
        data = regression_inputs.RegressionInputs(inputs, {}, column_names, self._campaign_configuration['General']['y'])
        self._logger.debug("Created input regression")

        preparation_options = self._campaign_configuration['DataPreparation']

        # Adding column renaming if required
        if 'rename_columns' in preparation_options:
            rename_columns_step = data_preparation.rename_columns.RenameColumns(self._campaign_configuration)
            data = rename_columns_step.process(data)
            self._logger.debug("Performed column renaming")

        # Adding column selection if required
        if 'use_columns' in preparation_options or "skip_columns" in preparation_options:
            column_selection_step = data_preparation.column_selection.ColumnSelection(self._campaign_configuration)
            data = column_selection_step.process(data)
            self._logger.debug("Performed column selection")

        # Onehot encoding is always executed
        onehot_encoding_step = data_preparation.onehot_encoding.OnehotEncoding(self._campaign_configuration)
        data = onehot_encoding_step.process(data)

        # Compute inverse
        if preparation_options.get('inverse'):
            inversion_step = data_preparation.inversion.Inversion(self._campaign_configuration)
            data = inversion_step.process(data)
            self._logger.debug("Performed inversion")

        # Compute product
        if preparation_options.get('product_max_degree'):
            product_step = data_preparation.product.Product(self._campaign_configuration)
            data = product_step.process(data)
            self._logger.debug("Performed product")

        # Create ernest features if required
        if preparation_options.get('ernest'):
            ernest_step = data_preparation.ernest.Ernest(self._campaign_configuration)
            data = ernest_step.process(data)
            self._logger.debug("Performed ernest feature computation")

        return data

    def predict(self, inputs):
        """
        Predict the target column

        The inputs are preprocessed, normalized with the stored scalers, restricted to the columns used by the regressor and then passed to the wrapped regressor.

        Parameters
        ----------
        inputs
            The data to be used for the prediction; it must expose `columns.values.tolist()` (presumably a pandas DataFrame - to be confirmed against the callers)

        Returns
        -------
            The output of the wrapped regressor; if a scaler is registered for the y column, the output is passed through its inverse_transform

        Raises
        ------
        ValueError, KeyError
            Raised during normalization, column filtering or prediction; the input data are logged before the exception is propagated
        """
        data = self._preprocess(inputs)

        # NOTE(review): if no preprocessing step copies the data, the normalization below
        # writes into the object passed by the caller - confirm whether a copy is needed.
        raw_data = data.data
        y_column = self._campaign_configuration['General']['y']

        try:
            # Apply normalization
            for column in self._scalers:
                # The y column is not an input of the regressor
                if column == y_column:
                    continue
                self._logger.debug("---Applying scaler to %s", column)
                # Scalers are fed with a single column 2D array
                data_to_be_normalized = numpy.reshape(raw_data[column].to_numpy(), (-1, 1))
                raw_data[column] = self._scalers[column].transform(data_to_be_normalized)
            self._logger.debug("Performed normalization")

            raw_data = raw_data[self._x_columns]
            self._logger.debug("Performed columns filtering: %s", self._x_columns)

            y = self._regressor.predict(raw_data)
        except (ValueError, KeyError):
            self._logger.error("Input raw data:\n%s", raw_data)
            raise

        # Bring the prediction back to the original scale of the y column
        # NOTE(review): y is passed as returned by the regressor - confirm that the
        # y scaler accepts that shape.
        if y_column in self._scalers:
            y_scaler = self._scalers[y_column]
            y = y_scaler.inverse_transform(y)
        return y

    def get_regressor(self):
        """
        Return the internal regressor
        """
        return self._regressor