-
Notifications
You must be signed in to change notification settings - Fork 268
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Prepares for Command and Conquer style browser-based experiment management
- `experiment_name` is now compulsory - Addresses #207 and prepares for a browser-based "command center" - Added 'trees' reduction strategy - Added 'forrest' reduction strategy - Added `scan_utils.py` as a home for helper functions for /scan - Fixed tests to handle the new changes
- Loading branch information
1 parent
b682a5e
commit 240ff85
Showing
22 changed files
with
209 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
def forrest(self):

    '''Random Forest based reduction strategy. Somewhat more
    aggressive than for example 'spearman' because there are no
    negative values, but instead the highest positive correlation
    is subtracted from all the values so that max value is 0, and then
    values are turned into positive. The one with the highest positive
    score in the end will be dropped. This means that anything with
    0 originally, is a candidate for dropping. Because there are multiple
    zeroes in many cases, there is an element of randomness on which one
    is dropped.

    Reads `self.reduction_metric` and modifies the parameter space
    through `self.param_object.remove_is(label, value)`.

    Returns a tuple `(value, label)` for the parameter value that
    was removed.
    '''

    import wrangle
    import numpy as np

    # handle conversion to multi_labels
    from .reduce_utils import cols_to_multilabel
    data = cols_to_multilabel(self)

    # get the correlations
    # NOTE(review): the steps below assume a pandas Series indexed by
    # 'label~dtype~value' column headers, ordered from highest to lowest
    # importance -- confirm against wrangle.
    corr_values = wrangle.df_corr_randomforest(data, self.reduction_metric)

    # drop labels where value is NaN
    corr_values = corr_values.dropna()

    # handle the turning around of values (see docstring for more info);
    # .iloc is used because plain [0] on a string-labelled Series is
    # deprecated positional access in recent pandas versions
    corr_values = (corr_values - corr_values.iloc[0]).abs()

    # pick the column header in the last position
    drop_candidate = corr_values.index[-1]

    # get the label, dtype, and value from the column header
    label, dtype, value = drop_candidate.split('~')

    # convert things back to their original dtype
    value = np.array([value]).astype(dtype)[0]

    # this is where we modify the parameter space accordingly
    self.param_object.remove_is(label, value)

    return value, label
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
def trees(self, quantile=.8):

    '''Extra Trees based reduction strategy. Like 'forrest', somewhat more
    aggressive than for example 'spearman' because there are no
    negative values, but instead the highest positive correlation
    is subtracted from all the values so that max value is 0, and then
    values are turned into positive. The one with the highest positive
    score in the end will be dropped. This means that anything with
    0 originally, is a candidate for dropping. Because there are multiple
    zeroes in many cases, there is an element of randomness on which one
    is dropped.

    quantile | float | quantile of the reduction metric used as the
                       cut-off; rows with a metric value strictly above
                       it become True, all other rows become False.

    Reads `self.reduction_metric` and modifies the parameter space
    through `self.param_object.remove_is(label, value)`.

    Returns a tuple `(value, label)` for the parameter value that
    was removed.
    '''

    import wrangle
    import numpy as np

    # handle conversion to multi_labels
    from .reduce_utils import cols_to_multilabel
    data = cols_to_multilabel(self)

    # because extra trees wants label as 'y' we first transform with quantile
    metric = self.reduction_metric
    quantile_value = data[metric].quantile(quantile)
    data[metric] = data[metric] > quantile_value

    # get the correlations
    # NOTE(review): the steps below assume a pandas Series indexed by
    # 'label~dtype~value' column headers, ordered from highest to lowest
    # importance -- confirm against wrangle.
    corr_values = wrangle.df_corr_extratrees(data, metric)

    # drop labels where value is NaN
    corr_values = corr_values.dropna()

    # handle the turning around of values (see docstring for more info);
    # .iloc is used because plain [0] on a string-labelled Series is
    # deprecated positional access in recent pandas versions
    corr_values = (corr_values - corr_values.iloc[0]).abs()

    # pick the column header in the last position
    drop_candidate = corr_values.index[-1]

    # get the label, dtype, and value from the column header
    label, dtype, value = drop_candidate.split('~')

    # convert things back to their original dtype
    value = np.array([value]).astype(dtype)[0]

    # this is where we modify the parameter space accordingly
    self.param_object.remove_is(label, value)

    return value, label
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
def initialize_log(self):

    '''Creates an empty experiment log file and returns its path.

    A folder named after `self.experiment_name` is created in the
    current working directory (an already existing one is reused),
    and an empty csv file named with the current local time in
    mmddyyHHMMSS format is written into it.

    Returns the path of the log file as a string in the form
    './<experiment_name>/<mmddyyHHMMSS>.csv'.
    '''

    import time
    import os

    # create the experiment folder (unless one is already there)
    try:
        os.mkdir(os.getcwd() + '/' + self.experiment_name)
    except FileExistsError:
        pass

    # '%m%d%y' gives the same digits as '%D' with the slashes removed,
    # but unlike '%D' it is supported on every platform
    _experiment_id = time.strftime('%m%d%y%H%M%S')
    _file_name = _experiment_id + '.csv'
    _experiment_log = './' + self.experiment_name + '/' + _file_name

    # create the empty log file (truncates if it already exists);
    # the context manager closes the handle even if the write fails
    with open(_experiment_log, 'w') as f:
        f.write('')

    return _experiment_log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.