Skip to content

Commit

Permalink
Remove old import + other style changes
Browse files Browse the repository at this point in the history
Tidying up: remove import of deprecated `lc_tools`, add a couple tests
for `obs_feature_tools`, change some test data, remove extraneous
comments, add a couple of docstrings, change `test_flask_app` database
teardown procedure.
  • Loading branch information
bnaul committed Oct 1, 2015
1 parent 7c54921 commit 2660c11
Show file tree
Hide file tree
Showing 12 changed files with 153 additions and 217 deletions.
24 changes: 14 additions & 10 deletions mltsp/custom_feature_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,22 +541,26 @@ def generate_custom_features(custom_script_path, t, m, e,
Parameters
----------
t : array_like
Array containing time values.
m : array_like
Array containing data values.
e : array_like
Array containing measurement error values.
custom_script_path : str
Path to custom features script.
ts_data : list OR tuple, optional
List (or tuple) of lists (or tuples) containing time,
measurement (and optionally associated error values) data.
Defaults to None. If None, path_to_csv must not be None,
otherwise raises an Exception.
features_already_known : dict, optional
List of dicts containing any meta-features associated with
provided time-series data. Defaults to [].
Dict containing any meta-features associated with provided time-series
data. Defaults to {}.
Returns
-------
list of dict
List of dictionaries containing newly-generated features.
dict
Dictionary containing newly-generated features.
"""
if "t" not in features_already_known:
features_already_known['t'] = t
Expand Down
2 changes: 0 additions & 2 deletions mltsp/featurize.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import numpy as np

from . import cfg
from . import lc_tools
from . import custom_feature_tools as cft
from . import util
from . import custom_exceptions
Expand Down Expand Up @@ -146,7 +145,6 @@ def generate_features(headerfile_path, zipfile_path, features_to_use,
custom_script_path, is_test, already_featurized,
in_docker_container):
"""Generate features for provided time-series data."""
all_features_list = cfg.features_list_obs[:] + cfg.features_list_science[:]
if already_featurized:
# Read in features from CSV file
objects = parse_prefeaturized_csv_data(headerfile_path)
Expand Down
17 changes: 13 additions & 4 deletions mltsp/obs_feature_tools.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import copy
import numpy as np
import scipy.stats as stats
from . import cfg
from dask.async import get_sync as dget
import dask.async


def double_to_single_step(cads):
Expand Down Expand Up @@ -83,12 +82,22 @@ def generate_obs_features(t, m, e, features_to_compute=cfg.features_list_obs):
Parameters
----------
t : array_like
Array containing time values.
m : array_like
Array containing data values.
e : array_like
Array containing measurement error values.
features_to_compute : list
Optional list containing names of desired features.
Returns
-------
dict
Dictionary containing generated time series features.
"""
features_to_compute = [f for f in features_to_compute if f in
cfg.features_list_obs]
Expand Down Expand Up @@ -147,5 +156,5 @@ def generate_obs_features(t, m, e, features_to_compute=cfg.features_list_obs):
# Do not execute in parallel; parallelization has already taken place at
# the level of time series, so we compute features for a single time series
# in serial.
values = dget(feature_graph, features_to_compute)
values = dask.async.get_sync(feature_graph, features_to_compute)
return dict(zip(features_to_compute, values))
2 changes: 0 additions & 2 deletions mltsp/predict_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@

from . import cfg
from . import custom_exceptions
from . import lc_tools
from . import custom_feature_tools as cft
from . import util
from .celery_tasks import pred_featurize_single


Expand Down
17 changes: 11 additions & 6 deletions mltsp/science_feature_tools.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import numpy as np
import cfg
import science_features as sf
from dask.async import get_sync as dget
import dask.async


def generate_science_features(t, m, e, features_to_compute=cfg.features_list_science):
def generate_science_features(t, m, e,
features_to_compute=cfg.features_list_science):
"""Generate science features for provided time series data.
Parameters
Expand All @@ -18,12 +19,14 @@ def generate_science_features(t, m, e, features_to_compute=cfg.features_list_sci
e : array_like
Array containing measurement error values.
features_to_compute : list
Optional list containing names of desired features.
Returns
-------
dict
Dictionary containing newly-generated features. Keys are
feature names, values are feature values (floats).
"""
features_to_compute = [f for f in features_to_compute if f in
cfg.features_list_science]
Expand All @@ -43,7 +46,8 @@ def generate_science_features(t, m, e, features_to_compute=cfg.features_list_sci
'percent_amplitude': (sf.percent_amplitude, m),
'percent_beyond_1_std': (sf.percent_beyond_1_std, m, e),
'percent_close_to_median': (sf.percent_close_to_median, m),
'percent_difference_flux_percentile': (sf.percent_difference_flux_percentile, m),
'percent_difference_flux_percentile': (
sf.percent_difference_flux_percentile, m),
'skew': (sf.skew, m),
'std': (sf.std, m),
'stetson_j': (sf.stetson_j, m),
Expand Down Expand Up @@ -114,12 +118,13 @@ def generate_science_features(t, m, e, features_to_compute=cfg.features_list_sci
'p2p_model': (sf.p2p_model, t, m, 'freq1_freq'),
'p2p_scatter_2praw': (sf.get_p2p_scatter_2praw, 'p2p_model'),
'p2p_scatter_over_mad': (sf.get_p2p_scatter_over_mad, 'p2p_model'),
'p2p_scatter_pfold_over_mad': (sf.get_p2p_scatter_pfold_over_mad, 'p2p_model'),
'p2p_scatter_pfold_over_mad': (sf.get_p2p_scatter_pfold_over_mad,
'p2p_model'),
'p2p_ssqr_diff_over_var': (sf.get_p2p_ssqr_diff_over_var, 'p2p_model'),
}

# Do not execute in parallel; parallelization has already taken place at
# the level of time series, so we compute features for a single time series
# in serial.
values = dget(feature_graph, features_to_compute)
values = dask.async.get_sync(feature_graph, features_to_compute)
return dict(zip(features_to_compute, values))
19 changes: 2 additions & 17 deletions mltsp/science_features/tests/test_science_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,15 +263,13 @@ def test_lomb_scargle_regular_single_freq():

# Only test the first (true) frequency; the rest correspond to noise
for j in range(1, NUM_HARMONICS):
# TODO why is this what 'relative phase' means?
npt.assert_allclose(phase*j*(-1**j),
all_lomb['freq1_rel_phase{}'.format(j+1)], rtol=1e-2, atol=1e-2)

# Frequency ratio not relevant since there is only one frequency; only test amplitude/signif
for i in [2,3]:
npt.assert_allclose(0., all_lomb['freq_amplitude_ratio_{}1'.format(i)], atol=1e-3)

# TODO make significance test more precise
npt.assert_array_less(10., all_lomb['freq1_signif'])

# Only one frequency, so this should explain basically all the variance
Expand Down Expand Up @@ -325,7 +323,6 @@ def test_lomb_scargle_irregular_single_freq():
npt.assert_allclose(phase*j*(-1**j),
all_lomb['freq1_rel_phase{}'.format(j+1)], rtol=1e-1, atol=1e-1)

# TODO make significance test more precise
npt.assert_array_less(10., all_lomb['freq1_signif'])

# Only one frequency, so this should explain basically all the variance
Expand Down Expand Up @@ -399,13 +396,7 @@ def test_lomb_scargle_regular_multi_freq():
npt.assert_allclose(amplitudes[i-1,0] / amplitudes[0,0],
all_lomb['freq_amplitude_ratio_{}1'.format(i)], atol=2e-2)

# TODO make significance test more precise
npt.assert_array_less(10., all_lomb['freq1_signif'])
"""
e_name = 'freq_signif_ratio_{}1_extractor'.format(i)
e = getattr(extractors, e_name)()
npt.assert_allclose(0., all_lomb, atol=1e-3)
"""


def test_lomb_scargle_irregular_multi_freq():
Expand Down Expand Up @@ -437,13 +428,7 @@ def test_lomb_scargle_irregular_multi_freq():
npt.assert_allclose(frequencies[i-1] / frequencies[0],
all_lomb['freq_frequency_ratio_{}1'.format(i)], atol=5e-2)

# TODO make significance test more precise
npt.assert_array_less(10., all_lomb['freq1_signif'])
"""
e_name = 'freq_signif_ratio_{}1_extractor'.format(i)
e = getattr(extractors, e_name)()
npt.assert_allclose(0., all_lomb, atol=1e-3)
"""


def test_max():
Expand All @@ -453,7 +438,7 @@ def test_max():
npt.assert_equal(f.values()[0], max(values))


# TODO this returns the index of the biggest slope...seems wrong
# TODO uncomment when feature is fixed
#def test_max_slope():
# """Test maximum slope feature, which finds the INDEX of the largest slope."""
# times, values, errors = irregular_random()
Expand All @@ -470,7 +455,7 @@ def test_median_absolute_deviation():
np.median(values))))


# TODO should replace with commented version once sign problems fixed
# TODO should replace with commented version once sign problems fixed
def test_percent_close_to_median():
"""Test feature which finds the percentage of points near the median value."""
times, values, errors = irregular_random()
Expand Down
8 changes: 4 additions & 4 deletions mltsp/tests/data/test_features.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
meta1,meta2,meta3,std_err
0.180734306909,0.548427238218,0.187956237253,0.00540072367701
0.196072341892,1.17178931753,0.174802803661,0.00665375480667
0.558093146298,0.265003093326,0.10984235246,0.00811408738542
meta1,meta2,meta3,std_err,amplitude
0.180734306909,0.548427238218,0.187956237253,0.00540072367701,2.0
0.196072341892,1.17178931753,0.174802803661,0.00665375480667,2.5
0.558093146298,0.265003093326,0.10984235246,0.00811408738542,3.0
8 changes: 4 additions & 4 deletions mltsp/tests/data/test_features_wcust.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
meta1,meta2,meta3,std_err,f
0.180734306909,0.548427238218,0.187956237253,0.3622,0.3
0.196072341892,1.17178931753,0.174802803661,0.116672,0.2
0.558093146298,0.265003093326,0.10984235246,1.3343,1.4
meta1,meta2,meta3,std_err,amplitude,f
0.180734306909,0.548427238218,0.187956237253,0.3622,2.0,0.3
0.196072341892,1.17178931753,0.174802803661,0.116672,2.5,0.2
0.558093146298,0.265003093326,0.10984235246,1.3343,3.0,1.4
8 changes: 4 additions & 4 deletions mltsp/tests/data/test_features_with_classes.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
class,meta1,meta2,meta3,std_err
class1,0.180734306909,0.548427238218,0.187956237253,0.00540072367701
class2,0.196072341892,1.17178931753,0.174802803661,0.00665375480667
class3,0.558093146298,0.265003093326,0.10984235246,0.00811408738542
class,meta1,meta2,meta3,std_err,amplitude
class1,0.180734306909,0.548427238218,0.187956237253,0.5,2.0
class2,0.196072341892,1.17178931753,0.174802803661,0.23,2.5
class3,0.558093146298,0.265003093326,0.10984235246,1.20,3.0

0 comments on commit 2660c11

Please sign in to comment.