Commit f76e839: Remove old import + other style changes
Tidying up: remove import of deprecated `lc_tools`, change some test
data, remove extraneous comments, add a couple of docstrings.
bnaul committed Oct 1, 2015
1 parent 7c54921 commit f76e839
Showing 11 changed files with 97 additions and 50 deletions.
1 change: 0 additions & 1 deletion mltsp/featurize.py
@@ -12,7 +12,6 @@
import numpy as np

from . import cfg
from . import lc_tools
from . import custom_feature_tools as cft
from . import util
from . import custom_exceptions
17 changes: 13 additions & 4 deletions mltsp/obs_feature_tools.py
@@ -1,8 +1,7 @@
import copy
import numpy as np
import scipy.stats as stats
from . import cfg
from dask.async import get_sync as dget
import dask.async


def double_to_single_step(cads):
@@ -83,12 +82,22 @@ def generate_obs_features(t, m, e, features_to_compute=cfg.features_list_obs):
Parameters
----------
t : array_like
Array containing time values.
m : array_like
Array containing data values.
e : array_like
Array containing measurement error values.
features_to_compute : list
Optional list containing names of desired features.

Returns
-------
dict
Dictionary containing generated time series features.
"""
features_to_compute = [f for f in features_to_compute if f in
cfg.features_list_obs]
@@ -147,5 +156,5 @@ def generate_obs_features(t, m, e, features_to_compute=cfg.features_list_obs):
# Do not execute in parallel; parallelization has already taken place at
# the level of time series, so we compute features for a single time series
# in serial.
values = dget(feature_graph, features_to_compute)
values = dask.async.get_sync(feature_graph, features_to_compute)
return dict(zip(features_to_compute, values))
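
[Editor's note] Both here and in mltsp/science_feature_tools.py below, the aliased import (dget) is replaced with an explicit, qualified call. For readers unfamiliar with the API: dask.async.get_sync is the single-threaded scheduler in 2015-era dask (the module was renamed dask.local in later releases). A minimal sketch of how it evaluates a task graph, assuming that old module layout:

import dask.async

def add(a, b):
    return a + b

# A dask graph is a plain dict mapping keys to values or task tuples.
feature_graph = {'x': 1, 'y': (add, 'x', 10), 'z': (add, 'y', 100)}

# get_sync resolves dependencies and runs every task serially in the
# calling thread, returning one result per requested key.
values = dask.async.get_sync(feature_graph, ['y', 'z'])
print(dict(zip(['y', 'z'], values)))  # {'y': 11, 'z': 111}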
2 changes: 0 additions & 2 deletions mltsp/predict_class.py
@@ -12,9 +12,7 @@

from . import cfg
from . import custom_exceptions
from . import lc_tools
from . import custom_feature_tools as cft
from . import util
from .celery_tasks import pred_featurize_single


8 changes: 5 additions & 3 deletions mltsp/science_feature_tools.py
@@ -1,7 +1,7 @@
import numpy as np
import cfg
import science_features as sf
from dask.async import get_sync as dget
import dask.async


def generate_science_features(t, m, e, features_to_compute=cfg.features_list_science):
@@ -18,12 +18,14 @@ def generate_science_features(t, m, e, features_to_compute=cfg.features_list_science):
e : array_like
Array containing measurement error values.
features_to_compute : list
Optional list containing names of desired features.

Returns
-------
dict
Dictionary containing newly-generated features. Keys are
feature names, values are feature values (floats).
"""
features_to_compute = [f for f in features_to_compute if f in
cfg.features_list_science]
@@ -121,5 +123,5 @@ def generate_science_features(t, m, e, features_to_compute=cfg.features_list_science):
# Do not execute in parallel; parallelization has already taken place at
# the level of time series, so we compute features for a single time series
# in serial.
values = dget(feature_graph, features_to_compute)
values = dask.async.get_sync(feature_graph, features_to_compute)
return dict(zip(features_to_compute, values))
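
[Editor's note] The tail of generate_science_features mirrors generate_obs_features: a graph keyed by feature name is evaluated serially, then zipped back into a dict. A sketch of that shape, using hypothetical stand-in feature functions (the real ones live in mltsp/science_features and are not shown here):

import numpy as np
import dask.async

def amplitude(m):
    # Half the peak-to-peak range; illustrative stand-in only.
    return (np.max(m) - np.min(m)) / 2.0

def median_absolute_deviation(m):
    return np.median(np.abs(m - np.median(m)))

m = np.random.normal(1, 1, 50)
feature_graph = {'amplitude': (amplitude, m),
                 'median_absolute_deviation': (median_absolute_deviation, m)}
features_to_compute = ['amplitude', 'median_absolute_deviation']

values = dask.async.get_sync(feature_graph, features_to_compute)
print(dict(zip(features_to_compute, values)))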
19 changes: 2 additions & 17 deletions mltsp/science_features/tests/test_science_features.py
@@ -263,15 +263,13 @@ def test_lomb_scargle_regular_single_freq():

# Only test the first (true) frequency; the rest correspond to noise
for j in range(1, NUM_HARMONICS):
# TODO why is this what 'relative phase' means?
npt.assert_allclose(phase*j*(-1**j),
all_lomb['freq1_rel_phase{}'.format(j+1)], rtol=1e-2, atol=1e-2)

# Frequency ratio not relevant since there is only one frequency; only test amplitude/signif
for i in [2,3]:
npt.assert_allclose(0., all_lomb['freq_amplitude_ratio_{}1'.format(i)], atol=1e-3)

# TODO make significance test more precise
npt.assert_array_less(10., all_lomb['freq1_signif'])

# Only one frequency, so this should explain basically all the variance
@@ -325,7 +323,6 @@ def test_lomb_scargle_irregular_single_freq():
npt.assert_allclose(phase*j*(-1**j),
all_lomb['freq1_rel_phase{}'.format(j+1)], rtol=1e-1, atol=1e-1)

# TODO make significance test more precise
npt.assert_array_less(10., all_lomb['freq1_signif'])

# Only one frequency, so this should explain basically all the variance
@@ -399,13 +396,7 @@ def test_lomb_scargle_regular_multi_freq():
npt.assert_allclose(amplitudes[i-1,0] / amplitudes[0,0],
all_lomb['freq_amplitude_ratio_{}1'.format(i)], atol=2e-2)

# TODO make significance test more precise
npt.assert_array_less(10., all_lomb['freq1_signif'])
"""
e_name = 'freq_signif_ratio_{}1_extractor'.format(i)
e = getattr(extractors, e_name)()
npt.assert_allclose(0., all_lomb, atol=1e-3)
"""


def test_lomb_scargle_irregular_multi_freq():
@@ -437,13 +428,7 @@ def test_lomb_scargle_irregular_multi_freq():
npt.assert_allclose(frequencies[i-1] / frequencies[0],
all_lomb['freq_frequency_ratio_{}1'.format(i)], atol=5e-2)

# TODO make significance test more precise
npt.assert_array_less(10., all_lomb['freq1_signif'])
"""
e_name = 'freq_signif_ratio_{}1_extractor'.format(i)
e = getattr(extractors, e_name)()
npt.assert_allclose(0., all_lomb, atol=1e-3)
"""


def test_max():
@@ -453,7 +438,7 @@ def test_max():
npt.assert_equal(f.values()[0], max(values))


# TODO this returns the index of the biggest slope...seems wrong
# TODO uncomment when feature is fixed
#def test_max_slope():
# """Test maximum slope feature, which finds the INDEX of the largest slope."""
# times, values, errors = irregular_random()
@@ -470,7 +455,7 @@ def test_median_absolute_deviation():
np.median(values))))


# TODO should replace with commented version once sign problems fixed
def test_percent_close_to_median():
"""Test feature which finds the percentage of points near the median value."""
times, values, errors = irregular_random()
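[Editor's note] One subtlety in the relative-phase assertions above: in Python, -1**j parses as -(1**j), which is always -1, so the expected value phase*j*(-1**j) reduces to -phase*j for every j. If an alternating sign was intended, the factor would need parentheses, ((-1)**j):

>>> [-1**j for j in range(1, 4)]
[-1, -1, -1]
>>> [(-1)**j for j in range(1, 4)]
[-1, 1, -1]
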
8 changes: 4 additions & 4 deletions mltsp/tests/data/test_features.csv
@@ -1,4 +1,4 @@
meta1,meta2,meta3,std_err
0.180734306909,0.548427238218,0.187956237253,0.00540072367701
0.196072341892,1.17178931753,0.174802803661,0.00665375480667
0.558093146298,0.265003093326,0.10984235246,0.00811408738542
meta1,meta2,meta3,std_err,amplitude
0.180734306909,0.548427238218,0.187956237253,0.00540072367701,0.0
0.196072341892,1.17178931753,0.174802803661,0.00665375480667,0.0
0.558093146298,0.265003093326,0.10984235246,0.00811408738542,0.0
8 changes: 4 additions & 4 deletions mltsp/tests/data/test_features_wcust.csv
@@ -1,4 +1,4 @@
meta1,meta2,meta3,std_err,f
0.180734306909,0.548427238218,0.187956237253,0.3622,0.3
0.196072341892,1.17178931753,0.174802803661,0.116672,0.2
0.558093146298,0.265003093326,0.10984235246,1.3343,1.4
meta1,meta2,meta3,std_err,amplitude,f
0.180734306909,0.548427238218,0.187956237253,0.3622,0.0,0.3
0.196072341892,1.17178931753,0.174802803661,0.116672,0.0,0.2
0.558093146298,0.265003093326,0.10984235246,1.3343,0.0,1.4
6 changes: 3 additions & 3 deletions mltsp/tests/data/test_features_with_classes.csv
@@ -1,4 +1,4 @@
class,meta1,meta2,meta3,std_err
class1,0.180734306909,0.548427238218,0.187956237253,0.00540072367701
class2,0.196072341892,1.17178931753,0.174802803661,0.00665375480667
class3,0.558093146298,0.265003093326,0.10984235246,0.00811408738542
class1,0.180734306909,0.548427238218,0.187956237253
class2,0.196072341892,1.17178931753,0.174802803661
class3,0.558093146298,0.265003093326,0.10984235246
20 changes: 10 additions & 10 deletions mltsp/tests/test_flask_app.py
@@ -1465,7 +1465,7 @@ def test_featurize_proc(self):
"asas_training_subset_classes_with_metadata.dat"),
zipfile_path=pjoin(cfg.UPLOAD_FOLDER,
"asas_training_subset.tar.gz"),
features_to_use=["std_err"],
features_to_use=["std_err", "amplitude"],
featureset_key="TEST01", is_test=True, email_user=False,
already_featurized=False,
custom_script_path=pjoin(cfg.UPLOAD_FOLDER,
@@ -1540,7 +1540,8 @@ def test_prediction_proc(self):
r.table("features").insert({"id": "TEMP_TEST01",
"name": "TEMP_TEST01",
"projkey": "TEMP_TEST01",
"featlist": ["std_err"]}).run(conn)
"featlist": ["std_err",
"amplitude"]}).run(conn)
r.table("projects").insert({"id": "TEMP_TEST01",
"name": "TEMP_TEST01"}).run(conn)
r.table("predictions").insert({"id": "TEMP_TEST01"}).run(conn)
@@ -1554,8 +1555,7 @@
entry = r.table("predictions").get("TEMP_TEST01").run(conn)
pred_results_list_dict = entry
assert(pred_results_list_dict["pred_results_list_dict"]
["TESTRUN_215153"][0][0] in ['Beta_Lyrae',
'Herbig_AEBE'])
["TESTRUN_215153"][0][0] in ['Beta_Lyrae', 'Herbig_AEBE'])

assert all(key in pred_results_list_dict for key in \
("ts_data_dict", "features_dict"))
@@ -2148,7 +2148,7 @@ def test_upload_data_featurize(self):
'featureset_name': 'abc123',
'featureset_project_name_select': 'abc123',
'sep': ',',
'features_selected': ['std_err'],
'features_selected': ['std_err', 'amplitude'],
'custom_script_tested': 'yes',
'custom_feat_script_file':
(open(pjoin(DATA_DIR, "testfeature1.py")),
@@ -2180,7 +2180,7 @@
"%s_features.csv" % new_key))
cols = df.columns
values = df.values
npt.assert_array_equal(sorted(cols), ["f", "std_err"])
npt.assert_array_equal(sorted(cols), ["amplitude", "f", "std_err"])
fpaths = []
for fpath in [
pjoin(cfg.FEATURES_FOLDER, "%s_features.csv" % new_key),
@@ -2235,7 +2235,7 @@ def test_upload_data_featurize_no_custom(self):
'featureset_name': 'abc123',
'featureset_project_name_select': 'abc123',
'sep': ',',
'features_selected': ['std_err'],
'features_selected': ['std_err', 'amplitude'],
'custom_script_tested': "no",
'is_test': 'True'})
res_dict = json.loads(rv.data)
@@ -2262,7 +2262,7 @@
"%s_features.csv" % new_key))
cols = df.columns
values = df.values
npt.assert_array_equal(sorted(cols), ["std_err"])
npt.assert_array_equal(sorted(cols), ["amplitude", "std_err"])
fpaths = []
for fpath in [
pjoin(cfg.FEATURES_FOLDER, "%s_features.csv" % new_key),
@@ -2386,7 +2386,7 @@ def test_featurization_page_already_featurized(self):
rv = fa.featurizationPage(
featureset_name="abc123", project_name="abc123",
headerfile_name=headerfile_name, zipfile_name=None,
sep=",", featlist=["std_err"], is_test=True,
sep=",", featlist=["std_err", "amplitude"], is_test=True,
email_user=False, already_featurized=True,
custom_script_path=custom_script_path)
res_dict = json.loads(rv.data)
@@ -2413,7 +2413,7 @@
cols = df.columns
values = df.values
npt.assert_array_equal(sorted(cols), ["meta1", "meta2", "meta3",
"std_err"])
"std_err", "amplitude"])
fpaths = []
for fpath in [
pjoin(cfg.FEATURES_FOLDER, "%s_features.csv" % new_key),
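[Editor's note] A note on the sorted(cols) assertions above: sorted() orders column names alphabetically, so "amplitude" precedes "meta1" and "std_err". The first two expected lists follow that order, but the last one appends "amplitude" after "std_err", which assert_array_equal would report as a mismatch against an alphabetically sorted column list:

>>> sorted(["meta1", "meta2", "meta3", "std_err", "amplitude"])
['amplitude', 'meta1', 'meta2', 'meta3', 'std_err']
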
53 changes: 53 additions & 0 deletions mltsp/tests/test_obs_features.py
@@ -0,0 +1,53 @@
from mltsp import obs_feature_tools as oft
import itertools

import numpy as np
import numpy.testing as npt


def irregular_random(seed=0, size=50):
"""Generate random test data at irregularly-sampled times."""
state = np.random.RandomState(seed)
times = np.sort(state.uniform(0, 10, size))
values = state.normal(1, 1, size)
errors = state.exponential(0.1, size)
return times, values, errors


def test_delta_t_hist():
"""Test histogram of all time lags."""
times, values, errors = irregular_random()
delta_ts = [pair[1] - pair[0] for pair in itertools.combinations(times, 2)]
nbins = 50
bins = np.linspace(0, max(times) - min(times), nbins+1)
npt.assert_allclose(oft.delta_t_hist(times, nbins), np.histogram(delta_ts,
bins=bins)[0])


def test_normalize_hist():
"""Test normalization of histogram."""
times, values, errors = irregular_random()
delta_ts = [pair[1] - pair[0] for pair in itertools.combinations(times, 2)]
nbins = 50
bins = np.linspace(0, max(times) - min(times), nbins+1)
nhist = oft.normalize_hist(oft.delta_t_hist(times, nbins), max(times) -
min(times))
npt.assert_allclose(nhist, np.histogram(delta_ts,
bins=bins, density=True)[0])

def test_find_sorted_peaks():
"""Test peak-finding algorithm."""
x = np.array([0,5,3,1]) # Single peak
npt.assert_allclose(oft.find_sorted_peaks(x), np.array([[1,5]]))

x = np.array([0,5,3,6,1]) # Multiple peaks
npt.assert_allclose(oft.find_sorted_peaks(x), np.array([[3,6],[1,5]]))

x = np.array([3,1,3]) # End-points can be peaks
npt.assert_allclose(oft.find_sorted_peaks(x), np.array([[0,3],[2,3]]))

x = np.array([0,3,3,3,0]) # In case of ties, peak is left-most point
npt.assert_allclose(oft.find_sorted_peaks(x), np.array([[1,3]]))

x = np.array([0,3,3,5,0]) # Tie is a peak only if greater than next value
npt.assert_allclose(oft.find_sorted_peaks(x), np.array([[3,5]]))
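
[Editor's note] The new tests pin down three obs_feature_tools helpers. A hypothetical reconstruction consistent with the assertions above, for readers without the source at hand (the real implementations live in mltsp/obs_feature_tools.py and may differ in detail):

import itertools

import numpy as np

def delta_t_hist(t, nbins=50):
    # Histogram of all pairwise lags t[j] - t[i] (j > i), binned over
    # [0, max(t) - min(t)].
    lags = np.array([b - a for a, b in itertools.combinations(t, 2)])
    bins = np.linspace(0, max(t) - min(t), nbins + 1)
    return np.histogram(lags, bins=bins)[0]

def normalize_hist(hist, total_time):
    # Convert counts to a density, matching np.histogram(..., density=True).
    bin_width = total_time / len(hist)
    return hist / (bin_width * hist.sum())

def find_sorted_peaks(x):
    # Return [index, value] pairs for local maxima, sorted by value
    # (descending). A plateau counts once, at its left-most index, and only
    # if strictly higher than both neighbors; end points may be peaks.
    x = np.asarray(x)
    n = len(x)
    peaks = []
    i = 0
    while i < n:
        j = i
        while j + 1 < n and x[j + 1] == x[i]:  # skip across a plateau
            j += 1
        left_ok = i == 0 or x[i - 1] < x[i]
        right_ok = j == n - 1 or x[j + 1] < x[i]
        if left_ok and right_ok:
            peaks.append([i, x[i]])
        i = j + 1
    peaks.sort(key=lambda p: -p[1])  # stable: equal peaks stay in index order
    return np.array(peaks)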
5 changes: 3 additions & 2 deletions mltsp/tests/test_predict.py
@@ -27,7 +27,8 @@ def test_determine_feats_used():
pjoin(DATA_PATH, "test_%s" % suffix),
pjoin(cfg.FEATURES_FOLDER, "TEST001_%s" % suffix))
feats_used = pred.determine_feats_used("TEST001")
npt.assert_array_equal(feats_used, ["meta1", "meta2", "meta3", "std_err"])
npt.assert_array_equal(feats_used, ["meta1", "meta2", "meta3",
                                    "std_err", "amplitude"])

for fname in ["TEST001_features.csv", "TEST001_classes.npy"]:
os.remove(pjoin(cfg.FEATURES_FOLDER, fname))
@@ -135,7 +136,7 @@ def test_do_model_predictions():
pjoin(cfg.FEATURES_FOLDER, "TEST001_%s" % suffix))
featset_key = "TEST001"
model_type = "RF"
features_to_use = ["std_err", "avg_err", "med_err", "n_epochs"]
features_to_use = ["std_err", "avg_err", "med_err", "n_epochs", "amplitude"]
data_dict = pred.featurize_tsdata(
pjoin(DATA_PATH, "dotastro_215153.dat"),
"TEST001",