diff --git a/mltsp/custom_feature_tools.py b/mltsp/custom_feature_tools.py
index d410856a..2f20f884 100644
--- a/mltsp/custom_feature_tools.py
+++ b/mltsp/custom_feature_tools.py
@@ -541,22 +541,26 @@ def generate_custom_features(custom_script_path, t, m, e,
 
     Parameters
     ----------
+    t : array_like
+        Array containing time values.
+
+    m : array_like
+        Array containing data values.
+
+    e : array_like
+        Array containing measurement error values.
+
     custom_script_path : str
         Path to custom features script.
 
-    ts_data : list OR tuple, optional
-        List (or tuple) of lists (or tuples) containing time,
-        measurement (and optionally associated error values) data.
-        Defaults to None. If None, path_to_csv must not be None,
-        otherwise raises an Exception.
+
     features_already_known : dict, optional
-        List of dicts containing any meta-features associated with
-        provided time-series data. Defaults to [].
+        Dict containing any meta-features associated with provided time-series
+        data. Defaults to {}.
 
     Returns
     -------
-    list of dict
-        List of dictionaries containing newly-generated features.
-
+    dict
+        Dictionary containing newly-generated features.
     """
     if "t" not in features_already_known:
         features_already_known['t'] = t
diff --git a/mltsp/featurize.py b/mltsp/featurize.py
index 852be758..2e180157 100644
--- a/mltsp/featurize.py
+++ b/mltsp/featurize.py
@@ -12,7 +12,6 @@ import numpy as np
 
 from . import cfg
-from . import lc_tools
 from . import custom_feature_tools as cft
 from . import util
 from . import custom_exceptions
@@ -146,7 +145,6 @@ def generate_features(headerfile_path, zipfile_path, features_to_use,
                       custom_script_path, is_test, already_featurized,
                       in_docker_container):
     """Generate features for provided time-series data."""
-    all_features_list = cfg.features_list_obs[:] + cfg.features_list_science[:]
     if already_featurized:
         # Read in features from CSV file
         objects = parse_prefeaturized_csv_data(headerfile_path)
diff --git a/mltsp/obs_feature_tools.py b/mltsp/obs_feature_tools.py
index 3d1a6319..bd93623e 100644
--- a/mltsp/obs_feature_tools.py
+++ b/mltsp/obs_feature_tools.py
@@ -1,8 +1,7 @@
-import copy
 import numpy as np
 import scipy.stats as stats
 from . import cfg
-from dask.async import get_sync as dget
+import dask.async
 
 
 def double_to_single_step(cads):
@@ -83,12 +82,22 @@ def generate_obs_features(t, m, e, features_to_compute=cfg.features_list_obs):
 
     Parameters
     ----------
+    t : array_like
+        Array containing time values.
+
+    m : array_like
+        Array containing data values.
+
+    e : array_like
+        Array containing measurement error values.
+
+    features_to_compute : list, optional
+        List of names of the desired features.
 
     Returns
     -------
     dict
         Dictionary containing generated time series features.
-
     """
     features_to_compute = [f for f in features_to_compute
                            if f in cfg.features_list_obs]
@@ -147,5 +156,5 @@ def generate_obs_features(t, m, e, features_to_compute=cfg.features_list_obs):
     # Do not execute in parallel; parallelization has already taken place at
     # the level of time series, so we compute features for a single time series
     # in serial.
-    values = dget(feature_graph, features_to_compute)
+    values = dask.async.get_sync(feature_graph, features_to_compute)
    return dict(zip(features_to_compute, values))
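A note on the `dget` to `dask.async.get_sync` change above (mirrored in science_feature_tools.py below): both names refer to dask's synchronous scheduler, and the rewrite only drops the import alias. For readers unfamiliar with the pattern, features are declared as a dask graph whose task tuples can reference other keys by name, and the graph is then evaluated serially. A minimal self-contained sketch with toy feature names (not mltsp's real feature set), assuming a dask version that still ships the legacy `dask.async` module:

```python
import dask.async  # legacy location; newer dask versions moved this to dask.local
import numpy as np

m = np.random.normal(1.0, 0.5, 100)  # toy measurement array

# Keys are feature names; values are (callable, *args) task tuples.
# A string argument that names another key is replaced by that key's result.
feature_graph = {
    'mean': (np.mean, m),
    'std': (np.std, m),
    'mean_over_std': (lambda mu, sigma: mu / sigma, 'mean', 'std'),
}

features_to_compute = ['mean', 'std', 'mean_over_std']
# get_sync resolves dependencies and runs everything in a single thread.
values = dask.async.get_sync(feature_graph, features_to_compute)
print(dict(zip(features_to_compute, values)))
```

As the in-code comment notes, parallelism already happens at the level of whole time series, so the serial scheduler avoids nested parallelism within a single series.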
diff --git a/mltsp/predict_class.py b/mltsp/predict_class.py
index 1d61dea9..a210efb5 100644
--- a/mltsp/predict_class.py
+++ b/mltsp/predict_class.py
@@ -12,9 +12,7 @@
 
 from . import cfg
 from . import custom_exceptions
-from . import lc_tools
 from . import custom_feature_tools as cft
-from . import util
 from .celery_tasks import pred_featurize_single
diff --git a/mltsp/science_feature_tools.py b/mltsp/science_feature_tools.py
index cd36108b..955fa604 100644
--- a/mltsp/science_feature_tools.py
+++ b/mltsp/science_feature_tools.py
@@ -1,10 +1,11 @@
 import numpy as np
 import cfg
 import science_features as sf
-from dask.async import get_sync as dget
+import dask.async
 
 
-def generate_science_features(t, m, e, features_to_compute=cfg.features_list_science):
+def generate_science_features(t, m, e,
+                              features_to_compute=cfg.features_list_science):
     """Generate science features for provided time series data.
 
     Parameters
     ----------
@@ -18,12 +19,14 @@ def generate_science_features(t, m, e, features_to_compute=cfg.features_list_sci
     e : array_like
         Array containing measurement error values.
 
+    features_to_compute : list, optional
+        List of names of the desired features.
+
     Returns
     -------
     dict
         Dictionary containing newly-generated features. Keys are feature
         names, values are feature values (floats).
-
     """
     features_to_compute = [f for f in features_to_compute
                            if f in cfg.features_list_science]
@@ -43,7 +46,8 @@ def generate_science_features(t, m, e, features_to_compute=cfg.features_list_sci
         'percent_amplitude': (sf.percent_amplitude, m),
         'percent_beyond_1_std': (sf.percent_beyond_1_std, m, e),
         'percent_close_to_median': (sf.percent_close_to_median, m),
-        'percent_difference_flux_percentile': (sf.percent_difference_flux_percentile, m),
+        'percent_difference_flux_percentile': (
+            sf.percent_difference_flux_percentile, m),
         'skew': (sf.skew, m),
         'std': (sf.std, m),
         'stetson_j': (sf.stetson_j, m),
@@ -114,12 +118,13 @@ def generate_science_features(t, m, e, features_to_compute=cfg.features_list_sci
         'p2p_model': (sf.p2p_model, t, m, 'freq1_freq'),
         'p2p_scatter_2praw': (sf.get_p2p_scatter_2praw, 'p2p_model'),
         'p2p_scatter_over_mad': (sf.get_p2p_scatter_over_mad, 'p2p_model'),
-        'p2p_scatter_pfold_over_mad': (sf.get_p2p_scatter_pfold_over_mad, 'p2p_model'),
+        'p2p_scatter_pfold_over_mad': (sf.get_p2p_scatter_pfold_over_mad,
+                                       'p2p_model'),
         'p2p_ssqr_diff_over_var': (sf.get_p2p_ssqr_diff_over_var, 'p2p_model'),
     }
 
     # Do not execute in parallel; parallelization has already taken place at
     # the level of time series, so we compute features for a single time series
     # in serial.
-    values = dget(feature_graph, features_to_compute)
+    values = dask.async.get_sync(feature_graph, features_to_compute)
     return dict(zip(features_to_compute, values))
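Hypothetical usage of the reformatted `generate_science_features` signature. The feature names below appear in the graph shown above, but the synthetic data and the import path are illustrative assumptions, not part of this patch:

```python
import numpy as np
from mltsp import science_feature_tools as sft

# Synthetic noisy sinusoid (illustrative data, not a test fixture).
t = np.sort(np.random.uniform(0, 10, 100))
m = np.sin(2 * np.pi * t) + np.random.normal(0, 0.1, 100)
e = np.full(100, 0.1)  # constant measurement errors

# Restrict computation to a few features instead of the full default list;
# unknown names are silently filtered against cfg.features_list_science.
feats = sft.generate_science_features(
    t, m, e, features_to_compute=['std', 'skew', 'percent_amplitude'])
print(feats)  # {'std': ..., 'skew': ..., 'percent_amplitude': ...}
```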
diff --git a/mltsp/science_features/tests/test_science_features.py b/mltsp/science_features/tests/test_science_features.py
index 5aa23b52..73612534 100644
--- a/mltsp/science_features/tests/test_science_features.py
+++ b/mltsp/science_features/tests/test_science_features.py
@@ -263,7 +263,6 @@ def test_lomb_scargle_regular_single_freq():
 
     # Only test the first (true) frequency; the rest correspond to noise
     for j in range(1, NUM_HARMONICS):
-        # TODO why is this what 'relative phase' means?
         npt.assert_allclose(phase*j*(-1**j),
                             all_lomb['freq1_rel_phase{}'.format(j+1)],
                             rtol=1e-2, atol=1e-2)
@@ -271,7 +270,6 @@ def test_lomb_scargle_regular_single_freq():
     for i in [2,3]:
         npt.assert_allclose(0., all_lomb['freq_amplitude_ratio_{}1'.format(i)],
                             atol=1e-3)
-    # TODO make significance test more precise
     npt.assert_array_less(10., all_lomb['freq1_signif'])
 
     # Only one frequency, so this should explain basically all the variance
@@ -325,7 +323,6 @@ def test_lomb_scargle_irregular_single_freq():
         npt.assert_allclose(phase*j*(-1**j),
                             all_lomb['freq1_rel_phase{}'.format(j+1)],
                             rtol=1e-1, atol=1e-1)
-    # TODO make significance test more precise
     npt.assert_array_less(10., all_lomb['freq1_signif'])
 
     # Only one frequency, so this should explain basically all the variance
@@ -399,13 +396,7 @@ def test_lomb_scargle_regular_multi_freq():
         npt.assert_allclose(amplitudes[i-1,0] / amplitudes[0,0],
                             all_lomb['freq_amplitude_ratio_{}1'.format(i)],
                             atol=2e-2)
-    # TODO make significance test more precise
     npt.assert_array_less(10., all_lomb['freq1_signif'])
-    """
-    e_name = 'freq_signif_ratio_{}1_extractor'.format(i)
-    e = getattr(extractors, e_name)()
-    npt.assert_allclose(0., all_lomb, atol=1e-3)
-    """
 
 
 def test_lomb_scargle_irregular_multi_freq():
@@ -437,13 +428,7 @@ def test_lomb_scargle_irregular_multi_freq():
         npt.assert_allclose(frequencies[i-1] / frequencies[0],
                             all_lomb['freq_frequency_ratio_{}1'.format(i)],
                             atol=5e-2)
-    # TODO make significance test more precise
     npt.assert_array_less(10., all_lomb['freq1_signif'])
-"""
-    e_name = 'freq_signif_ratio_{}1_extractor'.format(i)
-    e = getattr(extractors, e_name)()
-    npt.assert_allclose(0., all_lomb, atol=1e-3)
-"""
 
 
 def test_max():
@@ -453,7 +438,7 @@ def test_max():
     npt.assert_equal(f.values()[0], max(values))
 
 
-# TODO this returns the index of the biggest slope...seems wrong
+# TODO uncomment when feature is fixed
 #def test_max_slope():
 #    """Test maximum slope feature, which finds the INDEX of the largest slope."""
 #    times, values, errors = irregular_random()
@@ -470,7 +455,7 @@ def test_median_absolute_deviation():
                                              np.median(values))))
 
 
-    # TODO should replace with commented version once sign problems fixed
+# TODO should replace with commented version once sign problems fixed
 def test_percent_close_to_median():
     """Test feature which finds the percentage of points near the median value."""
     times, values, errors = irregular_random()
diff --git a/mltsp/tests/data/test_features.csv b/mltsp/tests/data/test_features.csv
index 2100e894..2f2887ad 100644
--- a/mltsp/tests/data/test_features.csv
+++ b/mltsp/tests/data/test_features.csv
@@ -1,4 +1,4 @@
-meta1,meta2,meta3,std_err
-0.180734306909,0.548427238218,0.187956237253,0.00540072367701
-0.196072341892,1.17178931753,0.174802803661,0.00665375480667
-0.558093146298,0.265003093326,0.10984235246,0.00811408738542
+meta1,meta2,meta3,std_err,amplitude
+0.180734306909,0.548427238218,0.187956237253,0.00540072367701,2.0
+0.196072341892,1.17178931753,0.174802803661,0.00665375480667,2.5
+0.558093146298,0.265003093326,0.10984235246,0.00811408738542,3.0
diff --git a/mltsp/tests/data/test_features_wcust.csv b/mltsp/tests/data/test_features_wcust.csv
index 68809e57..c0b865b1 100644
--- a/mltsp/tests/data/test_features_wcust.csv
+++ b/mltsp/tests/data/test_features_wcust.csv
@@ -1,4 +1,4 @@
-meta1,meta2,meta3,std_err,f
-0.180734306909,0.548427238218,0.187956237253,0.3622,0.3
-0.196072341892,1.17178931753,0.174802803661,0.116672,0.2
-0.558093146298,0.265003093326,0.10984235246,1.3343,1.4
+meta1,meta2,meta3,std_err,amplitude,f
+0.180734306909,0.548427238218,0.187956237253,0.3622,2.0,0.3
+0.196072341892,1.17178931753,0.174802803661,0.116672,2.5,0.2
+0.558093146298,0.265003093326,0.10984235246,1.3343,3.0,1.4
diff --git a/mltsp/tests/data/test_features_with_classes.csv b/mltsp/tests/data/test_features_with_classes.csv
index 4a3b3908..3a1a5cf7 100644
--- a/mltsp/tests/data/test_features_with_classes.csv
+++ b/mltsp/tests/data/test_features_with_classes.csv
@@ -1,4 +1,4 @@
-class,meta1,meta2,meta3,std_err
-class1,0.180734306909,0.548427238218,0.187956237253,0.00540072367701
-class2,0.196072341892,1.17178931753,0.174802803661,0.00665375480667
-class3,0.558093146298,0.265003093326,0.10984235246,0.00811408738542
+class,meta1,meta2,meta3,std_err,amplitude
+class1,0.180734306909,0.548427238218,0.187956237253,0.5,2.0
+class2,0.196072341892,1.17178931753,0.174802803661,0.23,2.5
+class3,0.558093146298,0.265003093326,0.10984235246,1.20,3.0
diff --git a/mltsp/tests/test_flask_app.py b/mltsp/tests/test_flask_app.py
index 714bf76a..b9073919 100644
--- a/mltsp/tests/test_flask_app.py
+++ b/mltsp/tests/test_flask_app.py
@@ -25,6 +25,7 @@ fa.db_init(force=True)
 
 
+
 def featurize_setup():
     fpaths = []
     dest_paths = []
@@ -88,6 +89,14 @@ def setUp(self):
         self.login()
         self.app.post('/check_user_table')
 
+    def tearDown(self):
+        """Reset database to initial empty state after each test. Leaves users,
+        userauth intact.
+        """
+        conn = r.connect(db="mltsp_testing")
+        for table_name in ["models", "features", "predictions", "projects"]:
+            r.table(table_name).delete().run(conn)
+
     def login(self, username=TEST_EMAIL, password=TEST_PASSWORD, app=None):
         if app is None:
             app = self.app
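This new `tearDown` is what makes the long run of per-test `r.table(...).delete()` removals in the hunks below safe: cleanup now happens once per test instead of inline in each test body. The pattern in isolation (table and database names as in the diff; assumes a local RethinkDB instance and the classic `rethinkdb` Python driver):

```python
import unittest

import rethinkdb as r


class DBBackedTestCase(unittest.TestCase):  # class name is illustrative
    def tearDown(self):
        """Empty the mutable tables after each test; users/userauth persist."""
        conn = r.connect(db="mltsp_testing")
        for table_name in ["models", "features", "predictions", "projects"]:
            r.table(table_name).delete().run(conn)
```

Because `delete()` on a whole table removes every document, tests no longer need to track which keys they inserted.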
@@ -236,7 +245,6 @@ def test_update_prediction_entry_with_results(self):
         npt.assert_equal(entry_dict["features_dict"], features_dict)
         npt.assert_equal(entry_dict["ts_data_dict"], ts_data)
         npt.assert_equal(entry_dict["pred_results_list_dict"], results)
-        r.table("predictions").get(key).delete().run(conn)
 
     def test_update_prediction_entry_with_results_err(self):
         """Test update prediction entry with results - w/ err msg"""
@@ -259,7 +267,6 @@ def test_update_prediction_entry_with_results_err(self):
         npt.assert_equal(entry_dict["ts_data_dict"], ts_data)
         npt.assert_equal(entry_dict["pred_results_list_dict"], results)
         npt.assert_equal(entry_dict["err_msg"], "err_msg")
-        r.table("predictions").get(key).delete().run(conn)
 
     def test_update_model_entry_with_results_msg(self):
         """Test update model entry with results msg"""
@@ -271,7 +278,6 @@ def test_update_model_entry_with_results_msg(self):
         fa.update_model_entry_with_results_msg(key, "MSG")
         entry_dict = r.table("models").get(key).run(conn)
         npt.assert_equal(entry_dict["results_msg"], "MSG")
-        r.table("models").get(key).delete().run(conn)
 
     def test_update_model_entry_with_results_msg_err(self):
         """Test update model entry with results - w/ err msg"""
@@ -284,7 +290,6 @@ def test_update_model_entry_with_results_msg_err(self):
         entry_dict = r.table("models").get(key).run(conn)
         npt.assert_equal(entry_dict["results_msg"], "MSG")
         npt.assert_equal(entry_dict["err_msg"], "ERR_MSG")
-        r.table("models").get(key).delete().run(conn)
 
     def test_update_featset_entry_with_results_msg(self):
         """Test update featset entry with results msg"""
@@ -296,7 +301,6 @@ def test_update_featset_entry_with_results_msg(self):
         fa.update_featset_entry_with_results_msg(key, "MSG")
         entry_dict = r.table("features").get(key).run(conn)
         npt.assert_equal(entry_dict["results_msg"], "MSG")
-        r.table("features").get(key).delete().run(conn)
 
     def test_update_featset_entry_with_results_msg_err(self):
         """Test update featset entry with results msg - err"""
@@ -309,7 +313,6 @@ def test_update_featset_entry_with_results_msg_err(self):
         entry_dict = r.table("features").get(key).run(conn)
         npt.assert_equal(entry_dict["results_msg"], "MSG")
         npt.assert_equal(entry_dict["err_msg"], "ERR_MSG")
-        r.table("features").get(key).delete().run(conn)
 
     def test_get_current_userkey(self):
         """Test get current user key"""
@@ -334,7 +337,6 @@ def test_get_all_projkeys(self):
             r.table("projects").insert({"id": key}).run(conn)
         all_projkeys = fa.get_all_projkeys()
         assert all(key in all_projkeys for key in keys)
-        r.table("projects").get_all(*keys).delete().run(conn)
 
     def test_get_authed_projkeys(self):
         """Test get authed project keys"""
@@ -410,10 +412,6 @@ def test_list_featuresets_authed(self):
                                     "featlist": [1, 2]}).run(conn)
         featsets = fa.list_featuresets()
         r.table("userauth").get("abc123").delete().run(conn)
-        r.table("projects").get("abc123").delete().run(conn)
-        r.table("features").get("abc123").delete().run(conn)
-        r.table("features").get("111").delete().run(conn)
-        r.table("projects").get("111").delete().run(conn)
         npt.assert_equal(len(featsets), 1)
         assert "created" in featsets[0] and "abc123" in featsets[0]
@@ -440,10 +438,6 @@ def test_list_featuresets_all(self):
                                     "featlist": [1, 2]}).run(conn)
         featsets = fa.list_featuresets(auth_only=False, name_only=True)
         r.table("userauth").get("abc123").delete().run(conn)
-        r.table("projects").get("abc123").delete().run(conn)
-        r.table("features").get("abc123").delete().run(conn)
-        r.table("features").get("111").delete().run(conn)
-        r.table("projects").get("111").delete().run(conn)
         assert len(featsets) > 1
         assert all("created" not in featset for featset in featsets)
@@ -475,10 +469,6 @@ def test_list_featuresets_html(self):
         featsets = fa.list_featuresets(auth_only=True, by_project="abc123",
                                        as_html_table_string=True)
         r.table("userauth").get("abc123").delete().run(conn)
-        r.table("projects").get("abc123").delete().run(conn)
-        r.table("features").get("abc123").delete().run(conn)
-        r.table("features").get("111").delete().run(conn)
-        r.table("projects").get("111").delete().run(conn)
         r.table("userauth").get("111").delete().run(conn)
         assert isinstance(featsets, (str, unicode))
         assert "table id" in featsets and "abc123" in featsets
@@ -509,10 +499,6 @@ def test_list_models_authed(self):
         models = fa.list_models()
         npt.assert_equal(len(models), 1)
         r.table("userauth").get("abc123").delete().run(conn)
-        r.table("projects").get("abc123").delete().run(conn)
-        r.table("models").get("abc123").delete().run(conn)
-        r.table("models").get("111").delete().run(conn)
-        r.table("projects").get("111").delete().run(conn)
         assert "created" in models[0] and "abc123" in models[0]
 
     def test_list_models_all(self):
@@ -540,10 +526,6 @@ def test_list_models_all(self):
                                   "meta_feats": ["1", "2"]}).run(conn)
         results = fa.list_models(auth_only=False, name_only=True)
         r.table("userauth").get("abc123").delete().run(conn)
-        r.table("projects").get("abc123").delete().run(conn)
-        r.table("models").get("abc123").delete().run(conn)
-        r.table("models").get("111").delete().run(conn)
-        r.table("projects").get("111").delete().run(conn)
         assert len(results) > 1
         assert all("created" not in result for result in results)
@@ -577,10 +559,6 @@ def test_list_models_html(self):
         results = fa.list_models(auth_only=True, by_project="abc123",
                                  as_html_table_string=True)
         r.table("userauth").get("abc123").delete().run(conn)
-        r.table("projects").get("abc123").delete().run(conn)
r.table("models").get("abc123").delete().run(conn) - r.table("models").get("111").delete().run(conn) - r.table("projects").get("111").delete().run(conn) r.table("userauth").get("111").delete().run(conn) assert isinstance(results, (str, unicode)) assert "table id" in results and "abc123" in results @@ -617,10 +595,6 @@ def test_list_preds_authed(self): .run(conn) results = fa.list_predictions(auth_only=True) r.table("userauth").get("abc123").delete().run(conn) - r.table("projects").get("abc123").delete().run(conn) - r.table("predictions").get("abc123").delete().run(conn) - r.table("predictions").get("111").delete().run(conn) - r.table("projects").get("111").delete().run(conn) npt.assert_equal(len(results), 1) assert "MODEL_NAME" in results[0] @@ -656,10 +630,6 @@ def test_list_predictions_all(self): .run(conn) results = fa.list_predictions(auth_only=False, detailed=False) r.table("userauth").get("abc123").delete().run(conn) - r.table("projects").get("abc123").delete().run(conn) - r.table("predictions").get("abc123").delete().run(conn) - r.table("predictions").get("111").delete().run(conn) - r.table("projects").get("111").delete().run(conn) assert len(results) > 1 assert all("created" not in result for result in results) @@ -700,10 +670,6 @@ def test_list_predictions_html(self): results = fa.list_predictions(by_project="abc123", as_html_table_string=True) r.table("userauth").get("abc123").delete().run(conn) - r.table("projects").get("abc123").delete().run(conn) - r.table("predictions").get("abc123").delete().run(conn) - r.table("predictions").get("111").delete().run(conn) - r.table("projects").get("111").delete().run(conn) r.table("userauth").get("111").delete().run(conn) assert isinstance(results, (str, unicode)) assert "table id" in results and "abc123" in results @@ -718,7 +684,6 @@ def test_get_list_of_projects(self): "email": TEST_EMAIL, "active": "y"}).run(conn) rv = self.app.get('/get_list_of_projects') - r.table("projects").get("abc123").delete().run(conn) r.table("userauth").get("abc123").delete().run(conn) assert '{' in rv.data assert isinstance(eval(rv.data), dict) @@ -739,8 +704,6 @@ def test_list_projects_authed(self): "name": "111"}).run(conn) results = fa.list_projects() r.table("userauth").get("abc123").delete().run(conn) - r.table("projects").get("abc123").delete().run(conn) - r.table("projects").get("111").delete().run(conn) npt.assert_equal(len(results), 1) assert "abc123" in results[0] @@ -759,8 +722,6 @@ def test_list_projects_all(self): "name": "111"}).run(conn) results = fa.list_projects(auth_only=False, name_only=True) r.table("userauth").get("abc123").delete().run(conn) - r.table("projects").get("abc123").delete().run(conn) - r.table("projects").get("111").delete().run(conn) assert len(results) >= 2 assert all("created" not in res for res in results) @@ -778,7 +739,6 @@ def test_add_project(self): auth_entries = [] for e in cur: auth_entries.append(e) - r.table("projects").get(new_projkey).delete().run(conn) r.table("userauth").get(auth_entries[0]["id"]).delete().run(conn) npt.assert_equal(len(auth_entries), 1) npt.assert_equal(auth_entries[0]["active"], "y") @@ -798,7 +758,6 @@ def test_add_project_addl_users(self): auth_entries = [] for e in cur: auth_entries.append(e) - r.table("projects").get(new_projkey).delete().run(conn) r.table("userauth").get(auth_entries[0]["id"]).delete().run(conn) r.table("userauth").get(auth_entries[1]["id"]).delete().run(conn) npt.assert_equal(len(auth_entries), 2) @@ -812,7 +771,6 @@ def test_add_featureset(self): new_featset_key = 
fa.add_featureset(name="TEST", projkey="abc", pid="2", featlist=['f1', 'f2']) entry = r.table("features").get(new_featset_key).run(conn) - r.table("features").get(new_featset_key).delete().run(conn) npt.assert_equal(entry['name'], "TEST") npt.assert_equal(entry['featlist'], ['f1', 'f2']) @@ -825,7 +783,6 @@ def test_add_model(self): featureset_key="123", model_type="RF", projkey="ABC", pid="2") entry = r.table("models").get(new_key).run(conn) - r.table("models").get(new_key).delete().run(conn) npt.assert_equal(entry['name'], "TEST") npt.assert_equal(entry['projkey'], "ABC") @@ -840,8 +797,6 @@ def test_add_model_meta_feats(self): featureset_key="123", model_type="RF", projkey="ABC", pid="2") entry = r.table("models").get(new_key).run(conn) - r.table("models").get(new_key).delete().run(conn) - r.table("features").get("123").delete().run(conn) npt.assert_equal(entry['name'], "TEST") npt.assert_equal(entry['projkey'], "ABC") npt.assert_equal(entry['meta_feats'], ['f1', 'f2']) @@ -859,8 +814,6 @@ def test_add_prediction(self): pred_filename="test.dat", pid="2") entry = r.table("predictions").get(new_key).run(conn) - r.table("predictions").get(new_key).delete().run(conn) - r.table("projects").get("abc123").delete().run(conn) npt.assert_equal(entry['project_name'], "abc123") npt.assert_equal(entry['metadata_file'], "None") @@ -890,10 +843,6 @@ def test_get_projects_associated_files(self): "results_str_html": "abcHTML"})\ .run(conn) fpaths = fa.project_associated_files("abc123") - r.table("features").get("abc123").delete().run(conn) - r.table("projects").get("abc123").delete().run(conn) - r.table("models").get("abc123").delete().run(conn) - r.table("predictions").get("abc123").delete().run(conn) short_fnames = [ntpath.basename(fpath) for fpath in fpaths] assert all(fname in short_fnames for fname in ["abc123_RF.pkl"]) @@ -911,7 +860,6 @@ def test_get_models_associated_files(self): "featset_key": "abc123", "meta_feats": ["a", "b", "c"]}).run(conn) fpaths = fa.model_associated_files("abc123") - r.table("models").get("abc123").delete().run(conn) short_fnames = [ntpath.basename(fpath) for fpath in fpaths] assert all(fname in short_fnames for fname in ["abc123_RF.pkl"]) @@ -929,7 +877,6 @@ def test_get_featsets_associated_files(self): "zipfile_path": "ZIPPATH.tar.gz", "featlist": ["a", "b", "c"]}).run(conn) fpaths = fa.featset_associated_files("abc123") - r.table("features").get("abc123").delete().run(conn) short_fnames = [ntpath.basename(fpath) for fpath in fpaths] assert all(fname in short_fnames for fname in ["ZIPPATH.tar.gz", "HEADPATH.dat"]) @@ -1081,10 +1028,6 @@ def test_get_project_details(self): "featlist": ["a", "b", "c"]}).run(conn) proj_info = fa.get_project_details("abc123") - r.table("features").get("abc123").delete().run(conn) - r.table("projects").get("abc123").delete().run(conn) - r.table("models").get("abc123").delete().run(conn) - r.table("predictions").get("abc123").delete().run(conn) r.table("userauth").get("abc123").delete().run(conn) r.table("userauth").get("abc123_2").delete().run(conn) assert all(email in proj_info["authed_users"] for email in @@ -1132,10 +1075,6 @@ def test_get_project_details_json(self): rv = self.app.post("/get_project_details/abc123") fa.app.preprocess_request() conn = fa.g.rdb_conn - r.table("features").get("abc123").delete().run(conn) - r.table("projects").get("abc123").delete().run(conn) - r.table("models").get("abc123").delete().run(conn) - r.table("predictions").get("abc123").delete().run(conn) r.table("userauth").get("abc123").delete().run(conn) 
r.table("userauth").get("abc123_2").delete().run(conn) res_dict = json.loads(rv.data) @@ -1173,7 +1112,6 @@ def test_project_name_to_key(self): r.table("projects").insert({"id": "abc123", "name": "abc123_name"}).run(conn) key = fa.project_name_to_key("abc123_name") - r.table("projects").get("abc123").delete().run(conn) npt.assert_equal(key, "abc123") def test_featureset_name_to_key(self): @@ -1186,7 +1124,6 @@ def test_featureset_name_to_key(self): "projkey": "abc123"}).run(conn) key = fa.featureset_name_to_key("abc123_name", project_id="abc123") - r.table("features").get("abc123").delete().run(conn) npt.assert_equal(key, "abc123") def test_featureset_name_to_key_projname(self): @@ -1201,8 +1138,6 @@ def test_featureset_name_to_key_projname(self): "name": "abc123_name"}).run(conn) key = fa.featureset_name_to_key("abc123_name", project_name="abc123_name") - r.table("features").get("abc123").delete().run(conn) - r.table("projects").get("abc123").delete().run(conn) npt.assert_equal(key, "abc123") def test_update_project_info(self): @@ -1224,8 +1159,6 @@ def test_update_project_info(self): "active": "y"}).run(conn) fa.update_project_info("abc123", "new_name", "DESC!", []) proj_dets = fa.get_project_details("new_name") - r.table("projects").get("abc123").delete().run(conn) - r.table("userauth").get("abc123").delete().run(conn) npt.assert_equal( r.table("userauth").filter( {"id": "abc123_2"}).count().run(conn), @@ -1280,8 +1213,6 @@ def test_update_project_info_delete_models(self): assert os.path.exists(pjoin(cfg.MODELS_FOLDER, "abc123_RF.pkl")) fa.update_project_info("abc123", "abc123", "", [], delete_model_keys=["abc123"]) - r.table("projects").get("abc123").delete().run(conn) - r.table("features").get("abc123").delete().run(conn) npt.assert_equal( r.table("models").filter({"id": "abc123"}).count().run(conn), 0) @@ -1325,7 +1256,6 @@ def test_get_all_info_dict(self): "userkey": TEST_EMAIL, "active": "y"}).run(conn) d = fa.get_all_info_dict() - r.table("projects").get("abc123").delete().run(conn) r.table("userauth").get("abc123").delete().run(conn) npt.assert_array_equal(d['list_of_current_projects'], ["abc123"]) @@ -1339,7 +1269,6 @@ def test_get_all_info_dict_unauthed(self): r.table("projects").insert({"id": "abc123", "name": "abc123"}).run(conn) d = fa.get_all_info_dict(auth_only=False) - r.table("projects").get("abc123").delete().run(conn) assert len(d["list_of_current_projects"]) > 0 def test_get_list_of_available_features(self): @@ -1465,14 +1394,13 @@ def test_featurize_proc(self): "asas_training_subset_classes_with_metadata.dat"), zipfile_path=pjoin(cfg.UPLOAD_FOLDER, "asas_training_subset.tar.gz"), - features_to_use=["std_err"], + features_to_use=["std_err", "amplitude"], featureset_key="TEST01", is_test=True, email_user=False, already_featurized=False, custom_script_path=pjoin(cfg.UPLOAD_FOLDER, "testfeature1.py")) finally: entry = r.table("features").get("TEST01").run(conn) - r.table("features").get("TEST01").delete().run(conn) assert(os.path.exists(pjoin(cfg.FEATURES_FOLDER, "TEST01_features.csv"))) assert(os.path.exists(pjoin(cfg.FEATURES_FOLDER, @@ -1509,8 +1437,6 @@ def test_build_model_proc(self): fa.build_model_proc("TEMP_TEST01", "TEMP_TEST01", "RF", "TEMP_TEST01") entry = r.table("models").get("TEMP_TEST01").run(conn) - r.table("models").get("TEMP_TEST01").delete().run(conn) - r.table("features").get("TEMP_TEST01").delete().run(conn) assert "results_msg" in entry assert os.path.exists(pjoin(cfg.MODELS_FOLDER, "TEMP_TEST01_RF.pkl")) @@ -1540,9 +1466,10 @@ def 
@@ -1540,9 +1466,10 @@ def test_prediction_proc(self):
         r.table("features").insert({"id": "TEMP_TEST01",
                                     "name": "TEMP_TEST01",
                                     "projkey": "TEMP_TEST01",
-                                    "featlist": ["std_err"]}).run(conn)
-        r.table("projects").insert({"id": "TEMP_TEST01",
-                                    "name": "TEMP_TEST01"}).run(conn)
+                                    "featlist": ["std_err",
+                                                 "amplitude"]}).run(conn)
+        r.table("projects").insert({"id": "TEMP_TEST01", "name":
+                                    "TEMP_TEST01"}).run(conn)
         r.table("predictions").insert({"id": "TEMP_TEST01"}).run(conn)
         fa.prediction_proc(
             pjoin(cfg.UPLOAD_FOLDER, "TESTRUN_215153.dat"),
@@ -1554,15 +1481,11 @@ def test_prediction_proc(self):
         entry = r.table("predictions").get("TEMP_TEST01").run(conn)
         pred_results_list_dict = entry
         assert(pred_results_list_dict["pred_results_list_dict"]
-               ["TESTRUN_215153"][0][0] in ['Beta_Lyrae',
-                                            'Herbig_AEBE'])
+               ["TESTRUN_215153"][0][0]
+               in ['Beta_Lyrae', 'Herbig_AEBE'])
 
-        assert all(key in pred_results_list_dict for key in \
+        assert all(key in pred_results_list_dict for key in
                    ("ts_data_dict", "features_dict"))
-        r.table("models").get("TEMP_TEST01").delete().run(conn)
-        r.table("projects").get("TEMP_TEST01").delete().run(conn)
-        r.table("features").get("TEMP_TEST01").delete().run(conn)
-        r.table("predictions").get("TEMP_TEST01").delete().run(conn)
         for fpath in [pjoin(cfg.UPLOAD_FOLDER, "TESTRUN_215153.dat"),
                       pjoin(cfg.UPLOAD_FOLDER,
                             "TESTRUN_215153_metadata.dat"),
@@ -1619,7 +1542,6 @@ def test_edit_project_form(self):
                                       'addl_authed_users_edit': ''})
         res_str = str(rv.data)
         entry = r.table("projects").get("TESTPROJ01").run(conn)
-        r.table("projects").get("TESTPROJ01").delete().run(conn)
         for e in r.table("userauth").filter({"userkey": TEST_EMAIL})\
                  .run(conn):
             r.table("userauth").get(e['id']).delete().run(conn)
@@ -1652,7 +1574,6 @@ def test_edit_project_form_delete_featset(self):
                                       'delete_features_key': 'abc123'})
         res_str = str(rv.data)
         entry = r.table("projects").get("abc123").run(conn)
-        r.table("projects").get("abc123").delete().run(conn)
         for e in r.table("userauth").filter({"userkey": TEST_EMAIL})\
                  .run(conn):
             r.table("userauth").get(e['id']).delete().run(conn)
@@ -1873,7 +1794,6 @@ def test_new_project(self):
         res_str = str(rv.data)
         entry = r.table("projects").filter({"name": "abc123"}).run(conn)\
                  .next()
-        r.table("projects").get(entry["id"]).delete().run(conn)
         for e in r.table("userauth").filter({"userkey": TEST_EMAIL})\
                  .run(conn):
             r.table("userauth").get(e['id']).delete().run(conn)
@@ -1890,7 +1810,6 @@ def test_new_project_url(self):
         res_str = str(rv.data)
         entry = r.table("projects").filter({"name": "abc123"}).run(conn)\
                  .next()
-        r.table("projects").get("abc123").delete().run(conn)
         for e in r.table("userauth").filter({"userkey": TEST_EMAIL})\
                  .run(conn):
             r.table("userauth").get(e['id']).delete().run(conn)
@@ -1910,7 +1829,6 @@ def test_edit_or_delete_project_form_edit(self):
                            data={"PROJECT_NAME_TO_EDIT": "abc123",
                                  'action': 'Edit'})
         res_dict = json.loads(rv.data)
-        r.table("projects").get("abc123").delete().run(conn)
         npt.assert_equal(res_dict["name"], "abc123")
         assert("featuresets" in res_dict)
         assert("authed_users" in res_dict)
@@ -1981,7 +1899,6 @@ def test_edit_or_delete_project_form_invalid(self):
                            data={"PROJECT_NAME_TO_EDIT": "abc123",
                                  'action': 'Invalid action!'})
         res_dict = json.loads(rv.data)
-        r.table("projects").get("abc123").delete().run(conn)
         npt.assert_equal(res_dict["error"], "Invalid request action.")
 
     def test_get_featureset_id_by_projname_and_featsetname(self):
@@ -1998,8 +1915,6 @@ def test_get_featureset_id_by_projname_and_featsetname(self):
                                     "featlist": ["a", "b", "c"]}).run(conn)
self.app.get("/get_featureset_id_by_projname_and_featsetname" "/abc123/abc123") - r.table("projects").get("abc123").delete().run(conn) - r.table("features").get("abc123").delete().run(conn) res_id = json.loads(rv.data)["featureset_id"] npt.assert_equal(res_id, "abc123") @@ -2021,9 +1936,6 @@ def test_get_list_of_featuresets_by_project(self): "zipfile_path": "ZIPPATH.tar.gz", "featlist": ["a", "b", "c"]}).run(conn) rv = self.app.get("/get_list_of_featuresets_by_project/abc123") - r.table("projects").get("abc123").delete().run(conn) - r.table("features").get("abc123").delete().run(conn) - r.table("features").get("abc123_2").delete().run(conn) featset_list = json.loads(rv.data)["featset_list"] npt.assert_array_equal(sorted(featset_list), ["abc123", "abc123_2"]) @@ -2035,19 +1947,16 @@ def test_get_list_of_models_by_project(self): r.table("projects").insert({"id": "abc123", "name": "abc123"}).run(conn) r.table("models").insert({"id": "abc123", "projkey": "abc123", - "name": "abc123", "created": "abc123", - "type": "RF", - "zipfile_path": "ZIPPATH.tar.gz", - "featlist": ["a", "b", "c"]}).run(conn) + "name": "abc123", "created": "abc123", + "type": "RF", + "zipfile_path": "ZIPPATH.tar.gz", + "featlist": ["a", "b", "c"]}).run(conn) r.table("models").insert({"id": "abc123_2", "projkey": "abc123", - "name": "abc123_2", "created": "abc", - "type": "RF", - "zipfile_path": "ZIPPATH.tar.gz", - "featlist": ["a", "b", "c"]}).run(conn) + "name": "abc123_2", "created": "abc", + "type": "RF", + "zipfile_path": "ZIPPATH.tar.gz", + "featlist": ["a", "b", "c"]}).run(conn) rv = self.app.get("/get_list_of_models_by_project/abc123") - r.table("projects").get("abc123").delete().run(conn) - r.table("models").get("abc123").delete().run(conn) - r.table("models").get("abc123_2").delete().run(conn) model_list = [e.split(" (created")[0] for e in json.loads(rv.data)["model_list"]] npt.assert_array_equal(sorted(model_list), ["abc123 - RF", @@ -2081,7 +1990,7 @@ def test_upload_features_form(self): assert(os.path.exists(pjoin(cfg.FEATURES_FOLDER, "%s_classes.npy" % new_key))) classes = list(np.load(pjoin(cfg.FEATURES_FOLDER, - "%s_classes.npy" % new_key))) + "%s_classes.npy" % new_key))) assert(all(class_name in ["class1", "class2", "class3"] for class_name in classes)) assert(os.path.exists(pjoin(pjoin(cfg.MLTSP_PACKAGE_PATH, @@ -2092,8 +2001,8 @@ def test_upload_features_form(self): "%s_features.csv" % new_key)) cols = df.columns values = df.values - npt.assert_array_equal(sorted(cols), ["meta1", "meta2", "meta3", - "std_err"]) + npt.assert_array_equal(sorted(cols), ["amplitude", "meta1", + "meta2", "meta3", "std_err"]) fpaths = [] for fpath in [ pjoin(cfg.FEATURES_FOLDER, "%s_features.csv" % new_key), @@ -2113,10 +2022,8 @@ def test_upload_features_form(self): if os.path.exists(fpath): os.remove(fpath) e = r.table('features').get(new_key).run(conn) - r.table('features').get(new_key).delete().run(conn) npt.assert_equal(e["name"], "abc123") r.table("features").get(new_key).delete().run(conn) - r.table("projects").get("abc123").delete().run(conn) count = r.table("features").filter({"id": new_key}).count()\ .run(conn) npt.assert_equal(count, 0) @@ -2148,7 +2055,7 @@ def test_upload_data_featurize(self): 'featureset_name': 'abc123', 'featureset_project_name_select': 'abc123', 'sep': ',', - 'features_selected': ['std_err'], + 'features_selected': ['std_err', 'amplitude'], 'custom_script_tested': 'yes', 'custom_feat_script_file': (open(pjoin(DATA_DIR, "testfeature1.py")), @@ -2167,7 +2074,7 @@ def 
@@ -2167,7 +2074,7 @@ def test_upload_data_featurize(self):
         assert(os.path.exists(pjoin(cfg.FEATURES_FOLDER,
                                     "%s_classes.npy" % new_key)))
         classes = list(np.load(pjoin(cfg.FEATURES_FOLDER,
-                               "%s_classes.npy" % new_key)))
+                                     "%s_classes.npy" % new_key)))
         assert(all(class_name in ['Mira', 'Herbig_AEBE', 'Beta_Lyrae',
                                   'Classical_Cepheid', 'W_Ursae_Maj',
                                   'Delta_Scuti']
@@ -2180,7 +2087,7 @@ def test_upload_data_featurize(self):
                                  "%s_features.csv" % new_key))
         cols = df.columns
         values = df.values
-        npt.assert_array_equal(sorted(cols), ["f", "std_err"])
+        npt.assert_array_equal(sorted(cols), ["amplitude", "f", "std_err"])
         fpaths = []
         for fpath in [
                 pjoin(cfg.FEATURES_FOLDER, "%s_features.csv" % new_key),
@@ -2200,7 +2107,6 @@ def test_upload_data_featurize(self):
             if os.path.exists(fpath):
                 os.remove(fpath)
         e = r.table('features').get(new_key).run(conn)
-        r.table('features').get(new_key).delete().run(conn)
         r.table("features").get(new_key).delete().run(conn)
         r.table("projects").get("abc123").delete().run(conn)
         count = r.table("features").filter({"id": new_key}).count()\
@@ -2235,7 +2141,7 @@ def test_upload_data_featurize_no_custom(self):
                  'featureset_name': 'abc123',
                  'featureset_project_name_select': 'abc123',
                  'sep': ',',
-                 'features_selected': ['std_err'],
+                 'features_selected': ['std_err', 'amplitude'],
                  'custom_script_tested': "no",
                  'is_test': 'True'})
         res_dict = json.loads(rv.data)
@@ -2249,7 +2155,7 @@ def test_upload_data_featurize_no_custom(self):
         assert(os.path.exists(pjoin(cfg.FEATURES_FOLDER,
                                     "%s_classes.npy" % new_key)))
         classes = list(np.load(pjoin(cfg.FEATURES_FOLDER,
-                               "%s_classes.npy" % new_key)))
+                                     "%s_classes.npy" % new_key)))
         assert(all(class_name in ['Mira', 'Herbig_AEBE', 'Beta_Lyrae',
                                   'Classical_Cepheid', 'W_Ursae_Maj',
                                   'Delta_Scuti']
@@ -2262,7 +2168,7 @@ def test_upload_data_featurize_no_custom(self):
                                  "%s_features.csv" % new_key))
         cols = df.columns
         values = df.values
-        npt.assert_array_equal(sorted(cols), ["std_err"])
+        npt.assert_array_equal(sorted(cols), ["amplitude", "std_err"])
         fpaths = []
         for fpath in [
                 pjoin(cfg.FEATURES_FOLDER, "%s_features.csv" % new_key),
@@ -2283,8 +2189,6 @@ def test_upload_data_featurize_no_custom(self):
                 os.remove(fpath)
         e = r.table('features').get(new_key).run(conn)
         r.table('features').get(new_key).delete().run(conn)
-        r.table("features").get(new_key).delete().run(conn)
-        r.table("projects").get("abc123").delete().run(conn)
         count = r.table("features").filter({"id": new_key}).count()\
                  .run(conn)
         npt.assert_equal(count, 0)
@@ -2323,7 +2227,7 @@ def test_featurization_page(self):
         assert(os.path.exists(pjoin(cfg.FEATURES_FOLDER,
                                     "%s_classes.npy" % new_key)))
         classes = list(np.load(pjoin(cfg.FEATURES_FOLDER,
-                               "%s_classes.npy" % new_key)))
+                                     "%s_classes.npy" % new_key)))
         assert(all(class_name in ['Mira', 'Herbig_AEBE', 'Beta_Lyrae',
                                   'Classical_Cepheid', 'W_Ursae_Maj',
                                   'Delta_Scuti']
@@ -2358,7 +2262,6 @@ def test_featurization_page(self):
                 os.remove(fpath)
         featurize_teardown()
         e = r.table('features').get(new_key).run(conn)
-        r.table('features').get(new_key).delete().run(conn)
         r.table("features").get(new_key).delete().run(conn)
         r.table("projects").get("abc123").delete().run(conn)
         count = r.table("features").filter({"id": new_key}).count()\
@@ -2386,7 +2289,7 @@ def test_featurization_page_already_featurized(self):
         rv = fa.featurizationPage(
             featureset_name="abc123", project_name="abc123",
             headerfile_name=headerfile_name, zipfile_name=None,
-            sep=",", featlist=["std_err"], is_test=True,
+            sep=",", featlist=["std_err", "amplitude"], is_test=True,
             email_user=False, already_featurized=True,
             custom_script_path=custom_script_path)
         res_dict = json.loads(rv.data)
@@ -2401,7 +2304,7 @@ def test_featurization_page_already_featurized(self):
         assert(os.path.exists(pjoin(cfg.FEATURES_FOLDER,
                                     "%s_classes.npy" % new_key)))
         classes = list(np.load(pjoin(cfg.FEATURES_FOLDER,
-                               "%s_classes.npy" % new_key)))
+                                     "%s_classes.npy" % new_key)))
         assert(all(class_name in ["class1", "class2", "class3"]
                    for class_name in classes))
         assert(os.path.exists(pjoin(pjoin(cfg.MLTSP_PACKAGE_PATH,
@@ -2412,7 +2315,7 @@ def test_featurization_page_already_featurized(self):
                                  "%s_features.csv" % new_key))
         cols = df.columns
         values = df.values
-        npt.assert_array_equal(sorted(cols), ["meta1", "meta2", "meta3",
+        npt.assert_array_equal(sorted(cols), ["amplitude", "meta1", "meta2", "meta3",
                                               "std_err"])
         fpaths = []
         for fpath in [
@@ -2439,10 +2342,8 @@ def test_featurization_page_already_featurized(self):
                 pass
         featurize_teardown()
         e = r.table('features').get(new_key).run(conn)
-        r.table('features').get(new_key).delete().run(conn)
         npt.assert_equal(e["name"], "abc123")
         r.table("features").get(new_key).delete().run(conn)
-        r.table("projects").get("abc123").delete().run(conn)
         count = r.table("features").filter({"id": new_key}).count()\
                  .run(conn)
         npt.assert_equal(count, 0)
@@ -2478,9 +2379,6 @@ def test_build_model(self):
             time.sleep(1)
         new_model_key = res_dict["new_model_key"]
         entry = r.table("models").get(new_model_key).run(conn)
-        r.table("models").get(new_model_key).delete().run(conn)
-        r.table("features").get("TEMP_TEST01").delete().run(conn)
-        r.table("projects").get("abc123").delete().run(conn)
         assert "results_msg" in entry
         assert os.path.exists(pjoin(cfg.MODELS_FOLDER,
                                     "TEMP_TEST01_RF.pkl"))
@@ -2531,10 +2429,6 @@ def test_upload_prediction_data(self):
             time.sleep(1)
         new_key = res_dict["prediction_entry_key"]
         entry = r.table('predictions').get(new_key).run(conn)
-        r.table("predictions").get(new_key).delete().run(conn)
-        r.table("projects").get("abc123").delete().run(conn)
-        r.table("features").get("TEMP_TEST01").delete().run(conn)
-        r.table("models").get("TEMP_TEST01").delete().run(conn)
         teardown_model()
         pred_results = entry["pred_results_list_dict"]
         feats_dict = entry["features_dict"]
@@ -2579,10 +2473,6 @@ def test_prediction_page(self):
             time.sleep(1)
         new_key = res_dict["prediction_entry_key"]
         entry = r.table('predictions').get(new_key).run(conn)
-        r.table("predictions").get(new_key).delete().run(conn)
-        r.table("projects").get("abc123").delete().run(conn)
-        r.table("features").get("TEMP_TEST01").delete().run(conn)
-        r.table("models").get("TEMP_TEST01").delete().run(conn)
         for f in dsts:
             try:
                 os.remove(f)
@@ -2604,12 +2494,11 @@ def test_load_source_data(self):
         fa.app.preprocess_request()
         conn = fa.g.rdb_conn
         r.table('predictions').insert({'id': 'abc123',
-                                        'pred_results_list_dict': {'a': 1},
-                                        'features_dict': {'a': 1},
-                                        'ts_data_dict': {'a': 1}}).run(conn)
+                                       'pred_results_list_dict': {'a': 1},
+                                       'features_dict': {'a': 1},
+                                       'ts_data_dict': {'a': 1}}).run(conn)
         rv = fa.load_source_data('abc123', 'a')
         res_dict = json.loads(rv.data)
-        r.table("predictions").get("abc123").delete().run(conn)
         for k in ["pred_results", "features_dict", "ts_data"]:
             npt.assert_equal(res_dict[k], 1)
@@ -2619,12 +2508,11 @@ def test_load_source_data_url(self):
         fa.app.preprocess_request()
         conn = fa.g.rdb_conn
         r.table('predictions').insert({'id': 'abc123',
-                                        'pred_results_list_dict': {'a': 1},
-                                        'features_dict': {'a': 1},
-                                        'ts_data_dict': {'a': 1}}).run(conn)
+                                       'pred_results_list_dict': {'a': 1},
+                                       'features_dict': {'a': 1},
+                                       'ts_data_dict': {'a': 1}}).run(conn)
         rv = self.app.get("/load_source_data/abc123/a")
         res_dict = json.loads(rv.data)
-        r.table("predictions").get("abc123").delete().run(conn)
         for k in ["pred_results", "features_dict", "ts_data"]:
             npt.assert_equal(res_dict[k], 1)
@@ -2640,7 +2528,6 @@ def test_load_prediction_results(self):
                                        "results_str_html": "a"}).run(conn)
         rv = fa.load_prediction_results('abc123')
         res_dict = json.loads(rv.data)
-        r.table("predictions").get("abc123").delete().run(conn)
         npt.assert_array_equal(res_dict, {'id': 'abc123',
                                           'pred_results_list_dict': {'a': 1},
                                           'features_dict': {'a': 1},
@@ -2659,7 +2546,6 @@ def test_load_prediction_results_url(self):
                                        "results_str_html": "a"}).run(conn)
         rv = self.app.get("/load_prediction_results/abc123")
         res_dict = json.loads(rv.data)
-        r.table("predictions").get("abc123").delete().run(conn)
         npt.assert_equal(res_dict, {'id': 'abc123',
                                     'pred_results_list_dict': {'a': 1},
                                     'features_dict': {'a': 1},
@@ -2679,7 +2565,6 @@ def test_load_model_build_results(self):
                                   "results_str_html": "a"}).run(conn)
         rv = fa.load_model_build_results("abc123")
         res_dict = json.loads(rv.data)
-        r.table("models").get("abc123").delete().run(conn)
         npt.assert_equal(res_dict, {'id': 'abc123',
                                     'pred_results_list_dict': {'a': 1},
                                     'features_dict': {'a': 1},
@@ -2734,7 +2619,6 @@ def test_load_model_build_results_url(self):
                                   "results_str_html": "a"}).run(conn)
         rv = self.app.get("/load_model_build_results/abc123")
         res_dict = json.loads(rv.data)
-        r.table("models").get("abc123").delete().run(conn)
         npt.assert_equal(res_dict, {'id': 'abc123',
                                     'pred_results_list_dict': {'a': 1},
                                     'features_dict': {'a': 1},
@@ -2755,7 +2639,6 @@ def test_load_featurization_results(self):
                                     "results_str_html": "a"}).run(conn)
         rv = fa.load_featurization_results("abc123")
         res_dict = json.loads(rv.data)
-        r.table("features").get("abc123").delete().run(conn)
         npt.assert_equal(res_dict, {'id': 'abc123',
                                     'pred_results_list_dict': {'a': 1},
                                     'features_dict': {'a': 1},
@@ -2775,7 +2658,6 @@ def test_load_featurization_results_no_status_msg(self):
                                     "results_str_html": "a"}).run(conn)
         rv = fa.load_featurization_results("abc123")
         res_dict = json.loads(rv.data)
-        r.table("features").get("abc123").delete().run(conn)
         npt.assert_equal(res_dict,
                          {"results_msg": ("No status message could be found for "
                                           "this process.")})
diff --git a/mltsp/tests/test_obs_features.py b/mltsp/tests/test_obs_features.py
new file mode 100644
index 00000000..85aa6ae4
--- /dev/null
+++ b/mltsp/tests/test_obs_features.py
@@ -0,0 +1,54 @@
+from mltsp import obs_feature_tools as oft
+import itertools
+
+import numpy as np
+import numpy.testing as npt
+
+
+def irregular_random(seed=0, size=50):
+    """Generate random test data at irregularly-sampled times."""
+    state = np.random.RandomState(seed)
+    times = np.sort(state.uniform(0, 10, size))
+    values = state.normal(1, 1, size)
+    errors = state.exponential(0.1, size)
+    return times, values, errors
+
+
+def test_delta_t_hist():
+    """Test histogram of all time lags."""
+    times, values, errors = irregular_random()
+    delta_ts = [pair[1] - pair[0] for pair in itertools.combinations(times, 2)]
+    nbins = 50
+    bins = np.linspace(0, max(times) - min(times), nbins+1)
+    npt.assert_allclose(oft.delta_t_hist(times, nbins),
+                        np.histogram(delta_ts, bins=bins)[0])
+
+
+def test_normalize_hist():
+    """Test normalization of histogram."""
+    times, values, errors = irregular_random()
+    delta_ts = [pair[1] - pair[0] for pair in itertools.combinations(times, 2)]
+    nbins = 50
+    bins = np.linspace(0, max(times) - min(times), nbins+1)
+    nhist = oft.normalize_hist(oft.delta_t_hist(times, nbins), max(times) -
+                               min(times))
+    npt.assert_allclose(nhist,
+                        np.histogram(delta_ts, bins=bins, density=True)[0])
+
+
+def test_find_sorted_peaks():
+    """Test peak-finding algorithm."""
+    x = np.array([0,5,3,1])  # Single peak
+    npt.assert_allclose(oft.find_sorted_peaks(x), np.array([[1,5]]))
+
+    x = np.array([0,5,3,6,1])  # Multiple peaks
+    npt.assert_allclose(oft.find_sorted_peaks(x), np.array([[3,6],[1,5]]))
+
+    x = np.array([3,1,3])  # End-points can be peaks
+    npt.assert_allclose(oft.find_sorted_peaks(x), np.array([[0,3],[2,3]]))
+
+    x = np.array([0,3,3,3,0])  # In case of ties, peak is left-most point
+    npt.assert_allclose(oft.find_sorted_peaks(x), np.array([[1,3]]))
+
+    x = np.array([0,3,3,5,0])  # Tie is a peak only if greater than next value
+    npt.assert_allclose(oft.find_sorted_peaks(x), np.array([[3,5]]))
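The new `test_delta_t_hist` above pins down `oft.delta_t_hist` as a histogram of all pairwise time lags over `nbins` equal-width bins spanning the total baseline. A naive reference implementation implied by that test (an illustration of the contract, not mltsp's actual code):

```python
import itertools

import numpy as np


def delta_t_hist_reference(times, nbins=50):
    """Histogram of all pairwise lags t_j - t_i (j > i)."""
    lags = [b - a for a, b in itertools.combinations(np.sort(times), 2)]
    bins = np.linspace(0, np.max(times) - np.min(times), nbins + 1)
    return np.histogram(lags, bins=bins)[0]
```

`normalize_hist` then corresponds to `density=True` normalization over the same bins, per `test_normalize_hist`.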
diff --git a/mltsp/tests/test_predict.py b/mltsp/tests/test_predict.py
index 3aa5dc49..422bfa42 100644
--- a/mltsp/tests/test_predict.py
+++ b/mltsp/tests/test_predict.py
@@ -27,7 +27,8 @@ def test_determine_feats_used():
             pjoin(DATA_PATH, "test_%s" % suffix),
             pjoin(cfg.FEATURES_FOLDER, "TEST001_%s" % suffix))
     feats_used = pred.determine_feats_used("TEST001")
-    npt.assert_array_equal(feats_used, ["meta1", "meta2", "meta3", "std_err"])
+    npt.assert_array_equal(feats_used, ["meta1", "meta2", "meta3",
+                                        "std_err", "amplitude"])
     for fname in ["TEST001_features.csv", "TEST001_classes.npy"]:
         os.remove(pjoin(cfg.FEATURES_FOLDER, fname))
@@ -135,7 +136,7 @@ def test_do_model_predictions():
             pjoin(cfg.FEATURES_FOLDER, "TEST001_%s" % suffix))
     featset_key = "TEST001"
     model_type = "RF"
-    features_to_use = ["std_err", "avg_err", "med_err", "n_epochs"]
+    features_to_use = ["std_err", "avg_err", "med_err", "n_epochs", "amplitude"]
     data_dict = pred.featurize_tsdata(
         pjoin(DATA_PATH, "dotastro_215153.dat"), "TEST001",