Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed #297: update tests to check error strings (#303)

Merged
merged 2 commits into from
Nov 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,6 @@ ENV/
*.pickle

.pytest_cache

#IDE
.vscode
Original file line number Diff line number Diff line change
Expand Up @@ -69,26 +69,36 @@ def test_calc_feature_matrix(entityset):

assert (feature_matrix == labels).values.all()

with pytest.raises(AssertionError):
error_text = 'features must be a non-empty list of features'
with pytest.raises(AssertionError, match=error_text):
feature_matrix = calculate_feature_matrix('features', entityset, cutoff_time=cutoff_time)
with pytest.raises(AssertionError):

with pytest.raises(AssertionError, match=error_text):
feature_matrix = calculate_feature_matrix([], entityset, cutoff_time=cutoff_time)
with pytest.raises(AssertionError):

with pytest.raises(AssertionError, match=error_text):
feature_matrix = calculate_feature_matrix([1, 2, 3], entityset, cutoff_time=cutoff_time)
with pytest.raises(TypeError):

error_text = ".*type object 17"
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
entityset,
instance_ids=range(17),
cutoff_time=17)
with pytest.raises(TypeError):

error_text = 'cutoff_time must be a single value or DataFrame'
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
entityset,
instance_ids=range(17),
cutoff_time=times)

cutoff_times_dup = pd.DataFrame({'time': [pd.datetime(2018, 3, 1),
pd.datetime(2018, 3, 1)],
entityset['log'].index: [1, 1]})
with pytest.raises(AssertionError):

error_text = 'Duplicated rows in cutoff time dataframe.'
with pytest.raises(AssertionError, match=error_text):
feature_matrix = calculate_feature_matrix([property_feature],
entityset=entityset,
cutoff_time=cutoff_times_dup)
Expand Down Expand Up @@ -280,7 +290,8 @@ def test_training_window(entityset):

entityset.add_last_time_indexes()

with pytest.raises(AssertionError):
error_text = 'training window must be an absolute Timedelta'
with pytest.raises(AssertionError, match=error_text):
feature_matrix = calculate_feature_matrix([property_feature],
entityset,
cutoff_time=cutoff_time,
Expand Down Expand Up @@ -646,7 +657,8 @@ def test_cutoff_time_naming(entityset):

assert all((fm1 == fm2.values).values)

with pytest.raises(AttributeError):
error_text = 'Name of the index variable in the target entity or "instance_id" must be present in cutoff_time'
with pytest.raises(AttributeError, match=error_text):
calculate_feature_matrix([dfeat], entityset, cutoff_time=cutoff_df_wrong_index_name)


Expand Down Expand Up @@ -742,13 +754,15 @@ def test_calculating_number_per_chunk():
singleton = pd.DataFrame({'time': [pd.Timestamp('2011-04-08 10:30:00')],
'instance_id': [0]})
shape = cutoff_df.shape
with pytest.raises(ValueError):

error_text = "chunk_size must be None, a float between 0 and 1,a positive integer, or the string 'cutoff time'"
with pytest.raises(ValueError, match=error_text):
calc_num_per_chunk(-1, shape)

with pytest.raises(ValueError):
with pytest.raises(ValueError, match=error_text):
calc_num_per_chunk("test", shape)

with pytest.raises(ValueError):
with pytest.raises(ValueError, match=error_text):
calc_num_per_chunk(2.5, shape)

with pytest.warns(UserWarning):
Expand Down Expand Up @@ -939,7 +953,7 @@ def test_not_enough_memory(self, entityset, monkeypatch):
'Client',
MockClient)
# errors if not enough memory for each worker to store the entityset
with pytest.raises(ValueError):
with pytest.raises(ValueError, match=''):
create_client_and_cluster(n_jobs=1,
num_tasks=5,
dask_kwargs={},
Expand All @@ -962,7 +976,8 @@ def test_parallel_failure_raises_correct_error(entityset):
cutoff_time = pd.DataFrame({'time': times, 'instance_id': range(17)})
property_feature = IdentityFeature(entityset['log']['value']) > 10

with pytest.raises(AssertionError):
error_text = 'Need at least one worker'
with pytest.raises(AssertionError, match=error_text):
calculate_feature_matrix([property_feature],
entityset=entityset,
cutoff_time=cutoff_time,
Expand All @@ -984,7 +999,9 @@ def test_n_jobs(entityset):
assert n_jobs_to_workers((cpus + 1) * -1) == 1
if cpus > 1:
assert n_jobs_to_workers(-2) == cpus - 1
with pytest.raises(AssertionError):

error_text = 'Need at least one worker'
with pytest.raises(AssertionError, match=error_text):
n_jobs_to_workers(0)


Expand All @@ -1010,7 +1027,8 @@ def test_integer_time_index_datetime_cutoffs(int_es):
cutoff_df = pd.DataFrame({'time': times, 'instance_id': range(17)})
property_feature = IdentityFeature(int_es['log']['value']) > 10

with pytest.raises(TypeError):
error_text = "Cannot compare type.*"
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
int_es,
cutoff_time=cutoff_df,
Expand Down Expand Up @@ -1045,21 +1063,22 @@ def test_integer_time_index_mixed_cutoff(int_es):
cutoff_df = cutoff_df[['time', 'instance_id', 'labels']]
property_feature = IdentityFeature(int_es['log']['value']) > 10

with pytest.raises(TypeError):
error_text = 'cutoff_time times must be.*try casting via.*'
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
int_es,
cutoff_time=cutoff_df)

times_str = list(range(8, 17)) + ["foobar", 19, 20, 21, 22, 25, 24, 23]
cutoff_df['time'] = times_str
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
int_es,
cutoff_time=cutoff_df)

times_date_str = list(range(8, 17)) + ['2018-04-02', 19, 20, 21, 22, 25, 24, 23]
cutoff_df['time'] = times_date_str
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
int_es,
cutoff_time=cutoff_df)
Expand All @@ -1069,7 +1088,7 @@ def test_integer_time_index_mixed_cutoff(int_es):
times_int_str = list(range(8, 17)) + ['17', 19, 20, 21, 22, 25, 24, 23]
cutoff_df['time'] = times_int_str
# an int-like string such as '17' is not cast automatically: calculate_feature_matrix is expected to raise TypeError here
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
int_es,
cutoff_time=cutoff_df)
Expand All @@ -1090,27 +1109,28 @@ def test_datetime_index_mixed_cutoff(entityset):
cutoff_df = cutoff_df[['time', 'instance_id', 'labels']]
property_feature = IdentityFeature(entityset['log']['value']) > 10

with pytest.raises(TypeError):
error_text = 'cutoff_time times must be.*try casting via.*'
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
entityset,
cutoff_time=cutoff_df)

times[9] = "foobar"
cutoff_df['time'] = times
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
entityset,
cutoff_time=cutoff_df)

cutoff_df['time'].iloc[9] = '2018-04-02 18:50:45.453216'
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
entityset,
cutoff_time=cutoff_df)

times[9] = '17'
cutoff_df['time'] = times
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
entityset,
cutoff_time=cutoff_df)
Expand All @@ -1121,7 +1141,8 @@ def test_string_time_values_in_cutoff_time(entityset):
cutoff_time = pd.DataFrame({'time': times, 'instance_id': [0, 0]})
agg_feature = Sum(entityset['log']['value'], entityset['customers'])

with pytest.raises(TypeError):
error_text = 'cutoff_time times must be.*try casting via.*'
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([agg_feature], entityset, cutoff_time=cutoff_time)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ def test_encode_features_catches_features_mismatch(entityset):

assert 'label' in feature_matrix.columns

with pytest.raises(AssertionError):
error_text = 'Feature session_id not found in feature matrix'
with pytest.raises(AssertionError, match=error_text):
encode_features(feature_matrix, [f1, f3])


Expand Down
3 changes: 2 additions & 1 deletion featuretools/tests/dfs_tests/test_deep_feature_synthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ def find_other_agg_features(features):


def test_ignores_entities(es):
with pytest.raises(TypeError):
error_text = 'ignore_entities must be a list'
with pytest.raises(TypeError, match=error_text):
DeepFeatureSynthesis(target_entity_id='sessions',
entityset=es,
agg_primitives=[Last],
Expand Down
10 changes: 7 additions & 3 deletions featuretools/tests/entityset_tests/test_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ def es():

def test_enforces_variable_id_is_str(es):
assert variable_types.Categorical("1", es["customers"])
with pytest.raises(AssertionError):

error_text = 'Variable id must be a string'
with pytest.raises(AssertionError, match=error_text):
variable_types.Categorical(1, es["customers"])


Expand Down Expand Up @@ -67,10 +69,12 @@ def test_update_data(es):
df = es['customers'].df.copy()
df['new'] = [1, 2, 3]

with pytest.raises(ValueError) as excinfo:
error_text = 'Updated dataframe is missing new cohort column'
with pytest.raises(ValueError, match=error_text) as excinfo:
es['customers'].update_data(df.drop(columns=['cohort']))
assert 'Updated dataframe is missing new cohort column' in str(excinfo)

with pytest.raises(ValueError) as excinfo:
error_text = 'Updated dataframe contains 13 columns, expecting 12'
with pytest.raises(ValueError, match=error_text) as excinfo:
es['customers'].update_data(df)
assert 'Updated dataframe contains 13 columns, expecting 12' in str(excinfo)
Loading