Skip to content

Commit

Permalink
Remove the possibility in DFtoVW to provide constant values (#2601)
Browse files Browse the repository at this point in the history
This commit also:
- removes unnecessary code linked with *_from_df
- removes related tests
- cleans docstring
- In Feature, rename the argument `name` to `rename_feature`
- In the *Label classes, rename the argument `name` to `label`
  • Loading branch information
etiennekintzler committed Oct 14, 2020
1 parent aeb7bc7 commit 54a3778
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 279 deletions.
61 changes: 13 additions & 48 deletions python/tests/test_pyvw.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,36 +396,25 @@ def test_feature_column_renaming_and_tag():
conv = DFtoVW(
label=SimpleLabel("y"),
tag="idx",
features=Feature(name="col_x", value="x"),
features=Feature(value="x", rename_feature="col_x"),
df=df,
)
first_line = conv.convert_df()[0]
assert first_line == "1 id_1| col_x:2"


def test_constant_feature_value_with_empty_name():
def test_feature_value_with_empty_name():
df = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})
conv = DFtoVW(
label=SimpleLabel("y"),
tag="idx",
features=Feature(name="", value=2, value_from_df=False),
features=Feature(value="x", rename_feature=""),
df=df,
)
first_line = conv.convert_df()[0]
assert first_line == "1 id_1| :2"


def test_variable_feature_name():
df = pd.DataFrame({"y": [1], "x": [2], "a": ["col_x"]})
conv = DFtoVW(
label=SimpleLabel("y"),
features=Feature(name="a", value="x", name_from_df=True),
df=df,
)
first_line = conv.convert_df()[0]
assert first_line == "1 | col_x:2"


def test_multiple_lines():
df = pd.DataFrame({"y": [1, -1], "x": [1, 2]})
conv = DFtoVW(label=SimpleLabel("y"), features=Feature(value="x"), df=df,)
Expand Down Expand Up @@ -457,7 +446,7 @@ def test_without_target_multiple_features():
def test_multiclasslabel():
df = pd.DataFrame({"a": [1], "b": [0.5], "c": ["x"]})
conv = DFtoVW(
df=df, label=MulticlassLabel(name="a", weight="b"), features=Feature("c")
df=df, label=MulticlassLabel(label="a", weight="b"), features=Feature("c")
)
first_line = conv.convert_df()[0]
assert first_line == "1 0.5 | x"
Expand Down Expand Up @@ -498,8 +487,8 @@ def test_absent_col_error():
def test_non_numerical_simplelabel_error():
df = pd.DataFrame({"y": ["a"], "x": ["featX"]})
with pytest.raises(TypeError) as type_error:
DFtoVW(df=df, label=SimpleLabel(name="y"), features=Feature("x"))
expected = "In argument 'name' of 'SimpleLabel', column 'y' should be either of the following type(s): 'int', 'float', 'int64'."
DFtoVW(df=df, label=SimpleLabel(label="y"), features=Feature("x"))
expected = "In argument 'label' of 'SimpleLabel', column 'y' should be either of the following type(s): 'int', 'float', 'int64'."
assert expected == str(type_error.value)


Expand All @@ -511,15 +500,15 @@ def test_wrong_feature_type_error():
assert expected == str(type_error.value)


def test_multiclasslabel_non_positive_name_error():
def test_multiclasslabel_non_positive_label_error():
df = pd.DataFrame({"a": [0], "b": [0.5], "c": ["x"]})
with pytest.raises(ValueError) as value_error:
DFtoVW(
df=df,
label=MulticlassLabel(name="a", weight="b"),
label=MulticlassLabel(label="a", weight="b"),
features=Feature("c"),
)
expected = "In argument 'name' of 'MulticlassLabel', column 'a' must be >= 1."
expected = "In argument 'label' of 'MulticlassLabel', column 'a' must be >= 1."
assert expected == str(value_error.value)


Expand All @@ -528,44 +517,20 @@ def test_multiclasslabel_negative_weight_error():
with pytest.raises(ValueError) as value_error:
DFtoVW(
df=df,
label=MulticlassLabel(name="y", weight="w"),
label=MulticlassLabel(label="y", weight="w"),
features=Feature("x"),
)
expected = "In argument 'weight' of 'MulticlassLabel', column 'w' must be >= 0."
assert expected == str(value_error.value)


def test_multiclasslabel_non_positive_constant_label_error():
df = pd.DataFrame({"a": [0], "b": [0.5], "c": ["x"]})
with pytest.raises(ValueError) as value_error:
DFtoVW(
df=df,
label=MulticlassLabel(name=-1, weight="b", name_from_df=False),
features=Feature("c"),
)
expected = "In 'MulticlassLabel', argument 'name' must be >= 1."
assert expected == str(value_error.value)


def test_multiclasslabel_constant_label_type_error():
df = pd.DataFrame({"a": [0], "b": [0.5], "c": ["x"]})
with pytest.raises(TypeError) as type_error:
DFtoVW(
df=df,
label=MulticlassLabel(name="a", weight="b", weight_from_df=False),
features=Feature("c"),
)
expected = "In 'MulticlassLabel', when weight_from_df=False, argument 'weight' should be either of the following type(s): 'int', 'float'."
assert expected == str(type_error.value)


def test_multilabel_non_positive_name_error():
def test_multilabel_non_positive_label_error():
df = pd.DataFrame({"y": [0], "b": [1]})
with pytest.raises(ValueError) as value_error:
DFtoVW(
df=df,
label=MultiLabel(name="y"),
label=MultiLabel(label="y"),
features=Feature("b"),
)
expected = "In argument 'name' of 'MultiLabel', column 'y' must be >= 1."
expected = "In argument 'label' of 'MultiLabel', column 'y' must be >= 1."
assert expected == str(value_error.value)

0 comments on commit 54a3778

Please sign in to comment.