Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove the possibility in DFtoVW to provide constant values #2601

Merged — 7 commits merged on Oct 14, 2020
61 changes: 13 additions & 48 deletions python/tests/test_pyvw.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -396,36 +396,25 @@ def test_feature_column_renaming_and_tag():
conv = DFtoVW(
label=SimpleLabel("y"),
tag="idx",
features=Feature(name="col_x", value="x"),
features=Feature(value="x", rename_feature="col_x"),
df=df,
)
first_line = conv.convert_df()[0]
assert first_line == "1 id_1| col_x:2"


def test_constant_feature_value_with_empty_name():
def test_feature_value_with_empty_name():
df = pd.DataFrame({"idx": ["id_1"], "y": [1], "x": [2]})
conv = DFtoVW(
label=SimpleLabel("y"),
tag="idx",
features=Feature(name="", value=2, value_from_df=False),
features=Feature(value="x", rename_feature=""),
df=df,
)
first_line = conv.convert_df()[0]
assert first_line == "1 id_1| :2"


def test_variable_feature_name():
df = pd.DataFrame({"y": [1], "x": [2], "a": ["col_x"]})
conv = DFtoVW(
label=SimpleLabel("y"),
features=Feature(name="a", value="x", name_from_df=True),
df=df,
)
first_line = conv.convert_df()[0]
assert first_line == "1 | col_x:2"


def test_multiple_lines():
df = pd.DataFrame({"y": [1, -1], "x": [1, 2]})
conv = DFtoVW(label=SimpleLabel("y"), features=Feature(value="x"), df=df,)
Expand Down Expand Up @@ -457,7 +446,7 @@ def test_without_target_multiple_features():
def test_multiclasslabel():
df = pd.DataFrame({"a": [1], "b": [0.5], "c": ["x"]})
conv = DFtoVW(
df=df, label=MulticlassLabel(name="a", weight="b"), features=Feature("c")
df=df, label=MulticlassLabel(label="a", weight="b"), features=Feature("c")
)
first_line = conv.convert_df()[0]
assert first_line == "1 0.5 | x"
Expand Down Expand Up @@ -498,8 +487,8 @@ def test_absent_col_error():
def test_non_numerical_simplelabel_error():
df = pd.DataFrame({"y": ["a"], "x": ["featX"]})
with pytest.raises(TypeError) as type_error:
DFtoVW(df=df, label=SimpleLabel(name="y"), features=Feature("x"))
expected = "In argument 'name' of 'SimpleLabel', column 'y' should be either of the following type(s): 'int', 'float', 'int64'."
DFtoVW(df=df, label=SimpleLabel(label="y"), features=Feature("x"))
expected = "In argument 'label' of 'SimpleLabel', column 'y' should be either of the following type(s): 'int', 'float', 'int64'."
assert expected == str(type_error.value)


Expand All @@ -511,15 +500,15 @@ def test_wrong_feature_type_error():
assert expected == str(type_error.value)


def test_multiclasslabel_non_positive_name_error():
def test_multiclasslabel_non_positive_label_error():
df = pd.DataFrame({"a": [0], "b": [0.5], "c": ["x"]})
with pytest.raises(ValueError) as value_error:
DFtoVW(
df=df,
label=MulticlassLabel(name="a", weight="b"),
label=MulticlassLabel(label="a", weight="b"),
features=Feature("c"),
)
expected = "In argument 'name' of 'MulticlassLabel', column 'a' must be >= 1."
expected = "In argument 'label' of 'MulticlassLabel', column 'a' must be >= 1."
assert expected == str(value_error.value)


Expand All @@ -528,44 +517,20 @@ def test_multiclasslabel_negative_weight_error():
with pytest.raises(ValueError) as value_error:
DFtoVW(
df=df,
label=MulticlassLabel(name="y", weight="w"),
label=MulticlassLabel(label="y", weight="w"),
features=Feature("x"),
)
expected = "In argument 'weight' of 'MulticlassLabel', column 'w' must be >= 0."
assert expected == str(value_error.value)


def test_multiclasslabel_non_positive_constant_label_error():
df = pd.DataFrame({"a": [0], "b": [0.5], "c": ["x"]})
with pytest.raises(ValueError) as value_error:
DFtoVW(
df=df,
label=MulticlassLabel(name=-1, weight="b", name_from_df=False),
features=Feature("c"),
)
expected = "In 'MulticlassLabel', argument 'name' must be >= 1."
assert expected == str(value_error.value)


def test_multiclasslabel_constant_label_type_error():
df = pd.DataFrame({"a": [0], "b": [0.5], "c": ["x"]})
with pytest.raises(TypeError) as type_error:
DFtoVW(
df=df,
label=MulticlassLabel(name="a", weight="b", weight_from_df=False),
features=Feature("c"),
)
expected = "In 'MulticlassLabel', when weight_from_df=False, argument 'weight' should be either of the following type(s): 'int', 'float'."
assert expected == str(type_error.value)


def test_multilabel_non_positive_name_error():
def test_multilabel_non_positive_label_error():
df = pd.DataFrame({"y": [0], "b": [1]})
with pytest.raises(ValueError) as value_error:
DFtoVW(
df=df,
label=MultiLabel(name="y"),
label=MultiLabel(label="y"),
features=Feature("b"),
)
expected = "In argument 'name' of 'MultiLabel', column 'y' must be >= 1."
expected = "In argument 'label' of 'MultiLabel', column 'y' must be >= 1."
assert expected == str(value_error.value)