test: fix bug and add interesting tests for multi -d support (#3897)

Also fixes bug where if more than one positional arg gets supplied, and VW hangs waiting for stdin.
VowpalWabbit · Apr 27, 2022 · 59c4900 · 59c4900
1 parent b5da841
commit 59c4900
Show file tree

Hide file tree

Showing 28 changed files with 5,242 additions and 29 deletions.
diff --git a/.github/workflows/python_wheels.yml b/.github/workflows/python_wheels.yml
@@ -77,7 +77,7 @@ jobs:
         # Tests without datafiles are skipped
         run: |
           cd test
-          python run_tests.py -E 0.001 -f --skip_spanning_tree_tests --vw_bin_path "python3 -m vowpalwabbit" --skip_test 60 61 92 96 149 152 177 193 194 195 220 275 276 324 325 326 349 350 356 357 358 385 389 390 391 392 393
+          python run_tests.py -E 0.001 -f --skip_spanning_tree_tests --vw_bin_path "python3 -m vowpalwabbit" --skip_test 60 61 92 96 149 152 177 193 194 195 220 275 276 324 325 326 349 350 356 357 358 385 389 390 391 392 393 400 399 401 403
   linux-python-sdist-bundle:
     name: ubuntu-latest.amd64.py3.8.sdist-bundle
     runs-on: ubuntu-latest

diff --git a/python/tests/test_pyvw.py b/python/tests/test_pyvw.py
@@ -345,7 +345,7 @@ def test_command_line_using_arg_list():
 
 def test_command_line_with_double_space_in_str():
     # Test regression for double space in string breaking splitting
-    model = Workspace(arg_list="--oaa 3 -q ::    ")
+    model = Workspace(arg_str="--oaa 3 -q ::    ")
     del model
 
 

diff --git a/python/tests/test_vwconfig.py b/python/tests/test_vwconfig.py
@@ -19,8 +19,8 @@ def test_vw_config_manager():
     expected_set = {
         "--no_stdin",
         "--quiet",
-        "--loss_function logistic",
-        "--data test/train-sets/rcv1_small.dat",
+        "--loss_function=logistic",
+        "--data=test/train-sets/rcv1_small.dat",
     }
     expected_reductions = {"gd", "scorer-identity", "count_label"}
 

diff --git a/python/vowpalwabbit/pyvw.py b/python/vowpalwabbit/pyvw.py
@@ -176,11 +176,10 @@ def __str__(self):
             if self.is_flag():
                 return "--{}".format(self.name)
             else:
-                # missing list case
                 if isinstance(self.value, list):
-                    return "**NOT_IMPL**"
+                    return " ".join(map(lambda x: f"--{self.name}={x}", self.value))
                 else:
-                    return "--{} {}".format(self.name, self.value)
+                    return "--{}={}".format(self.name, self.value)
         else:
             return ""
 

diff --git a/test/core.vwtest.json b/test/core.vwtest.json
@@ -5123,5 +5123,91 @@
     "input_files": [
       "train-sets/cb_l_namespace.txt"
     ]
+  },
+  {
+    "id": 399,
+    "desc": "test multi d -- based on test 316 to share same metrics of the complete file dsjson_cb.json (which is split into two here)",
+    "bash_command": "./negative-test.sh {VW} -d train-sets/dsjson_cb_part1.json -d train-sets/dsjson_cb_part2.json --dsjson --cb_explore_adf --epsilon 0.2 --quadratic GT --extra_metrics metrics_time.json",
+    "diff_files": {
+      "stdout": "train-sets/ref/metrics_time_fail.stdout",
+      "stderr": "train-sets/ref/multid_fail.stderr"
+    },
+    "input_files": [
+      "./negative-test.sh",
+      "train-sets/dsjson_cb_part1.json",
+      "train-sets/dsjson_cb_part2.json"
+    ]
+  },
+  {
+    "id": 400,
+    "desc": "test multi d positional -- based on test 316 to share same metrics of the complete file dsjson_cb.json (which is split into two here)",
+    "vw_command": "--dsjson --cb_explore_adf --epsilon 0.2 --quadratic GT --extra_metrics metrics_time.json train-sets/dsjson_cb_part1.json train-sets/dsjson_cb_part2.json",
+    "diff_files": {
+      "metrics_time.json": "test-sets/ref/metrics_time_one.json",
+      "stdout": "train-sets/ref/metrics_time_one.stdout",
+      "stderr": "train-sets/ref/multid_one.stderr"
+    },
+    "input_files": [
+      "train-sets/dsjson_cb_part1.json",
+      "train-sets/dsjson_cb_part2.json"
+    ]
+  },
+  {
+    "id": 401,
+    "desc": "test multi d - see test 121 cb_explore",
+    "bash_command": "./negative-test.sh {VW} -d train-sets/rcv1_raw_cb_small_p1.vw -d train-sets/rcv1_raw_cb_small_p2.vw --cb_explore 2 --ngram 2 --skips 4 -b 24 -l 0.25 -p rcv1_raw_cb_explore.preds",
+    "diff_files": {
+      "stderr": "train-sets/ref/rcv1_raw_cb_explore_pass2_split.stderr",
+      "rcv1_raw_cb_explore.preds": "pred-sets/ref/rcv1_raw_cb_explore_pass2_empty.preds",
+      "stdout": "train-sets/ref/rcv1_raw_cb_explore_pass2_fail.stdout"
+    },
+    "input_files": [
+      "./negative-test.sh",
+      "train-sets/rcv1_raw_cb_small_p1.vw",
+      "train-sets/rcv1_raw_cb_small_p2.vw"
+    ]
+  },
+  {
+    "id": 402,
+    "desc": "cb_explore - passes 2",
+    "vw_command": "-d train-sets/rcv1_raw_cb_small.vw --cb_explore 2 --ngram 2 --skips 4 -b 24 -l 0.25 -p rcv1_raw_cb_explore.preds --passes 2 -c -k",
+    "diff_files": {
+      "stderr": "train-sets/ref/rcv1_raw_cb_explore_pass2.stderr",
+      "rcv1_raw_cb_explore.preds": "pred-sets/ref/rcv1_raw_cb_explore_pass2.preds",
+      "stdout": "train-sets/ref/rcv1_raw_cb_explore_pass2.stdout"
+    },
+    "input_files": [
+      "train-sets/rcv1_raw_cb_small.vw"
+    ]
+  },
+  {
+    "id": 403,
+    "desc": "previous but with split files - cb_explore - passes 2",
+    "bash_command": "./negative-test.sh {VW} -d train-sets/rcv1_raw_cb_small_p1.vw -d train-sets/rcv1_raw_cb_small_p2.vw --cb_explore 2 --ngram 2 --skips 4 -b 24 -l 0.25 -p rcv1_raw_cb_explore.preds --passes 2 -c -k --single_cache_file",
+    "diff_files": {
+      "stderr": "train-sets/ref/rcv1_raw_cb_explore_pass2_split.stderr",
+      "rcv1_raw_cb_explore.preds": "pred-sets/ref/rcv1_raw_cb_explore_pass2_empty.preds",
+      "stdout": "train-sets/ref/rcv1_raw_cb_explore_pass2_fail.stdout"
+    },
+    "input_files": [
+      "./negative-test.sh",
+      "train-sets/rcv1_raw_cb_small_p1.vw",
+      "train-sets/rcv1_raw_cb_small_p2.vw"
+    ]
+  },
+  {
+    "id": 404,
+    "desc": "test multi d positional mixed with -d, this should fail",
+    "vw_command": "--dsjson --cb_explore_adf --epsilon 0.2 train-sets/dsjson_cb_part2.json --quadratic GT --extra_metrics metrics_time.json -d train-sets/dsjson_cb_part1.json",
+    "diff_files": {
+      "stderr": "train-sets/ref/multid_mixed.stderr",
+      "stdout": "train-sets/ref/multid_mixed.stdout"
+    },
+    "input_files": [
+      "train-sets/dsjson_cb_part1.json",
+      "train-sets/dsjson_cb_part2.json"
+    ]
   }
+
+
 ]