cb_adf and cb_explore_adf: Setting --cb_type mtr as default (#1838)

* cb_adf: Setting --cb_type mtr as default * cb_explore_adf.cc: default cb_type mtr * Removed dm from cb_type help, since dm is not supported * Fixed test 84 * cb_adf: fixed test 87 * cb_explore_adf: Fixed test 129 * cb_explore_adf: Fixed test 138 * cb_explore_adf: Fixed test 158 * Fixing Java tests errors * Update tests * Precision * Precision * ordering * precision * Fix ips->mtr in comment + adding spaces in cmd
VowpalWabbit · May 1, 2019 · f96e5fb · f96e5fb
1 parent f159ff6
commit f96e5fb
Show file tree

Hide file tree

Showing 13 changed files with 167 additions and 157 deletions.
diff --git a/cs/unittest/TestArguments.cs b/cs/unittest/TestArguments.cs
@@ -1,128 +1,128 @@
-using Microsoft.VisualStudio.TestTools.UnitTesting;
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text;
-using System.Threading;
-using System.Threading.Tasks;
-using VW;
-
-namespace cs_unittest
-{
-    [TestClass]
-    public class TestArgumentsClass
-    {
-        [TestMethod]
-        [TestCategory("Vowpal Wabbit")]
-        public void TestArguments()
-        {
-            using (var vw = new VowpalWabbit(new VowpalWabbitSettings("--cb_explore_adf --epsilon 0.3 --interact ud") { Verbose = true }))
-            {
-                // --cb_explore_adf --epsilon 0.3 --interact ud --cb_adf--csoaa_ldf multiline --csoaa_rank
-                Console.WriteLine(vw.Arguments.CommandLine);
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--cb_explore_adf"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--epsilon 0.3"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--interact ud"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_ldf multiline"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_rank"));
-                vw.SaveModel("args.model");
-            }
-
-            using (var vw = new VowpalWabbit(new VowpalWabbitSettings { ModelStream = File.Open("args.model", FileMode.Open) }))
-            {
-                Console.WriteLine(vw.Arguments.CommandLine);
-                // --no_stdin--bit_precision 18--cb_explore_adf--epsilon 0.300000--cb_adf--cb_type ips --csoaa_ldf multiline--csoaa_rank--interact ud
-
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--no_stdin"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--bit_precision 18"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--cb_explore_adf"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--epsilon 0.3"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--interact ud"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_ldf multiline"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_rank"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--cb_type ips"));
-            }
-        }
-
-        [TestMethod]
-        [TestCategory("Vowpal Wabbit")]
-        public void TestQuietAndTestArguments()
-        {
-            using (var vw = new VowpalWabbit("--quiet -t"))
-            {
-                vw.SaveModel("args.model");
-            }
-
-            using (var vw = new VowpalWabbitModel(new VowpalWabbitSettings { ModelStream = File.Open("args.model", FileMode.Open) }))
-            {
-                Assert.IsFalse(vw.Arguments.CommandLine.Contains("--quiet"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("-t"));
-
-                using (var vwSub = new VowpalWabbit(new VowpalWabbitSettings { Model = vw }))
-                {
-                    Assert.IsTrue(vwSub.Arguments.CommandLine.Contains("--quiet"));
-                    Assert.IsTrue(vwSub.Arguments.CommandLine.Contains("-t"));
-                }
-            }
-
-            using (var vw = new VowpalWabbit(""))
-            {
-                vw.SaveModel("args.model");
-            }
-
-            using (var vw = new VowpalWabbitModel(new VowpalWabbitSettings { ModelStream = File.Open("args.model", FileMode.Open) }))
-            {
-                Assert.IsFalse(vw.Arguments.CommandLine.Contains("--quiet"));
-                Assert.IsTrue(vw.Arguments.CommandLine.Contains("-t"));
-
-                using (var vwSub = new VowpalWabbit(new VowpalWabbitSettings { Model = vw }))
-                {
-                    Assert.IsTrue(vwSub.Arguments.CommandLine.Contains("--quiet"));
-                    Assert.IsTrue(vwSub.Arguments.CommandLine.Contains("-t"));
-                }
-            }
-        }
-
-        [TestMethod]
-        [TestCategory("Vowpal Wabbit")]
-        public void TestArgumentDeDup()
-        {
-            using (var vw = new VowpalWabbit("-l 0.3 -l 0.3 --learning_rate 0.3 -f model1 --save_resume -q ab")) 
-            {
-                Assert.AreEqual(0.3f, vw.Native.Arguments.LearningRate);
-            }
-
-            try
-            {
-                using (var vw =
-                    new VowpalWabbit(
-                        "--cb 2 --cb_type ips --cb_type dm --learning_rate 0.1 -f model_bad --save_resume -q ab"))
-                {
-                    Assert.AreEqual(0.1f, vw.Native.Arguments.LearningRate);
-                }
-
-                Assert.Fail("Disagreering arguments not detected");
-            }
-            catch (VowpalWabbitException)
-            { }
-
-            using (var vw = new VowpalWabbit("-i model1 --save_resume"))
-            {
-                Assert.AreEqual(0.5f, vw.Native.Arguments.LearningRate);
-            }
-
-            using (var vw = new VowpalWabbit("-i model1 --save_resume -q ab -l 0.4"))
-            {
-                Assert.AreEqual(0.4f, vw.Native.Arguments.LearningRate);
-            }
-
-            // make sure different representations of arguments are matched
-            using (var vw = new VowpalWabbit("--cb_explore_adf --epsilon 0.1 -f model2"))
-            { }
-
-            using (var vw = new VowpalWabbit("--cb_explore_adf --epsilon 0.1000 -i model2"))
-            { }
-        }
-    }
-}
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+using VW;
+
+namespace cs_unittest
+{
+    [TestClass]
+    public class TestArgumentsClass
+    {
+        [TestMethod]
+        [TestCategory("Vowpal Wabbit")]
+        public void TestArguments()
+        {
+            using (var vw = new VowpalWabbit(new VowpalWabbitSettings("--cb_explore_adf --epsilon 0.3 --interact ud") { Verbose = true }))
+            {
+                // --cb_explore_adf --epsilon 0.3 --interact ud --cb_adf --csoaa_ldf multiline --csoaa_rank
+                Console.WriteLine(vw.Arguments.CommandLine);
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--cb_explore_adf"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--epsilon 0.3"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--interact ud"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_ldf multiline"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_rank"));
+                vw.SaveModel("args.model");
+            }
+
+            using (var vw = new VowpalWabbit(new VowpalWabbitSettings { ModelStream = File.Open("args.model", FileMode.Open) }))
+            {
+                Console.WriteLine(vw.Arguments.CommandLine);
+                // --no_stdin --bit_precision 18 --cb_explore_adf --epsilon 0.300000 --cb_adf --cb_type mtr --csoaa_ldf multiline --csoaa_rank --interact ud
+
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--no_stdin"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--bit_precision 18"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--cb_explore_adf"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--epsilon 0.3"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--interact ud"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_ldf multiline"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--csoaa_rank"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("--cb_type mtr"));
+            }
+        }
+
+        [TestMethod]
+        [TestCategory("Vowpal Wabbit")]
+        public void TestQuietAndTestArguments()
+        {
+            using (var vw = new VowpalWabbit("--quiet -t"))
+            {
+                vw.SaveModel("args.model");
+            }
+
+            using (var vw = new VowpalWabbitModel(new VowpalWabbitSettings { ModelStream = File.Open("args.model", FileMode.Open) }))
+            {
+                Assert.IsFalse(vw.Arguments.CommandLine.Contains("--quiet"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("-t"));
+
+                using (var vwSub = new VowpalWabbit(new VowpalWabbitSettings { Model = vw }))
+                {
+                    Assert.IsTrue(vwSub.Arguments.CommandLine.Contains("--quiet"));
+                    Assert.IsTrue(vwSub.Arguments.CommandLine.Contains("-t"));
+                }
+            }
+
+            using (var vw = new VowpalWabbit(""))
+            {
+                vw.SaveModel("args.model");
+            }
+
+            using (var vw = new VowpalWabbitModel(new VowpalWabbitSettings { ModelStream = File.Open("args.model", FileMode.Open) }))
+            {
+                Assert.IsFalse(vw.Arguments.CommandLine.Contains("--quiet"));
+                Assert.IsTrue(vw.Arguments.CommandLine.Contains("-t"));
+
+                using (var vwSub = new VowpalWabbit(new VowpalWabbitSettings { Model = vw }))
+                {
+                    Assert.IsTrue(vwSub.Arguments.CommandLine.Contains("--quiet"));
+                    Assert.IsTrue(vwSub.Arguments.CommandLine.Contains("-t"));
+                }
+            }
+        }
+
+        [TestMethod]
+        [TestCategory("Vowpal Wabbit")]
+        public void TestArgumentDeDup()
+        {
+            using (var vw = new VowpalWabbit("-l 0.3 -l 0.3 --learning_rate 0.3 -f model1 --save_resume -q ab"))
+            {
+                Assert.AreEqual(0.3f, vw.Native.Arguments.LearningRate);
+            }
+
+            try
+            {
+                using (var vw =
+                    new VowpalWabbit(
+                        "--cb 2 --cb_type ips --cb_type dm --learning_rate 0.1 -f model_bad --save_resume -q ab"))
+                {
+                    Assert.AreEqual(0.1f, vw.Native.Arguments.LearningRate);
+                }
+
+                Assert.Fail("Disagreering arguments not detected");
+            }
+            catch (VowpalWabbitException)
+            { }
+
+            using (var vw = new VowpalWabbit("-i model1 --save_resume"))
+            {
+                Assert.AreEqual(0.5f, vw.Native.Arguments.LearningRate);
+            }
+
+            using (var vw = new VowpalWabbit("-i model1 --save_resume -q ab -l 0.4"))
+            {
+                Assert.AreEqual(0.4f, vw.Native.Arguments.LearningRate);
+            }
+
+            // make sure different representations of arguments are matched
+            using (var vw = new VowpalWabbit("--cb_explore_adf --epsilon 0.1 -f model2"))
+            { }
+
+            using (var vw = new VowpalWabbit("--cb_explore_adf --epsilon 0.1000 -i model2"))
+            { }
+        }
+    }
+}
diff --git a/java/src/test/java/vowpalWabbit/learner/VWActionProbsLearnerTest.java b/java/src/test/java/vowpalWabbit/learner/VWActionProbsLearnerTest.java
@@ -112,8 +112,8 @@ public void testCBADFExplore() throws IOException {
                     actionProb(1, 0.025f)
                 ),
                 actionProbs(
-                    actionProb(1, 0.97499996f),
-                    actionProb(0, 0.025f)
+                    actionProb(0, 0.97499996f),
+                    actionProb(1, 0.025f)
                 )
         };
         vw.close();

diff --git a/java/src/test/java/vowpalWabbit/learner/VWActionScoresLearnerTest.java b/java/src/test/java/vowpalWabbit/learner/VWActionScoresLearnerTest.java
@@ -97,16 +97,16 @@ private void testCBADF(boolean withRank) throws IOException {
                 actionScore(1, 0)
             ),
             actionScores(
-                actionScore(0, 0.14991696f),
-                actionScore(1, 0.14991696f)
+                actionScore(0, 0.11246802f),
+                actionScore(1, 0.11246802f)
             ),
             actionScores(
-                actionScore(0, 0.27180168f),
-                actionScore(1, 0.31980497f)
+                actionScore(0, 0.3682006f),
+                actionScore(1, 0.5136312f)
             ),
             actionScores(
-                actionScore(1, 0.35295868f),
-                actionScore(0, 0.3869971f)
+                actionScore(0, 0.58848584f),
+                actionScore(1, 0.6244352f)
             )
         };
         vw.close();
@@ -117,8 +117,8 @@ private void testCBADF(boolean withRank) throws IOException {
 
         ActionScores[] expectedTestPreds = new ActionScores[]{
             actionScores(
-                actionScore(0, 0.33543912f),
-                actionScore(1, 0.37897447f)
+                actionScore(0, 0.39904374f),
+                actionScore(1, 0.49083984f)
             )
         };
 

diff --git a/test/RunTests b/test/RunTests
@@ -1226,7 +1226,7 @@ echo "" | {VW}
 
 # Test 84: check cb_adf
 {VW} --cb_adf -d train-sets/cb_test.ldf --noconstant
-    train-sets/ref/cb_adf.stderr
+    train-sets/ref/cb_adf_mtr.stderr
 
 # Test 85: check multilabel_oaa
 {VW} --multilabel_oaa 10 -d train-sets/multilabel -p multilabel.predict
@@ -1577,8 +1577,9 @@ echo "1 | feature:1" | {VW} -a --initial_weight 0.1 --initial_t 0.3
     train-sets/ref/157.stderr
 
 # Test 158: test decision service json parsing
-{VW} -d train-sets/decisionservice.json --dsjson --cb_explore_adf --epsilon 0.2 --quadratic GT
-    train-sets/ref/decisionservice.stderr
+{VW} -d train-sets/decisionservice.json --dsjson --cb_explore_adf --epsilon 0.2 --quadratic GT -P 1 -p cbe_adf_dsjson.predict
+    train-sets/ref/cbe_adf_dsjson.stderr
+    pred-sets/ref/cbe_adf_dsjson.predict
 
 # Test 159: test --bootstrap & --binary interaction
 {VW} -d train-sets/rcv1_mini.dat --bootstrap 5 --binary -c -k --passes 2

diff --git a/test/pred-sets/ref/cb_adf_rank.predict b/test/pred-sets/ref/cb_adf_rank.predict
@@ -1,6 +1,6 @@
 0:0,1:0,2:0
 
-1:-0.182022,0:0.342521
+1:0,0:0.239436
 
-1:-0.256651,0:0.461775
+1:0,0:0.359153
 
diff --git a/test/pred-sets/ref/cbe_adf_dsjson.predict b/test/pred-sets/ref/cbe_adf_dsjson.predict
@@ -0,0 +1,6 @@
+0:0.0833333,1:0.0833333,2:0.0833333,3:0.0833333,4:0.0833333,5:0.0833333,6:0.0833333,7:0.0833333,8:0.0833333,9:0.0833333,10:0.0833333,11:0.0833333
+
+6:0.816667,5:0.0166667,9:0.0166667,2:0.0166667,10:0.0166667,1:0.0166667,3:0.0166667,7:0.0166667,4:0.0166667,0:0.0166667,8:0.0166667,11:0.0166667
+
+6:0.816667,5:0.0166667,9:0.0166667,2:0.0166667,1:0.0166667,3:0.0166667,10:0.0166667,4:0.0166667,0:0.0166667,7:0.0166667,8:0.0166667,11:0.0166667
+
diff --git a/test/pred-sets/ref/cbe_adf_softmax.predict b/test/pred-sets/ref/cbe_adf_softmax.predict
@@ -1,6 +1,6 @@
 0:0.333333,1:0.333333,2:0.333333
 
-1:0.628209,0:0.371791
+1:0.559575,0:0.440425
 
-1:0.67226,0:0.32774
+1:0.588836,0:0.411165
 
diff --git a/test/train-sets/ref/cb_adf_rank.stderr b/test/train-sets/ref/cb_adf_rank.stderr
@@ -9,7 +9,7 @@ num sources = 1
 average  since         example        example  current  current  current
 loss     last          counter         weight    label  predict features
 2.000000 2.000000            1            1.0    known        0:0...        9
-1.000000 0.000000            2            2.0    known        1:-0.182022...        6
+1.000000 0.000000            2            2.0    known        1:0...        6
 
 finished run
 number of examples = 3

diff --git a/test/train-sets/ref/cbe_adf_cover.stderr b/test/train-sets/ref/cbe_adf_cover.stderr
@@ -1,4 +1,5 @@
 predictions = cbe_adf_cover.predict
+warning: currently, mtr is only used for the first policy in cover, other policies use dr
 Num weight bits = 18
 learning rate = 0.5
 initial_t = 0

diff --git a/test/train-sets/ref/decisionservice.stderr → test/train-sets/ref/cbe_adf_dsjson.stderr b/test/train-sets/ref/decisionservice.stderr → test/train-sets/ref/cbe_adf_dsjson.stderr
@@ -1,4 +1,5 @@
-creating quadratic features for pairs: GT 
+creating quadratic features for pairs: GT
+predictions = cbe_adf_dsjson.predict
 Num weight bits = 18
 learning rate = 0.5
 initial_t = 0
@@ -10,10 +11,11 @@ average  since         example        example  current  current  current
 loss     last          counter         weight    label  predict features
 -0.102041 -0.102041            1            1.0    known        0:0.0833333...      361
 -0.051020 0.000000            2            2.0    known        6:0.816667...      361
+-0.040816 -0.020408            3            3.0    known        6:0.816667...      361
 
 finished run
 number of examples = 3
 weighted example sum = 3.000000
 weighted label sum = 0.000000
-average loss = -0.367347
+average loss = -0.040816
 total feature number = 1083
diff --git a/test/train-sets/ref/cbe_adf_softmax.stderr b/test/train-sets/ref/cbe_adf_softmax.stderr
@@ -9,7 +9,7 @@ num sources = 1
 average  since         example        example  current  current  current
 loss     last          counter         weight    label  predict features
 0.666667 0.666667            1            1.0    known        0:0.333333...        9
-0.333333 0.000000            2            2.0    known        1:0.628209...        6
+0.333333 0.000000            2            2.0    known        1:0.559575...        6
 
 finished run
 number of examples = 3