Merge remote-tracking branch 'remotes/@vw/master' into cs-simulator

VowpalWabbit · Oct 23, 2018 · 7da0cb7 · 7da0cb7
2 parents 1168aba + b3aa2c7
commit 7da0cb7
Show file tree

Hide file tree

Showing 30 changed files with 519 additions and 288 deletions.
diff --git a/cs/setup/setup.wixproj b/cs/setup/setup.wixproj
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <Import Project="..\..\vowpalwabbit\packages\WiX.3.10.3\build\wix.props" Condition="Exists('..\..\vowpalwabbit\packages\WiX.3.10.3\build\wix.props')" />
   <PropertyGroup>
     <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
@@ -15,23 +15,14 @@
     <NuGetPackageImportStamp>
     </NuGetPackageImportStamp>
   </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
-    <OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
-    <IntermediateOutputPath>obj\$(Configuration)\</IntermediateOutputPath>
+  <PropertyGroup Condition=" '$(Configuration)' == 'Debug' ">
+    <OutputPath>$(SolutionDir)out\target\$(Configuration)\$(Platform)\</OutputPath>
+    <IntermediateOutputPath>$(SolutionDir)out\int\$(Configuration)\$(Platform)\$(ProjectName)</IntermediateOutputPath>
     <DefineConstants>Debug</DefineConstants>
   </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
-    <OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
-    <IntermediateOutputPath>obj\$(Configuration)\</IntermediateOutputPath>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
-    <DefineConstants>Debug</DefineConstants>
-    <OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
-    <IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
-    <OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
-    <IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
+  <PropertyGroup Condition=" '$(Configuration)' == 'Release' ">
+    <OutputPath>$(SolutionDir)out\target\$(Configuration)\$(Platform)\</OutputPath>
+    <IntermediateOutputPath>$(SolutionDir)out\int\$(Configuration)\$(Platform)\$(ProjectName)</IntermediateOutputPath>
     <SuppressValidation>True</SuppressValidation>
   </PropertyGroup>
   <ItemGroup>

diff --git a/cs/setup_bundle/Product.wxs b/cs/setup_bundle/Product.wxs
@@ -8,7 +8,7 @@
       <!-- .NET Dependency -->
       <PackageGroupRef Id="NetFx45Web"/>
       <!-- Visual Studio Runtime Dependency -->
-      <PackageGroupRef Id="redist_vc120"/>
+      <PackageGroupRef Id="redist_vc140"/>
       <!-- Actual binaries-->
       <PackageGroupRef Id="VowpalWabbitPackage"/>
     </Chain>
@@ -24,15 +24,19 @@
     </PackageGroup>
   </Fragment>
 
-  <!--visual studio runtime 12-->
+  <!--visual studio runtime 14-->
   <Fragment>
-    <PackageGroup Id="redist_vc120">
-      <ExePackage Id="vc120" Cache="yes" PerMachine="yes" Permanent="yes" Vital="yes" Compressed="yes"
-                  SourceFile="C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\redist\1033\vcredist_x64.exe"
+    <PackageGroup Id="redist_vc140">
+      <ExePackage Id="vc140" Cache="yes" PerMachine="yes" Permanent="yes" Vital="yes" Compressed="yes"
+                  SourceFile="C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\redist\1033\vcredist_x64.exe"
                   Name="vcredist_x64.exe"
                   InstallCommand="/quiet /norestart"
                   Protocol="burn"
-                  />
+                  >
+        <!-- Below exit codes come from https://stackoverflow.com/questions/37396773/wix-burn-vcredist -->
+        <ExitCode Value="3010" Behavior="forceReboot"/>
+        <ExitCode Value="1638" Behavior="success"/> <!-- Ignore "Newer version installed" error -->
+      </ExePackage>
 
     </PackageGroup>
   </Fragment>

diff --git a/cs/setup_bundle/setup_bundle.wixproj b/cs/setup_bundle/setup_bundle.wixproj
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <Import Project="..\..\vowpalwabbit\packages\WiX.3.10.3\build\wix.props" Condition="Exists('..\..\vowpalwabbit\packages\WiX.3.10.3\build\wix.props')" />
   <PropertyGroup>
     <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
@@ -15,41 +15,15 @@
     <NuGetPackageImportStamp>
     </NuGetPackageImportStamp>
   </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
-    <OutputPath>bin\$(Configuration)\</OutputPath>
-    <IntermediateOutputPath>obj\$(Configuration)\</IntermediateOutputPath>
+  <PropertyGroup Condition=" '$(Configuration)' == 'Debug' ">
+    <OutputPath>$(SolutionDir)out\target\$(Configuration)\$(Platform)\</OutputPath>
+    <IntermediateOutputPath>$(SolutionDir)out\int\$(Configuration)\$(Platform)\$(ProjectName)</IntermediateOutputPath>
     <DefineConstants>Debug</DefineConstants>
   </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
-    <OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
-    <IntermediateOutputPath>obj\$(Configuration)\</IntermediateOutputPath>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
-    <DefineConstants>Debug</DefineConstants>
-    <OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
-    <IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
-    <OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
-    <IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
-    <DefineConstants>Debug</DefineConstants>
-    <OutputPath>bin\$(Platform)\$(Configuration)\</OutputPath>
-    <IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
-    <OutputPath>bin\$(Platform)\$(Configuration)\</OutputPath>
-    <IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
-    <DefineConstants>Debug</DefineConstants>
-    <OutputPath>bin\$(Platform)\$(Configuration)\</OutputPath>
-    <IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
-    <OutputPath>bin\$(Platform)\$(Configuration)\</OutputPath>
-    <IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
+  <PropertyGroup Condition=" '$(Configuration)' == 'Release' ">
+    <OutputPath>$(SolutionDir)out\target\$(Configuration)\$(Platform)\</OutputPath>
+    <IntermediateOutputPath>$(SolutionDir)out\int\$(Configuration)\$(Platform)\$(ProjectName)</IntermediateOutputPath>
+    <SuppressValidation>True</SuppressValidation>
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="Product.wxs" />

diff --git a/cs/version.props b/cs/version.props
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 <PropertyGroup>
-<VowpalWabbitAssemblyVersion>8.4.0.1</VowpalWabbitAssemblyVersion>
+<VowpalWabbitAssemblyVersion>8.6.1.0</VowpalWabbitAssemblyVersion>
 </PropertyGroup>
 </Project>
diff --git a/reinforcement_learning/bindings/python/py_api.cc b/reinforcement_learning/bindings/python/py_api.cc
@@ -93,24 +93,32 @@ namespace reinforcement_learning {
       return response;
     }
 
-    ranking_response live_model::choose_rank(const char* event_id, const char* context_json) {
+    ranking_response live_model::choose_rank(const char* event_id, const char* context_json, bool deferred) {
       reinforcement_learning::ranking_response response_impl;
       reinforcement_learning::api_status status;
-      _impl.choose_rank(event_id, context_json, response_impl, &status);
+      unsigned int flags = deferred ? action_flags::DEFERRED : action_flags::DEFAULT;
+      _impl.choose_rank(event_id, context_json, flags, response_impl, &status);
       check_api_status(status);
 
       return convert_ranking_response(response_impl);
     }
     // event_id is auto-generated.
-    ranking_response live_model::choose_rank(const char* context_json) {
+    ranking_response live_model::choose_rank(const char* context_json, bool deferred) {
       reinforcement_learning::ranking_response response_impl;
       reinforcement_learning::api_status status;
-      _impl.choose_rank(context_json, response_impl, &status);
+      unsigned int flags = deferred ? action_flags::DEFERRED : action_flags::DEFAULT;
+      _impl.choose_rank(context_json, flags, response_impl, &status);
       check_api_status(status);
 
       return convert_ranking_response(response_impl);
     }
 
+    void live_model::report_action_taken(const char* event_id) {
+      reinforcement_learning::api_status status;
+      _impl.report_action_taken(event_id, &status);
+      check_api_status(status);
+    }
+
     void live_model::report_outcome(const char* event_id, const char* outcome) {
       reinforcement_learning::api_status status;
       _impl.report_outcome(event_id, outcome, &status);

diff --git a/reinforcement_learning/bindings/python/py_api.h b/reinforcement_learning/bindings/python/py_api.h
@@ -34,9 +34,11 @@ namespace reinforcement_learning {
 
       void init();
 
-      ranking_response choose_rank(const char* event_id, const char* context_json);
+      ranking_response choose_rank(const char* event_id, const char* context_json, bool deferred);
       // event_id is auto-generated.
-      ranking_response choose_rank(const char* context_json);
+      ranking_response choose_rank(const char* context_json, bool deferred);
+
+      void report_action_taken(const char* event_id);
 
       void report_outcome(const char* event_id, const char* outcome);
       void report_outcome(const char* event_id, float outcome);

diff --git a/reinforcement_learning/bindings/python/rl_client.i b/reinforcement_learning/bindings/python/rl_client.i
@@ -47,16 +47,18 @@ namespace reinforcement_learning {
       void init();
 
       %rename(choose_rank_impl) choose_rank;
-      reinforcement_learning::python::ranking_response choose_rank(const char* event_id, const char* context_json);
+      reinforcement_learning::python::ranking_response choose_rank(const char* event_id, const char* context_json, bool deferred);
       // event_id is auto-generated.
-      reinforcement_learning::python::ranking_response choose_rank(const char* context_json);
+      reinforcement_learning::python::ranking_response choose_rank(const char* context_json, bool deferred);
+
+      void report_action_taken(const char* event_id);
 
       void report_outcome(const char* event_id, const char* outcome);
       void report_outcome(const char* event_id, float outcome);
 
       %pythoncode %{
-        def choose_rank(self, *args):
-            ranking_response = self.choose_rank_impl(*args)
+        def choose_rank(self, *args, deferred = False):
+            ranking_response = self.choose_rank_impl(*args, deferred)
             if len(args) == 1:
                 return ranking_response.model_id, ranking_response.chosen_action_id, list(zip(ranking_response.action_ids, ranking_response.probabilities)), ranking_response.event_id
             return ranking_response.model_id, ranking_response.chosen_action_id, list(zip(ranking_response.action_ids, ranking_response.probabilities))

diff --git a/reinforcement_learning/bindings/python/setup.py b/reinforcement_learning/bindings/python/setup.py
@@ -25,7 +25,7 @@
 )
 
 setuptools.setup(
-    version = '0.0.9',
+    version = '0.1.0',
     name = 'rl_client',
     url = 'https://github.com/JohnLangford/vowpal_wabbit',
     description = 'Python binding for reinforcement learning client library',

diff --git a/reinforcement_learning/examples/python/basic_usage.py b/reinforcement_learning/examples/python/basic_usage.py
@@ -26,8 +26,8 @@ def main():
     print("chosen action id: " + str(chosen_action_id))
     print("all action probabilities " + str(actions_probabilities))
 
-    model_id, chosen_action_id, actions_probabilities, event_id = model.choose_rank(context)
-
+    model_id, chosen_action_id, actions_probabilities, event_id = model.choose_rank(context, deferred = True)
+    model.report_action_taken(event_id)
     print("event_id: " + event_id)
     print("model_id: " + model_id)
     print("chosen action id: " + str(chosen_action_id))

diff --git a/reinforcement_learning/examples/test_cpp/test_data_provider.cc b/reinforcement_learning/examples/test_cpp/test_data_provider.cc
@@ -104,16 +104,20 @@ void test_data_provider::log(size_t thread_id, size_t example_id, const reinforc
   logger << R"({"_label_cost":)" << -get_outcome(thread_id, example_id) << R"(,"_label_probability":)" << prob << R"(,"_label_Action":)" << (action_id + 1) << R"(,"_labelIndex":)" << action_id << ",";
 
   if (is_rewarded(thread_id, example_id)) {
+    reinforcement_learning::outcome_event outcome_evt;
     if (is_float_outcome)
-      reinforcement_learning::outcome_event::serialize(buffer, get_event_id(thread_id, example_id), get_outcome(thread_id, example_id));
+      outcome_evt = reinforcement_learning::outcome_event::report_outcome(buffer, get_event_id(thread_id, example_id), get_outcome(thread_id, example_id));
     else
-      reinforcement_learning::outcome_event::serialize(buffer, get_event_id(thread_id, example_id), get_outcome_json(thread_id, example_id));
-
+      outcome_evt = reinforcement_learning::outcome_event::report_outcome(buffer, get_event_id(thread_id, example_id), get_outcome_json(thread_id, example_id));
+    buffer.reset();
+    outcome_evt.serialize(buffer);
     logger << R"("o":[)" << buffer.str() << "],";
     buffer.reset();
   }
 
-  reinforcement_learning::ranking_event::serialize(buffer, get_event_id(thread_id, example_id), get_context(thread_id, example_id), response);
+  auto ranking_evt = reinforcement_learning::ranking_event::choose_rank(buffer, get_event_id(thread_id, example_id), get_context(thread_id, example_id), reinforcement_learning::action_flags::DEFAULT, response);
+  buffer.reset();
+  ranking_evt.serialize(buffer);
   const std::string buffer_str = buffer.str();
   logger << buffer_str.substr(1, buffer_str.length() - 1) << "}" << std::endl;
 }

diff --git a/reinforcement_learning/include/constants.h b/reinforcement_learning/include/constants.h
@@ -28,6 +28,7 @@ namespace reinforcement_learning {  namespace name {
       const char *const  INTERACTION_SENDER_IMPLEMENTATION    = "interaction.sender.implementation";
       const char *const  EH_TEST                 = "eventhub.mock";
       const char *const  TRACE_LOG_IMPLEMENTATION = "trace.logger.implementation";
+      const char *const  QUEUE_MODE = "queue.mode";
 }}
 
 namespace reinforcement_learning {  namespace value {

diff --git a/reinforcement_learning/include/live_model.h b/reinforcement_learning/include/live_model.h
@@ -6,6 +6,7 @@
  * @date 2018-07-18
  */
 #pragma once
+#include "action_flags.h"
 #include "ranking_response.h"
 #include "err_constants.h"
 #include "factory_resolver.h"
@@ -115,7 +116,7 @@ namespace reinforcement_learning {
     * @param event_id  The unique identifier for this interaction.  The same event_id should be used when
     *                  reporting the outcome for this action.
     * @param context_json Contains action, action features and context features in json format
-    * @param flags Action flags (0 - default, 1 - deferred)
+    * @param flags Action flags (see action_flags.h)
     * @param resp Ranking response contains the chosen action, probability distribution used for sampling actions and ranked actions
     * @param status  Optional field with detailed string description if there is an error
     * @return int Return error code.  This will also be returned in the api_status object
@@ -128,7 +129,7 @@ namespace reinforcement_learning {
     * and then sampling from it.  A unique event_id will be generated and returned in the ranking_response.
     * The same event_id should be used when reporting the outcome for this action.
     * @param context_json Contains action, action features and context features in json format
-    * @param flags Action flags (0 - default, 1 - deferred)
+    * @param flags Action flags (see action_flags.h)
     * @param resp Ranking response contains the chosen action, probability distribution used for sampling actions and ranked actions
     * @param status  Optional field with detailed string description if there is an error
     * @return int Return error code.  This will also be returned in the api_status object

diff --git a/reinforcement_learning/rlclientlib/async_batcher.cc b/reinforcement_learning/rlclientlib/async_batcher.cc
@@ -0,0 +1,12 @@
+#include <cstring>
+#include "logger/async_batcher.h"
+
+namespace reinforcement_learning {
+  /**
+   * @brief Translate a textual queue mode into its queue_mode_enum value.
+   *
+   * The comparison is an exact, case-sensitive match against "BLOCK"; every
+   * other string maps to DROP. A null pointer is treated like an unrecognized
+   * value and also maps to DROP rather than being handed to std::strcmp,
+   * whose behavior is undefined for null arguments.
+   *
+   * @param queue_mode Null-terminated mode name; may be null.
+   * @return BLOCK when queue_mode equals "BLOCK", otherwise DROP.
+   */
+  queue_mode_enum to_queue_mode_enum(const char* queue_mode) {
+    if (queue_mode != nullptr && std::strcmp(queue_mode, "BLOCK") == 0) {
+      return BLOCK;
+    }
+    return DROP;
+  }
+}
diff --git a/reinforcement_learning/rlclientlib/live_model.cc b/reinforcement_learning/rlclientlib/live_model.cc
@@ -1,3 +1,4 @@
+#include "action_flags.h"
 #include "live_model.h"
 #include "live_model_impl.h"
 #include "err_constants.h"
@@ -43,34 +44,34 @@ namespace reinforcement_learning
                               api_status* status)
   {
     INIT_CHECK();
-    return _pimpl->choose_rank(event_id, context_json, response, status);
+    return choose_rank(event_id, context_json, action_flags::DEFAULT, response, status);
   }
 
   int live_model::choose_rank(const char* context_json, ranking_response& response, api_status* status)
   {
     INIT_CHECK();
-    return _pimpl->choose_rank(context_json, response, status);
+    return choose_rank(context_json, action_flags::DEFAULT, response, status);
   }
 
   // Passes the caller-supplied action flags through to the implementation.
   int live_model::choose_rank(const char* event_id, const char* context_json, unsigned int flags, ranking_response& response,
     api_status* status)
   {
     INIT_CHECK();
-    return _pimpl->choose_rank(event_id, context_json, response, status);
+    return _pimpl->choose_rank(event_id, context_json, flags, response, status);
   }
 
   // Passes the caller-supplied action flags through to the implementation.
   int live_model::choose_rank(const char* context_json, unsigned int flags, ranking_response& response, api_status* status)
   {
     INIT_CHECK();
-    return _pimpl->choose_rank(context_json, response, status);
+    return _pimpl->choose_rank(context_json, flags, response, status);
   }
 
   // Delegates the action-taken report to the implementation.
   int live_model::report_action_taken(const char* event_id, api_status* status) {
     INIT_CHECK();
-    return error_code::success;
+    return _pimpl->report_action_taken(event_id, status);
   }
 
   int live_model::report_outcome(const char* event_id, const char* outcome, api_status* status)

diff --git a/reinforcement_learning/rlclientlib/live_model_impl.cc b/reinforcement_learning/rlclientlib/live_model_impl.cc
@@ -49,7 +49,7 @@ namespace reinforcement_learning {
     return error_code::success;
   }
 
-  int live_model_impl::choose_rank(const char* event_id, const char* context, ranking_response& response,
+  int live_model_impl::choose_rank(const char* event_id, const char* context, unsigned int flags, ranking_response& response,
     api_status* status) {
     response.clear();
     //clear previous errors if any
@@ -65,7 +65,7 @@ namespace reinforcement_learning {
       RETURN_IF_FAIL(explore_exploit(event_id, context, response, status));
     }
     response.set_event_id(event_id);
-    RETURN_IF_FAIL(_ranking_logger->log(event_id, context, response, status));
+    RETURN_IF_FAIL(_ranking_logger->log(event_id, context, flags, response, status));
 
     // Check watchdog for any background errors. Do this at the end of function so that the work is still done.
     if (_watchdog.has_background_error_been_reported()) {
@@ -76,11 +76,18 @@ namespace reinforcement_learning {
   }
 
   //here the event_id is auto-generated
-  int live_model_impl::choose_rank(const char* context, ranking_response& response, api_status* status) {
-    return choose_rank(boost::uuids::to_string(boost::uuids::random_generator()()).c_str(), context, response,
+  int live_model_impl::choose_rank(const char* context, unsigned int flags, ranking_response& response, api_status* status) {
+    return choose_rank(boost::uuids::to_string(boost::uuids::random_generator()()).c_str(), context, flags, response,
       status);
   }
 
+  // Forwards an action-taken report for event_id to the outcome logger.
+  // Any error previously stored in status is cleared first; the value returned
+  // is whatever the outcome logger's report_action_taken returns.
+  int live_model_impl::report_action_taken(const char* event_id, api_status* status) {
+    // Clear previous errors if any
+    api_status::try_clear(status);
+    // Send the action-taken event through the outcome logger
+    return _outcome_logger->report_action_taken(event_id, status);
+  }
+
   int live_model_impl::report_outcome(const char* event_id, const char* outcome, api_status* status) {
     // Check arguments
     RETURN_IF_FAIL(check_null_or_empty(event_id, outcome, status));