Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/@vw/master' into cs-simulator
Browse files Browse the repository at this point in the history
  • Loading branch information
lokitoth committed Oct 23, 2018
2 parents 1168aba + b3aa2c7 commit 7da0cb7
Show file tree
Hide file tree
Showing 30 changed files with 519 additions and 288 deletions.
23 changes: 7 additions & 16 deletions cs/setup/setup.wixproj
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="..\..\vowpalwabbit\packages\WiX.3.10.3\build\wix.props" Condition="Exists('..\..\vowpalwabbit\packages\WiX.3.10.3\build\wix.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
Expand All @@ -15,23 +15,14 @@
<NuGetPackageImportStamp>
</NuGetPackageImportStamp>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
<OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
<IntermediateOutputPath>obj\$(Configuration)\</IntermediateOutputPath>
<PropertyGroup Condition=" '$(Configuration)' == 'Debug' ">
<OutputPath>$(SolutionDir)out\target\$(Configuration)\$(Platform)\</OutputPath>
<IntermediateOutputPath>$(SolutionDir)out\int\$(Configuration)\$(Platform)\$(ProjectName)</IntermediateOutputPath>
<DefineConstants>Debug</DefineConstants>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
<OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
<IntermediateOutputPath>obj\$(Configuration)\</IntermediateOutputPath>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
<DefineConstants>Debug</DefineConstants>
<OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
<IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
<OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
<IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
<PropertyGroup Condition=" '$(Configuration)' == 'Release' ">
<OutputPath>$(SolutionDir)out\target\$(Configuration)\$(Platform)\</OutputPath>
<IntermediateOutputPath>$(SolutionDir)out\int\$(Configuration)\$(Platform)\$(ProjectName)</IntermediateOutputPath>
<SuppressValidation>True</SuppressValidation>
</PropertyGroup>
<ItemGroup>
Expand Down
16 changes: 10 additions & 6 deletions cs/setup_bundle/Product.wxs
Expand Up @@ -8,7 +8,7 @@
<!-- .NET Dependency -->
<PackageGroupRef Id="NetFx45Web"/>
<!-- Visual Studio Runtime Dependency -->
<PackageGroupRef Id="redist_vc120"/>
<PackageGroupRef Id="redist_vc140"/>
<!-- Actual binaries-->
<PackageGroupRef Id="VowpalWabbitPackage"/>
</Chain>
Expand All @@ -24,15 +24,19 @@
</PackageGroup>
</Fragment>

<!--visual studio runtime 12-->
<!--visual studio runtime 14-->
<Fragment>
<PackageGroup Id="redist_vc120">
<ExePackage Id="vc120" Cache="yes" PerMachine="yes" Permanent="yes" Vital="yes" Compressed="yes"
SourceFile="C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\redist\1033\vcredist_x64.exe"
<PackageGroup Id="redist_vc140">
<ExePackage Id="vc140" Cache="yes" PerMachine="yes" Permanent="yes" Vital="yes" Compressed="yes"
SourceFile="C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\redist\1033\vcredist_x64.exe"
Name="vcredist_x64.exe"
InstallCommand="/quiet /norestart"
Protocol="burn"
/>
>
<!-- Below exit codes come from https://stackoverflow.com/questions/37396773/wix-burn-vcredist -->
<ExitCode Value="3010" Behavior="forceReboot"/>
<ExitCode Value="1638" Behavior="success"/> <!-- Ignore "Newer version installed" error -->
</ExePackage>

</PackageGroup>
</Fragment>
Expand Down
42 changes: 8 additions & 34 deletions cs/setup_bundle/setup_bundle.wixproj
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="..\..\vowpalwabbit\packages\WiX.3.10.3\build\wix.props" Condition="Exists('..\..\vowpalwabbit\packages\WiX.3.10.3\build\wix.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
Expand All @@ -15,41 +15,15 @@
<NuGetPackageImportStamp>
</NuGetPackageImportStamp>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
<OutputPath>bin\$(Configuration)\</OutputPath>
<IntermediateOutputPath>obj\$(Configuration)\</IntermediateOutputPath>
<PropertyGroup Condition=" '$(Configuration)' == 'Debug' ">
<OutputPath>$(SolutionDir)out\target\$(Configuration)\$(Platform)\</OutputPath>
<IntermediateOutputPath>$(SolutionDir)out\int\$(Configuration)\$(Platform)\$(ProjectName)</IntermediateOutputPath>
<DefineConstants>Debug</DefineConstants>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
<OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
<IntermediateOutputPath>obj\$(Configuration)\</IntermediateOutputPath>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
<DefineConstants>Debug</DefineConstants>
<OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
<IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
<OutputPath>..\..\vowpalwabbit\x64\$(Configuration)\</OutputPath>
<IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
<DefineConstants>Debug</DefineConstants>
<OutputPath>bin\$(Platform)\$(Configuration)\</OutputPath>
<IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
<OutputPath>bin\$(Platform)\$(Configuration)\</OutputPath>
<IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
<DefineConstants>Debug</DefineConstants>
<OutputPath>bin\$(Platform)\$(Configuration)\</OutputPath>
<IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
<OutputPath>bin\$(Platform)\$(Configuration)\</OutputPath>
<IntermediateOutputPath>obj\$(Platform)\$(Configuration)\</IntermediateOutputPath>
<PropertyGroup Condition=" '$(Configuration)' == 'Release' ">
<OutputPath>$(SolutionDir)out\target\$(Configuration)\$(Platform)\</OutputPath>
<IntermediateOutputPath>$(SolutionDir)out\int\$(Configuration)\$(Platform)\$(ProjectName)</IntermediateOutputPath>
<SuppressValidation>True</SuppressValidation>
</PropertyGroup>
<ItemGroup>
<Compile Include="Product.wxs" />
Expand Down
2 changes: 1 addition & 1 deletion cs/version.props
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<VowpalWabbitAssemblyVersion>8.4.0.1</VowpalWabbitAssemblyVersion>
<VowpalWabbitAssemblyVersion>8.6.1.0</VowpalWabbitAssemblyVersion>
</PropertyGroup>
</Project>
16 changes: 12 additions & 4 deletions reinforcement_learning/bindings/python/py_api.cc
Expand Up @@ -93,24 +93,32 @@ namespace reinforcement_learning {
return response;
}

ranking_response live_model::choose_rank(const char* event_id, const char* context_json) {
ranking_response live_model::choose_rank(const char* event_id, const char* context_json, bool deferred) {
reinforcement_learning::ranking_response response_impl;
reinforcement_learning::api_status status;
_impl.choose_rank(event_id, context_json, response_impl, &status);
unsigned int flags = deferred ? action_flags::DEFERRED : action_flags::DEFAULT;
_impl.choose_rank(event_id, context_json, flags, response_impl, &status);
check_api_status(status);

return convert_ranking_response(response_impl);
}
// event_id is auto-generated.
ranking_response live_model::choose_rank(const char* context_json) {
ranking_response live_model::choose_rank(const char* context_json, bool deferred) {
reinforcement_learning::ranking_response response_impl;
reinforcement_learning::api_status status;
_impl.choose_rank(context_json, response_impl, &status);
unsigned int flags = deferred ? action_flags::DEFERRED : action_flags::DEFAULT;
_impl.choose_rank(context_json, flags, response_impl, &status);
check_api_status(status);

return convert_ranking_response(response_impl);
}

void live_model::report_action_taken(const char* event_id) {
reinforcement_learning::api_status status;
_impl.report_action_taken(event_id, &status);
check_api_status(status);
}

void live_model::report_outcome(const char* event_id, const char* outcome) {
reinforcement_learning::api_status status;
_impl.report_outcome(event_id, outcome, &status);
Expand Down
6 changes: 4 additions & 2 deletions reinforcement_learning/bindings/python/py_api.h
Expand Up @@ -34,9 +34,11 @@ namespace reinforcement_learning {

void init();

ranking_response choose_rank(const char* event_id, const char* context_json);
ranking_response choose_rank(const char* event_id, const char* context_json, bool deferred);
// event_id is auto-generated.
ranking_response choose_rank(const char* context_json);
ranking_response choose_rank(const char* context_json, bool deferred);

void report_action_taken(const char* event_id);

void report_outcome(const char* event_id, const char* outcome);
void report_outcome(const char* event_id, float outcome);
Expand Down
10 changes: 6 additions & 4 deletions reinforcement_learning/bindings/python/rl_client.i
Expand Up @@ -47,16 +47,18 @@ namespace reinforcement_learning {
void init();

%rename(choose_rank_impl) choose_rank;
reinforcement_learning::python::ranking_response choose_rank(const char* event_id, const char* context_json);
reinforcement_learning::python::ranking_response choose_rank(const char* event_id, const char* context_json, bool deferred);
// event_id is auto-generated.
reinforcement_learning::python::ranking_response choose_rank(const char* context_json);
reinforcement_learning::python::ranking_response choose_rank(const char* context_json, bool deferred);

void report_action_taken(const char* event_id);

void report_outcome(const char* event_id, const char* outcome);
void report_outcome(const char* event_id, float outcome);

%pythoncode %{
def choose_rank(self, *args):
ranking_response = self.choose_rank_impl(*args)
def choose_rank(self, *args, deferred = False):
ranking_response = self.choose_rank_impl(*args, deferred)
if len(args) == 1:
return ranking_response.model_id, ranking_response.chosen_action_id, list(zip(ranking_response.action_ids, ranking_response.probabilities)), ranking_response.event_id
return ranking_response.model_id, ranking_response.chosen_action_id, list(zip(ranking_response.action_ids, ranking_response.probabilities))
Expand Down
2 changes: 1 addition & 1 deletion reinforcement_learning/bindings/python/setup.py
Expand Up @@ -25,7 +25,7 @@
)

setuptools.setup(
version = '0.0.9',
version = '0.1.0',
name = 'rl_client',
url = 'https://github.com/JohnLangford/vowpal_wabbit',
description = 'Python binding for reinforcement learning client library',
Expand Down
4 changes: 2 additions & 2 deletions reinforcement_learning/examples/python/basic_usage.py
Expand Up @@ -26,8 +26,8 @@ def main():
print("chosen action id: " + str(chosen_action_id))
print("all action probabilities " + str(actions_probabilities))

model_id, chosen_action_id, actions_probabilities, event_id = model.choose_rank(context)

model_id, chosen_action_id, actions_probabilities, event_id = model.choose_rank(context, deferred = True)
model.report_action_taken(event_id)
print("event_id: " + event_id)
print("model_id: " + model_id)
print("chosen action id: " + str(chosen_action_id))
Expand Down
12 changes: 8 additions & 4 deletions reinforcement_learning/examples/test_cpp/test_data_provider.cc
Expand Up @@ -104,16 +104,20 @@ void test_data_provider::log(size_t thread_id, size_t example_id, const reinforc
logger << R"({"_label_cost":)" << -get_outcome(thread_id, example_id) << R"(,"_label_probability":)" << prob << R"(,"_label_Action":)" << (action_id + 1) << R"(,"_labelIndex":)" << action_id << ",";

if (is_rewarded(thread_id, example_id)) {
reinforcement_learning::outcome_event outcome_evt;
if (is_float_outcome)
reinforcement_learning::outcome_event::serialize(buffer, get_event_id(thread_id, example_id), get_outcome(thread_id, example_id));
outcome_evt = reinforcement_learning::outcome_event::report_outcome(buffer, get_event_id(thread_id, example_id), get_outcome(thread_id, example_id));
else
reinforcement_learning::outcome_event::serialize(buffer, get_event_id(thread_id, example_id), get_outcome_json(thread_id, example_id));

outcome_evt = reinforcement_learning::outcome_event::report_outcome(buffer, get_event_id(thread_id, example_id), get_outcome_json(thread_id, example_id));
buffer.reset();
outcome_evt.serialize(buffer);
logger << R"("o":[)" << buffer.str() << "],";
buffer.reset();
}

reinforcement_learning::ranking_event::serialize(buffer, get_event_id(thread_id, example_id), get_context(thread_id, example_id), response);
auto ranking_evt = reinforcement_learning::ranking_event::choose_rank(buffer, get_event_id(thread_id, example_id), get_context(thread_id, example_id), reinforcement_learning::action_flags::DEFAULT, response);
buffer.reset();
ranking_evt.serialize(buffer);
const std::string buffer_str = buffer.str();
logger << buffer_str.substr(1, buffer_str.length() - 1) << "}" << std::endl;
}
Expand Down
1 change: 1 addition & 0 deletions reinforcement_learning/include/constants.h
Expand Up @@ -28,6 +28,7 @@ namespace reinforcement_learning { namespace name {
const char *const INTERACTION_SENDER_IMPLEMENTATION = "interaction.sender.implementation";
const char *const EH_TEST = "eventhub.mock";
const char *const TRACE_LOG_IMPLEMENTATION = "trace.logger.implementation";
const char *const QUEUE_MODE = "queue.mode";
}}

namespace reinforcement_learning { namespace value {
Expand Down
5 changes: 3 additions & 2 deletions reinforcement_learning/include/live_model.h
Expand Up @@ -6,6 +6,7 @@
* @date 2018-07-18
*/
#pragma once
#include "action_flags.h"
#include "ranking_response.h"
#include "err_constants.h"
#include "factory_resolver.h"
Expand Down Expand Up @@ -115,7 +116,7 @@ namespace reinforcement_learning {
* @param event_id The unique identifier for this interaction. The same event_id should be used when
* reporting the outcome for this action.
* @param context_json Contains action, action features and context features in json format
* @param flags Action flags (0 - default, 1 - deferred)
* @param flags Action flags (see action_flags.h)
* @param resp Ranking response contains the chosen action, probability distribution used for sampling actions and ranked actions
* @param status Optional field with detailed string description if there is an error
* @return int Return error code. This will also be returned in the api_status object
Expand All @@ -128,7 +129,7 @@ namespace reinforcement_learning {
* and then sampling from it. A unique event_id will be generated and returned in the ranking_response.
* The same event_id should be used when reporting the outcome for this action.
* @param context_json Contains action, action features and context features in json format
* @param flags Action flags (0 - default, 1 - deferred)
* @param flags Action flags (see action_flags.h)
* @param resp Ranking response contains the chosen action, probability distribution used for sampling actions and ranked actions
* @param status Optional field with detailed string description if there is an error
* @return int Return error code. This will also be returned in the api_status object
Expand Down
12 changes: 12 additions & 0 deletions reinforcement_learning/rlclientlib/async_batcher.cc
@@ -0,0 +1,12 @@
#include <cstring>
#include "logger/async_batcher.h"

namespace reinforcement_learning {
  /**
   * @brief Convert a queue-mode string into its queue_mode_enum value.
   *
   * @param queue_mode C string to interpret; may be null.
   * @return BLOCK when queue_mode is exactly "BLOCK" (case-sensitive comparison);
   *         DROP for every other value, including a null pointer.
   */
  queue_mode_enum to_queue_mode_enum(const char* queue_mode) {
    // std::strcmp has undefined behavior when handed a null pointer, so a null
    // input is treated like any other unrecognized value and falls back to DROP.
    if (queue_mode != nullptr && std::strcmp(queue_mode, "BLOCK") == 0) {
      return BLOCK;
    }
    return DROP;
  }
}
11 changes: 6 additions & 5 deletions reinforcement_learning/rlclientlib/live_model.cc
@@ -1,3 +1,4 @@
#include "action_flags.h"
#include "live_model.h"
#include "live_model_impl.h"
#include "err_constants.h"
Expand Down Expand Up @@ -43,34 +44,34 @@ namespace reinforcement_learning
api_status* status)
{
INIT_CHECK();
return _pimpl->choose_rank(event_id, context_json, response, status);
return choose_rank(event_id, context_json, action_flags::DEFAULT, response, status);
}

int live_model::choose_rank(const char* context_json, ranking_response& response, api_status* status)
{
INIT_CHECK();
return _pimpl->choose_rank(context_json, response, status);
return choose_rank(context_json, action_flags::DEFAULT, response, status);
}

//not implemented yet
int live_model::choose_rank(const char* event_id, const char* context_json, unsigned int flags, ranking_response& response,
api_status* status)
{
INIT_CHECK();
return _pimpl->choose_rank(event_id, context_json, response, status);
return _pimpl->choose_rank(event_id, context_json, flags, response, status);
}

//not implemented yet
int live_model::choose_rank(const char* context_json, unsigned int flags, ranking_response& response, api_status* status)
{
INIT_CHECK();
return _pimpl->choose_rank(context_json, response, status);
return _pimpl->choose_rank(context_json, flags, response, status);
}

//not implemented yet
int live_model::report_action_taken(const char* event_id, api_status* status) {
INIT_CHECK();
return error_code::success;
return _pimpl->report_action_taken(event_id, status);
}

int live_model::report_outcome(const char* event_id, const char* outcome, api_status* status)
Expand Down
15 changes: 11 additions & 4 deletions reinforcement_learning/rlclientlib/live_model_impl.cc
Expand Up @@ -49,7 +49,7 @@ namespace reinforcement_learning {
return error_code::success;
}

int live_model_impl::choose_rank(const char* event_id, const char* context, ranking_response& response,
int live_model_impl::choose_rank(const char* event_id, const char* context, unsigned int flags, ranking_response& response,
api_status* status) {
response.clear();
//clear previous errors if any
Expand All @@ -65,7 +65,7 @@ namespace reinforcement_learning {
RETURN_IF_FAIL(explore_exploit(event_id, context, response, status));
}
response.set_event_id(event_id);
RETURN_IF_FAIL(_ranking_logger->log(event_id, context, response, status));
RETURN_IF_FAIL(_ranking_logger->log(event_id, context, flags, response, status));

// Check watchdog for any background errors. Do this at the end of function so that the work is still done.
if (_watchdog.has_background_error_been_reported()) {
Expand All @@ -76,11 +76,18 @@ namespace reinforcement_learning {
}

//here the event_id is auto-generated
int live_model_impl::choose_rank(const char* context, ranking_response& response, api_status* status) {
return choose_rank(boost::uuids::to_string(boost::uuids::random_generator()()).c_str(), context, response,
int live_model_impl::choose_rank(const char* context, unsigned int flags, ranking_response& response, api_status* status) {
return choose_rank(boost::uuids::to_string(boost::uuids::random_generator()()).c_str(), context, flags, response,
status);
}

int live_model_impl::report_action_taken(const char* event_id, api_status* status) {
  // Start from a clean status object: clear previous errors if any.
  api_status::try_clear(status);

  // Delegate to the outcome logger and return its result code unchanged.
  const int result = _outcome_logger->report_action_taken(event_id, status);
  return result;
}

int live_model_impl::report_outcome(const char* event_id, const char* outcome, api_status* status) {
// Check arguments
RETURN_IF_FAIL(check_null_or_empty(event_id, outcome, status));
Expand Down

0 comments on commit 7da0cb7

Please sign in to comment.