Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ataymano/c wrapper fix2 #1859

Merged
merged 20 commits into from Jun 4, 2019
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions test/train-sets/ref/0151.stderr
Expand Up @@ -7,15 +7,15 @@ Reading datafile = train-sets/0080.dat
num sources = 1
average since example example current current current
loss last counter weight label predict features
0.596384 0.596384 1 1.0 1.0000 0.2277 5
1.618849 2.641314 2 2.0 2.0000 0.3748 5
1.000556 0.382262 4 4.0 2.0000 2.0000 5
0.484530 0.484530 1 1.0 1.0000 0.3039 5
1.414942 2.345354 2 2.0 2.0000 0.4685 5
0.902065 0.389187 4 4.0 2.0000 2.0000 5

finished run
number of examples = 4
weighted example sum = 4.000000
weighted label sum = 6.000000
average loss = 1.000556
average loss = 0.902065
best constant = 1.500000
best constant's loss = 0.250000
total feature number = 18
5 changes: 5 additions & 0 deletions test/unit_test/CMakeLists.txt
Expand Up @@ -7,6 +7,11 @@ add_executable(vw-unit-test.out
target_include_directories(vw-unit-test.out PRIVATE $<TARGET_PROPERTY:vw,INCLUDE_DIRECTORIES>)
target_link_libraries(vw-unit-test.out PRIVATE vw allreduce Boost::unit_test_framework Boost::system)

if(NOT DEFINED DO_NOT_BUILD_VW_C_WRAPPER)
target_sources(vw-unit-test.out PUBLIC vwdll_test.cc)
target_link_libraries(vw-unit-test.out PRIVATE vw_c_wrapper)
endif()

# Communicate that Boost Unit Test is being statically linked
if(STATIC_LINK_VW)
target_compile_definitions(vw-unit-test.out PRIVATE STATIC_LINK_VW)
Expand Down
348 changes: 176 additions & 172 deletions test/unit_test/unit_test.vcxproj

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions test/unit_test/unit_test.vcxproj.filters
Expand Up @@ -36,6 +36,9 @@
<ClCompile Include="object_pool_test.cc">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="vwdll_test.cc">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
Expand Down
77 changes: 77 additions & 0 deletions test/unit_test/vwdll_test.cc
@@ -0,0 +1,77 @@

#ifndef STATIC_LINK_VW
#define BOOST_TEST_DYN_LINK
#endif

#include <boost/test/unit_test.hpp>

#include "vwdll.h"
#include "vw.h"

template<class T>
void check_weights_equal(T& first, T& second)
{
auto secondIt = second.begin();
for (auto firstIt : first)
{
BOOST_CHECK_EQUAL(firstIt, *secondIt);
++secondIt;
}
BOOST_CHECK_EQUAL(secondIt == second.end(), true);
}

BOOST_AUTO_TEST_CASE(vw_dll_parsed_and_constructed_example_parity)
{
//parse example
VW_HANDLE handle1 = VW_InitializeA("-q st --noconstant --quiet");
VW_EXAMPLE example_parsed;
example_parsed = VW_ReadExampleA(handle1, "1 |s p^the_man w^the w^man |t p^un_homme w^un w^homme");

//construct example
VW_HANDLE handle2 = VW_InitializeA("-q st --noconstant --quiet");
VW_EXAMPLE example_constructed;
auto fs = VW_InitializeFeatureSpaces(2);

auto first = VW_GetFeatureSpace(fs, 0);
VW_InitFeatures(first, 3);
auto shash = VW_SetFeatureSpace(handle2, first, "s");
VW_SetFeature(VW_GetFeature(first, 0), VW_HashFeatureA(handle2, "p^the_man", shash), 1.0f);
VW_SetFeature(VW_GetFeature(first, 1), VW_HashFeatureA(handle2, "w^the", shash), 1.0f);
VW_SetFeature(VW_GetFeature(first, 2), VW_HashFeatureA(handle2, "w^man", shash), 1.0f);

auto second = VW_GetFeatureSpace(fs, 1);
VW_InitFeatures(second, 3);
auto thash = VW_SetFeatureSpace(handle2, second, "t");
VW_SetFeature(VW_GetFeature(second, 0), VW_HashFeatureA(handle2, "p^un_homme", thash), 1.0f);
VW_SetFeature(VW_GetFeature(second, 1), VW_HashFeatureA(handle2, "w^un", thash), 1.0f);
VW_SetFeature(VW_GetFeature(second, 2), VW_HashFeatureA(handle2, "w^homme", thash), 1.0f);

example_constructed = VW_ImportExample(handle2, "1", fs, 2);


// learn both
auto score_parsed = VW_Learn(handle1, example_parsed);
auto score_constructed = VW_Learn(handle2, example_parsed);


//check parity
BOOST_CHECK_EQUAL(score_parsed, score_constructed);
auto vw1 = static_cast<vw*>(handle1);
auto vw2 = static_cast<vw*>(handle2);

BOOST_CHECK_EQUAL(vw1->weights.sparse, vw2->weights.sparse);

if (vw1->weights.sparse) {
check_weights_equal(vw1->weights.sparse_weights, vw2->weights.sparse_weights);
}
else {
check_weights_equal(vw1->weights.dense_weights, vw2->weights.dense_weights);
}

VW_ReleaseFeatureSpace(fs, 2);

VW_Finish(handle1);
VW_Finish(handle2);
}


36 changes: 19 additions & 17 deletions vowpalwabbit/parse_example.cc
Expand Up @@ -69,6 +69,7 @@ class TC_parser
bool* spelling_features;
v_array<char> spelling;
uint32_t hash_seed;
uint64_t parse_mask;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you be able to explain what the parse_mask is for?


vector<feature_dict*>* namespace_dictionaries;

Expand Down Expand Up @@ -138,7 +139,7 @@ class TC_parser
v = cur_channel_v * featureValue();
uint64_t word_hash;
if (feature_name.end != feature_name.begin)
word_hash = (p->hasher(feature_name, channel_hash));
word_hash = (p->hasher(feature_name, channel_hash) & parse_mask);
else
word_hash = channel_hash + anon++;
if (v == 0)
Expand Down Expand Up @@ -407,6 +408,7 @@ class TC_parser
this->namespace_dictionaries = all.namespace_dictionaries;
this->base = nullptr;
this->hash_seed = all.hash_seed;
this->parse_mask = all.parse_mask;
listNameSpace();
if (base != nullptr)
free(base);
Expand Down Expand Up @@ -457,21 +459,22 @@ void substring_to_example(vw* all, example* ae, substring example)
TC_parser<false> parser_line(bar_location, example.end, *all, ae);
}

std::vector<std::string> split(char* phrase, std::string delimiter){
std::vector<std::string> list;
std::string s = std::string(phrase);
size_t pos = 0;
std::string token;
while ((pos = s.find(delimiter)) != std::string::npos) {
token = s.substr(0, pos);
list.push_back(token);
s.erase(0, pos + delimiter.length());
}
list.push_back(s);
return list;
std::vector<std::string> split(char* phrase, std::string delimiter)
{
std::vector<std::string> list;
std::string s = std::string(phrase);
size_t pos = 0;
std::string token;
while ((pos = s.find(delimiter)) != std::string::npos)
{
token = s.substr(0, pos);
list.push_back(token);
s.erase(0, pos + delimiter.length());
}
list.push_back(s);
return list;
}


namespace VW
{
void read_line(vw& all, example* ex, char* line)
Expand All @@ -484,16 +487,15 @@ void read_line(vw& all, example* ex, char* line)
void read_lines(vw* all, char* line, size_t /*len*/, v_array<example*>& examples)
{
auto lines = split(line, "\n");
for(size_t i = 0; i < lines.size(); i++)
for (size_t i = 0; i < lines.size(); i++)
{
// Check if a new empty example needs to be added.
if(examples.size() < i + 1)
if (examples.size() < i + 1)
{
examples.push_back(&VW::get_unused_example(all));
}
read_line(*all, examples[i], const_cast<char*>(lines[i].c_str()));
}
}


} // namespace VW
41 changes: 39 additions & 2 deletions vowpalwabbit/vwdll.cpp
Expand Up @@ -77,19 +77,30 @@ VW_DLL_MEMBER void VW_CALLING_CONV VW_Finish(VW_HANDLE handle)
VW::finish(*pointer);
}

VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ImportExample(VW_HANDLE handle, const char * label, VW_FEATURE_SPACE* features, size_t len)
VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ImportExample(VW_HANDLE handle, const char * label, VW_FEATURE_SPACE features, size_t len)
{ vw * pointer = static_cast<vw*>(handle);
VW::primitive_feature_space * f = reinterpret_cast<VW::primitive_feature_space*>( features );
return static_cast<VW_EXAMPLE>(VW::import_example(*pointer, label, f, len));
}

VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_InitializeFeatureSpaces(size_t len)
{
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are these needed precisely?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In other words, what is the missing functionality amongst existing interfaces?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With existing API the only way to "construct" example - VwExample ImportExample(FeatureSpace, Label);
But the only way to get feature space - FeatureSpace ExportExample(VwExample).
So I do not see any way to construct example without introducing functions required for initializing, adding features / namespaces etc to feature space. Please correct me if I am wrong.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the question is: What is the api? Is it vwdll.cpp? Or is it vw.h? To the extent that vwdll.cpp requires additional functions exposing vw.h that seems fine. To the extent that vw.h is missing things, I'd like to understand more deeply. Is vw.h missing things? If not, can these extra interface functions just call the vw.h interface?

Copy link
Member Author

@ataymano ataymano Jun 3, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding that cpp api is vw.h while c one is vwdll.
My previous comment was about vwdll.
vw.h is not missing anything - at least allows to create example, since it is exposing full primitive_feature_space implementation:

struct primitive_feature_space // just a helper definition.

So given current vw.h code, direct manipulations with primitive_feature space is technically "using vw.h interface".
vw.h can be refactored to hide its feature_space implementation details behind the interfaces, but still looks like corresponding flat c methods have to be added to vwdll.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good. So, can we make sure the added vwdll functions use functions in vw.h? Currently, it seems they are not, so this is sort-of creating an alternate interface.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, let me try.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sent the update draft, please take a look.

return static_cast<VW_FEATURE_SPACE>(new VW::primitive_feature_space[len]);
}

VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_GetFeatureSpace(VW_FEATURE_SPACE first, size_t index)
{
VW::primitive_feature_space* f = reinterpret_cast<VW::primitive_feature_space*>(first);
return static_cast<VW_FEATURE_SPACE>(&f[index]);
}

VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_ExportExample(VW_HANDLE handle, VW_EXAMPLE e, size_t * plen)
{ vw* pointer = static_cast<vw*>(handle);
example* ex = static_cast<example*>(e);
return static_cast<VW_FEATURE_SPACE>(VW::export_example(*pointer, ex, *plen));
}

VW_DLL_MEMBER void VW_CALLING_CONV VW_ReleaseFeatureSpace(VW_FEATURE_SPACE* features, size_t len)
VW_DLL_MEMBER void VW_CALLING_CONV VW_ReleaseFeatureSpace(VW_FEATURE_SPACE features, size_t len)
{ VW::primitive_feature_space * f = reinterpret_cast<VW::primitive_feature_space*>( features );
VW::releaseFeatureSpace(f, len);
}
Expand Down Expand Up @@ -164,6 +175,32 @@ VW_DLL_MEMBER float VW_CALLING_CONV VW_GetConfidence(VW_EXAMPLE e)
{ return VW::get_confidence(static_cast<example*>(e));
}

VW_DLL_MEMBER size_t VW_CALLING_CONV VW_SetFeatureSpace(VW_HANDLE handle, VW_FEATURE_SPACE feature_space, const char* name)
{ VW::primitive_feature_space* f = reinterpret_cast<VW::primitive_feature_space*>(feature_space);
f->name = *name;
return VW_HashSpaceA(handle, name);
}

VW_DLL_MEMBER void VW_CALLING_CONV VW_InitFeatures(VW_FEATURE_SPACE feature_space, size_t features_count)
{
VW::primitive_feature_space* fs = reinterpret_cast<VW::primitive_feature_space*>(feature_space);
fs->fs = new feature[features_count];
fs->len = features_count;
}

VW_DLL_MEMBER VW_FEATURE VW_CALLING_CONV VW_GetFeature(VW_FEATURE_SPACE feature_space, size_t index)
{
VW::primitive_feature_space* fs = reinterpret_cast<VW::primitive_feature_space*>(feature_space);
return &(fs->fs[index]);
}

VW_DLL_MEMBER void VW_CALLING_CONV VW_SetFeature(VW_FEATURE f, size_t feature_hash, float value)
{
feature* _feature = reinterpret_cast<feature*>(f);
_feature->weight_index = feature_hash;
_feature->x = value;
}

VW_DLL_MEMBER VW_FEATURE VW_CALLING_CONV VW_GetFeatures(VW_HANDLE handle, VW_EXAMPLE e, size_t* plen)
{ vw* pointer = static_cast<vw*>(handle);
return VW::get_features(*pointer, static_cast<example*>(e), *plen);
Expand Down
12 changes: 10 additions & 2 deletions vowpalwabbit/vwdll.h
Expand Up @@ -73,10 +73,12 @@ extern "C"
VW_DLL_MEMBER void VW_CALLING_CONV VW_Finish(VW_HANDLE handle);

VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ImportExample(
VW_HANDLE handle, const char* label, VW_FEATURE_SPACE* features, size_t len);
VW_HANDLE handle, const char* label, VW_FEATURE_SPACE features, size_t len);

VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_InitializeFeatureSpaces(size_t len);
VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_GetFeatureSpace(VW_FEATURE_SPACE first, size_t index);
VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_ExportExample(VW_HANDLE handle, VW_EXAMPLE e, size_t* plen);
VW_DLL_MEMBER void VW_CALLING_CONV VW_ReleaseFeatureSpace(VW_FEATURE_SPACE* features, size_t len);
VW_DLL_MEMBER void VW_CALLING_CONV VW_ReleaseFeatureSpace(VW_FEATURE_SPACE features, size_t len);
#ifdef USE_CODECVT
VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ReadExample(VW_HANDLE handle, const char16_t* line);
#endif
Expand All @@ -100,6 +102,10 @@ extern "C"
VW_DLL_MEMBER const char* VW_CALLING_CONV VW_GetTag(VW_EXAMPLE e);
VW_DLL_MEMBER size_t VW_CALLING_CONV VW_GetFeatureNumber(VW_EXAMPLE e);
VW_DLL_MEMBER float VW_CALLING_CONV VW_GetConfidence(VW_EXAMPLE e);
VW_DLL_MEMBER size_t VW_CALLING_CONV VW_SetFeatureSpace(VW_HANDLE handle, VW_FEATURE_SPACE feature_space, const char* name);
VW_DLL_MEMBER void VW_CALLING_CONV VW_InitFeatures(VW_FEATURE_SPACE feature_space, size_t features_count);
VW_DLL_MEMBER VW_FEATURE VW_CALLING_CONV VW_GetFeature(VW_FEATURE_SPACE feature_space, size_t index);
VW_DLL_MEMBER void VW_CALLING_CONV VW_SetFeature(VW_FEATURE feature, size_t feature_hash, float value);
VW_DLL_MEMBER VW_FEATURE VW_CALLING_CONV VW_GetFeatures(VW_HANDLE handle, VW_EXAMPLE e, size_t* plen);
VW_DLL_MEMBER void VW_CALLING_CONV VW_ReturnFeatures(VW_FEATURE f);
#ifdef USE_CODECVT
Expand All @@ -120,7 +126,9 @@ extern "C"
VW_DLL_MEMBER float VW_CALLING_CONV VW_Learn(VW_HANDLE handle, VW_EXAMPLE e);
VW_DLL_MEMBER float VW_CALLING_CONV VW_Predict(VW_HANDLE handle, VW_EXAMPLE e);
VW_DLL_MEMBER float VW_CALLING_CONV VW_PredictCostSensitive(VW_HANDLE handle, VW_EXAMPLE e);
//deprecated. Please use either VW_ReadExample for parsing, or VW_ImportExample for example construction
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not deprecated, right?

VW_DLL_MEMBER void VW_CALLING_CONV VW_AddLabel(VW_EXAMPLE e, float label, float weight, float base);
// deprecated. Please use either VW_ReadExample for parsing, or VW_ImportExample for example construction
VW_DLL_MEMBER void VW_CALLING_CONV VW_AddStringLabel(VW_HANDLE handle, VW_EXAMPLE e, const char* label);

VW_DLL_MEMBER float VW_CALLING_CONV VW_Get_Weight(VW_HANDLE handle, size_t index, size_t offset);
Expand Down