Skip to content

Commit

Permalink
nlp work
Browse files Browse the repository at this point in the history
  • Loading branch information
djmott committed Aug 16, 2016
1 parent 78445f0 commit 7b9bed8
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 20 deletions.
7 changes: 5 additions & 2 deletions examples/example_nlp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ namespace commands{

xtd::nlp::english::pointer _english;

#if 0
int main(){
using namespace xtd;

Expand All @@ -42,9 +43,11 @@ int main(){
return 0;

}
#if 0
#else
int main(){
_english = xtd::nlp::english::pointer(new xtd::nlp::english("/home/davidmott/Downloads/Moby", "/home/davidmott/Downloads/WordNet-3.0/dict"));

xtd::nlp::wordnet::database oDB;


std::cout << "Chatty Cathy: Hello! What can I do for you?" << std::endl;

Expand Down
3 changes: 1 addition & 2 deletions include/xtd/nlp/english.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace xtd{
const lemma::lemmata& lemmata() const { return _lemmata; }


english(const xtd::filesystem::path& oMobyPath, const xtd::filesystem::path& oWordnetPath) : _moby(oMobyPath), _wordnet(oWordnetPath){
english() : _wordnet(){
for (const auto & oRecord : _wordnet._data_adj.records){
for (const auto & oWord : oRecord.second.words){
std::cout << oWord.word << std::endl;
Expand All @@ -40,7 +40,6 @@ namespace xtd{
}

private:
moby::database _moby;
wordnet::database _wordnet;
lemma::lemmata _lemmata;
};
Expand Down
7 changes: 3 additions & 4 deletions include/xtd/nlp/nlp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,10 @@ namespace xtd{
using type = _Ty;
};

}





}

struct BOW{
std::map<std::string, size_t> operator()(const std::vector<xtd::string>& oDoc) const{
std::map<std::string, size_t> oRet;
Expand All @@ -99,13 +96,15 @@ namespace xtd{
}
};


struct SimpleParse{
std::vector<xtd::string> operator()(const std::string& src) const {
return xtd::string(src).split({'.', '!', '?', ' ', ','});
}
};



}
}

Expand Down
31 changes: 19 additions & 12 deletions include/xtd/nlp/wordnet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ c++ interface to wordnet databases

#include <map>
#include <future>
#include <sstream>

#include <xtd/filesystem.hpp>

Expand Down Expand Up @@ -52,8 +53,13 @@ namespace xtd
using map = std::map<size_t, record>;
size_t file_offset;
std::string lemma, pos, synset_cnt, p_cnt, ptr_symbol, sense_cnt, tagsense_cnt, synset_offset;
bool load(const std::string& , size_t & ) {
return false;
/// Parses a single line of text out of a larger buffer.
///
/// The line runs from offset @p i up to (not including) the next '\n'. If no
/// '\n' follows, the rest of the buffer is treated as the line. Eight
/// whitespace-separated tokens are extracted, in order, into lemma, pos,
/// synset_cnt, p_cnt, ptr_symbol, sense_cnt, tagsense_cnt and synset_offset.
///
/// @param str buffer containing the text to parse
/// @param i   in/out offset: on entry, where the line starts; on return, the
///            position of the terminating '\n' (the caller must step past it),
///            or str.size() when the last line has no terminator. Left
///            unchanged when the function returns false.
/// @return false when @p i is at or beyond the end of @p str (nothing left to
///         parse); true otherwise.
bool load(const std::string& str, size_t & i) {
  // Nothing to read: find() would return npos here and the old pointer
  // arithmetic indexed out of range.
  if (i >= str.size()) {
    return false;
  }
  auto x = str.find('\n', i);
  // A final line without a trailing newline ends at the end of the buffer.
  if (std::string::npos == x) {
    x = str.size();
  }
  std::istringstream oSS(str.substr(i, x - i));
  oSS >> lemma >> pos >> synset_cnt >> p_cnt >> ptr_symbol >> sense_cnt >> tagsense_cnt >> synset_offset;
  i = x;
  return true;
}
};

Expand Down Expand Up @@ -202,35 +208,36 @@ namespace xtd
index_file _index_noun;
index_file _index_verb;

database(const xtd::filesystem::path& oPath) {
database() {
auto make_path = [&](const char * sAddend){
xtd::filesystem::path oRet(oPath);
xtd::filesystem::path oRet(XTD_ASSETS_DIR "/WordNet-3.0/dict");
oRet /= sAddend;
oRet.make_preferred();
return oRet;
};
auto t1 = std::async(std::launch::async, [&]() {
return make_path("data.adj");
return _data_adj.load(make_path("data.adj"));
});
auto t2 = std::async(std::launch::async, [&]() {
return make_path("data.adv");
return _data_adv.load(make_path("data.adv"));
});
auto t3 = std::async(std::launch::async, [&]() {
return make_path("data.noun");
return _data_noun.load(make_path("data.noun"));
});
auto t4 = std::async(std::launch::async, [&]() {
return make_path("data.verb");
return _data_verb.load(make_path("data.verb"));
});
auto t5 = std::async(std::launch::async, [&]() {
return make_path("index.adj");
return _index_adj.load(make_path("index.adj"));
});
auto t6 = std::async(std::launch::async, [&]() {
return make_path("index.adv");
return _index_adv.load(make_path("index.adv"));
});
auto t7 = std::async(std::launch::async, [&]() {
return make_path("index.noun");
return _index_noun.load(make_path("index.noun"));
});
auto t8 = std::async(std::launch::async, [&]() {
return make_path("index.verb");
return _index_verb.load(make_path("index.verb"));
});
t1.get();
t2.get();
Expand Down
4 changes: 4 additions & 0 deletions include/xtd/xtd.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
#define DOXY_INVOKED 0
#endif

// Base directory, as a string literal, that other paths are appended to by
// literal concatenation. A definition made before this point takes precedence;
// otherwise the default is the "assets" folder under the CMake source-directory
// placeholder, which is substituted when this template is configured.
#if !defined(XTD_ASSETS_DIR)
#define XTD_ASSETS_DIR "@CMAKE_CURRENT_SOURCE_DIR@/assets"
#endif

// Feature flag, presumably recording whether libuuid is available (confirm
// against the build scripts). A definition made before this point takes
// precedence; otherwise the template directive below is rewritten by CMake at
// configure time into a plain macro definition with the value 0 or 1.
#if !defined(XTD_HAS_LIBUUID)
#cmakedefine01 XTD_HAS_LIBUUID
#endif
Expand Down

0 comments on commit 7b9bed8

Please sign in to comment.