Skip to content

Commit

Permalink
Merge pull request #2 from intersystems/master
Browse files Browse the repository at this point in the history
Updating to latest
  • Loading branch information
JosDenysGitHub committed Apr 21, 2020
2 parents 0732698 + a7de24b commit 9c2fc4f
Show file tree
Hide file tree
Showing 30 changed files with 276 additions and 256 deletions.
36 changes: 35 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,38 @@
# VS generated files
*.sdf
*.opensdf
*.suo
*.suo
/modules/aho/model_common.vcxproj.user
/modules/aho/model1_uk.vcxproj.user
/modules/aho/model1_sv.vcxproj.user
/modules/aho/model1_ru.vcxproj.user
/modules/aho/model1_pt.vcxproj.user
/modules/aho/model1_nl.vcxproj.user
/modules/aho/model1_ja.vcxproj.user
/modules/aho/model1_fr.vcxproj.user
/modules/aho/model1_es.vcxproj.user
/modules/aho/model1_en.vcxproj.user
/modules/aho/model1_de.vcxproj.user
/modules/aho/model1_cs.vcxproj.user
/modules/aho/model0_uk.vcxproj.user
/modules/aho/model0_sv.vcxproj.user
/modules/aho/model0_ru.vcxproj.user
/modules/aho/model0_pt.vcxproj.user
/modules/aho/model0_nl.vcxproj.user
/modules/aho/model0_ja.vcxproj.user
/modules/aho/model0_fr.vcxproj.user
/modules/aho/model0_es.vcxproj.user
/modules/aho/model0_en.vcxproj.user
/modules/aho/model0_de.vcxproj.user
/modules/aho/model0_cs.vcxproj.user
/modules/ali/ali.vcxproj.user
/modules/base/base.vcxproj.user
/.vs
/modules/core/core.vcxproj.user
/modules/engine/engine.vcxproj.user
/modules/enginetest/iknowdata.log
*.log
*.user
/modules/built/x64
/modules/.vs
*.filters
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ all : engine
test : enginetest
$(ROOT_DIR)/kit/$(PLATFORM)/$(MODE)/bin/iknowenginetest

languagecompiler : base core shell
$(MAKE) -f $(ROOT_DIR)/modules/compiler/iKnowLanguageCompiler/languagecompiler.mak
enginetest : engine base
$(MAKE) -f $(ROOT_DIR)/modules/enginetest/enginetest.mak
engine : base shell core icu
Expand All @@ -33,6 +35,7 @@ icu :
test -d $(ICUDIR)/include/unicode && (ls $(ICUDIR)/lib/libicu* > /dev/null)

clean :
$(MAKE) -f $(ROOT_DIR)/modules/compiler/iKnowLanguageCompiler/languagecompiler.mak clean
$(MAKE) -f $(ROOT_DIR)/modules/enginetest/enginetest.mak clean
$(MAKE) -f $(ROOT_DIR)/modules/engine/engine.mak clean
$(MAKE) -f $(ROOT_DIR)/modules/shell/shell.mak clean
Expand Down
73 changes: 39 additions & 34 deletions modules/compiler/iKnowLanguageCompiler/CSV_DataGenerator.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
/*
** CSV_DataGenerator.cpp
*/

#ifdef WIN32
#pragma warning (disable: 4251)
#endif

#include "CSV_DataGenerator.h"
#include "Util.h"
#include "IkStringEncoding.h"

#include <fstream>
#include <sstream>
#include <algorithm>

using namespace iknow::csvdata;
using namespace std;
using iknow::base::IkStringEncoding;
using iknow::base::String;
using namespace iknow::core;

vector<iKnow_KB_Metadata> CSV_DataGenerator::kb_metadata;
vector<iKnow_KB_Acronym> CSV_DataGenerator::kb_acronyms;
Expand Down Expand Up @@ -38,10 +50,6 @@ CSV_DataGenerator::~CSV_DataGenerator()
{
}

#include "IkStringEncoding.h"
using iknow::base::IkStringEncoding;
using iknow::base::String;
using namespace iknow::core;

iknow::base::String CSV_DataGenerator::GetSpecialLabel(SpecialLabel label) {
switch (label) {
Expand Down Expand Up @@ -159,7 +167,7 @@ void CSV_DataGenerator::loadCSVdata(std::string language, bool IsCompiled)
*/
kb_language = language;
Hash = language; // just a unique string per KB
cout << "Loading CSV data for language \"" << language << "\"";
cout << "Loading CSV data for language \"" << language << "\"" << endl;;

kb_metadata.clear();
size_t cap = kb_metadata.capacity();
Expand Down Expand Up @@ -188,16 +196,14 @@ void CSV_DataGenerator::loadCSVdata(std::string language, bool IsCompiled)
kb_labels.clear();
cap = kb_labels.capacity();
cout << "Reading label data..." << endl;
iKnow_KB_Label::ImportFromCSV(csv_path_ + language + "\\" + "labels.csv", *this);
if (!iKnow_KB_Label::ImportFromCSV(csv_path_ + language + "\\" + "labels.csv", *this))
throw ExceptionFrom<CSV_DataGenerator>("Cannot build a language model without external labels !!!");
cout << kb_labels.size() << " label items (reserved=" << cap << ")" << endl;

if (!IsCompiled) {
kb_lexreps.clear();
lexrep_index.clear();
cap = kb_lexreps.capacity();
cout << "Reading lexrep data..." << endl;
iKnow_KB_Lexrep::ImportFromCSV(csv_path_ + language + "\\" + "lexreps.csv", *this);
cout << kb_lexreps.size() << " lexrep items (reserved=" << cap << ")" << endl;
cout << endl << kb_lexreps.size() << " lexrep items (reserved=" << cap << ")" << endl;
}
kb_prepro.clear();
cap = kb_prepro.capacity();
Expand Down Expand Up @@ -249,7 +255,7 @@ static const size_t kRawSize = 48000000;
#include "KbMetadata.h"

// static definition:
const IkLabel::LabelTypeMap IkLabel::label_type_map_;
// const IkLabel::LabelTypeMap IkLabel::label_type_map_;

using namespace iknow::shell;

Expand Down Expand Up @@ -644,16 +650,17 @@ void CSV_DataGenerator::generateRAW(void)
allocator.generate_image(language_data_path_, GetName());
#endif

delete buf_;
delete[] buf_;
}


class RegexPredicate : public Predicate // Class %iKnow.Compiler.RegexPredicate Extends %iKnow.Compiler.Predicate[Hidden, System = 4]
{
public:
bool MatchRegex; // Property MatchRegex As %Boolean; /// If true, matches regex lexreps. If false, matches non-regex lexreps
static bool MatchRegex; // Property MatchRegex As %Boolean; /// If true, matches regex lexreps. If false, matches non-regex lexreps

bool Check(iKnow_KB_Lexrep& obj) { // Method Check(obj As %iKnow.KB.Lexrep) As %Boolean
string token = obj.Token; // Set token = obj.Token
static bool Check(iKnow_KB_Lexrep& obj) { // Method Check(obj As %iKnow.KB.Lexrep) As %Boolean
const string &token = obj.Token; // Set token = obj.Token
// bool foundOutput = MatchRegex; // Set foundOutput = ..MatchRegex
bool escaping = false; // Set escaping = 0
// Set len = $L(token)
Expand All @@ -665,30 +672,27 @@ class RegexPredicate : public Predicate // Class %iKnow.Compiler.RegexPredicate
}
return !MatchRegex; // Quit '..MatchRegex
}
void operator()(iKnow_KB_Lexrep& lexrep) {
lexrep.isRegex = Check(lexrep);
}
};
bool RegexPredicate::MatchRegex = true; // scan for regular expressions.

void CSV_DataGenerator::generateAHO(void)
{
/*
std::ofstream ofs = std::ofstream("C:/tmp/output_state1.csv", std::ofstream::trunc); // clean session logging file
if (ofs.is_open())
ofs.close();
ofs = std::ofstream("C:/tmp/output_failure.csv", std::ofstream::trunc);
if (ofs.is_open())
ofs.close();
*/

cout << "Compiling " << GetName() << " outputDir=\"" << aho_path_ << "\"" << std::endl;
string outputDir = aho_path_ + "/inl/" + GetName() + "/lexrep";

RegexPredicate *predicate = new RegexPredicate; // // Set predicate = ##class(RegexPredicate).%New()
for_each(kb_lexreps.begin(), kb_lexreps.end(), RegexPredicate());

//RegexPredicate *predicate = new RegexPredicate; // // Set predicate = ##class(RegexPredicate).%New()
//First the non-regexes
predicate->MatchRegex = false;
bool predicate = false; //predicate->MatchRegex = false;
CompileLexrepDictionaryPhase(/*kb,*/ "", predicate, outputDir); // Do ..CompileLexrepDictionaryPhase(kb, "", predicate, outputDir)
//Now the regexes
predicate->MatchRegex = true; // Set predicate.MatchRegex = 1
predicate = true; //predicate->MatchRegex = true; // Set predicate.MatchRegex = 1
CompileLexrepDictionaryPhase(/*kb,*/ "_regex", predicate, outputDir); // Do ..CompileLexrepDictionaryPhase(kb, "_regex", predicate, outputDir)
delete predicate;
// delete predicate;
}

#include "GotoFunction.h"
Expand All @@ -698,7 +702,7 @@ void CSV_DataGenerator::generateAHO(void)
#include "LexrepStateOutputFunc.h"
#include "MetadataTable.h"

vector<int> CSV_DataGenerator::CreateLabelsIndexVector(iKnow_KB_Lexrep& lexrep, std::map<string, int>& table) // ClassMethod CreateLabelsIndexVector(lexrep As %iKnow.KB.Lexrep, ByRef table As %String) As %List[Private]
vector<int> CSV_DataGenerator::CreateLabelsIndexVector(iKnow_KB_Lexrep& lexrep, std::unordered_map<string, int>& table) // ClassMethod CreateLabelsIndexVector(lexrep As %iKnow.KB.Lexrep, ByRef table As %String) As %List[Private]
{
vector<string> labelList = split_row(lexrep.Labels, ';'); // Set labelList = lexrep.GetLabels()
int labelListLen = (int)labelList.size(); // Set labelListLen = $Length(labelList, ";")
Expand All @@ -716,7 +720,7 @@ vector<int> CSV_DataGenerator::CreateLabelsIndexVector(iKnow_KB_Lexrep& lexrep,
return indexList;
}

void CSV_DataGenerator::CompileLexrepDictionaryPhase(/*kb As %iKnow.KB.Knowledgebase,*/ string phase, Predicate *phasePredicate, string& outputDir_)
void CSV_DataGenerator::CompileLexrepDictionaryPhase(/*kb As %iKnow.KB.Knowledgebase,*/ string phase, bool phase_switch /*Predicate *phasePredicate*/, string& outputDir_)
{
cout << "ComileLexepDictionaryPhase " << phase << endl;

Expand All @@ -729,8 +733,8 @@ void CSV_DataGenerator::CompileLexrepDictionaryPhase(/*kb As %iKnow.KB.Knowledge
bool hasRegex = (phase == "_regex"); // Set hasRegex = (phase = "_regex")

iknow::AHO::GotoFunction *gotoFunc = new iknow::AHO::GotoFunction; // Set gotoFunc = ##class(GotoFunction).%New()
RegexPredicate *regex_predicate = dynamic_cast<RegexPredicate*>(phasePredicate);
gotoFunc->RegexEnabled = regex_predicate->MatchRegex;
// RegexPredicate *regex_predicate = dynamic_cast<RegexPredicate*>(phasePredicate);
gotoFunc->RegexEnabled = false; // regex_predicate->MatchRegex;
gotoFunc->RegexDictionary = new iknow::AHO::KnowledgebaseRegexDictionary; // Set gotoFunc.RegexDictionary = ##class(KnowledgebaseRegexDictionary).%New()
gotoFunc->RegexDictionary->Knowledgebase = this; // Set gotoFunc.RegexDictionary.Knowledgebase = kb

Expand All @@ -742,8 +746,9 @@ void CSV_DataGenerator::CompileLexrepDictionaryPhase(/*kb As %iKnow.KB.Knowledge
cout << "Building goto and first output table..." << endl;
lexreps_Type::iterator key = kb_lexreps.begin(); // Set key = kb.NextLexrep("")
while (key != kb_lexreps.end()) { // While key '= ""
iKnow_KB_Lexrep lexrep = *key; // Set lexrep = kb.GetLexrep(key)
if (!regex_predicate->Check(lexrep)) { ++key; continue; } // If 'phasePredicate.Check(lexrep) Goto SkipLexrep
iKnow_KB_Lexrep &lexrep = *key; // Set lexrep = kb.GetLexrep(key)
// if (!regex_predicate->Check(lexrep)) { ++key; continue; } // If 'phasePredicate.Check(lexrep) Goto SkipLexrep
if (!(lexrep.isRegex == phase_switch)) { ++key; continue; } // If 'phasePredicate.Check(lexrep) Goto SkipLexrep
vector<int> labels = CreateLabelsIndexVector(lexrep, labelIndexTable); // Set labels = ..CreateLabelsIndexVector(lexrep, .labelIndexTable)

iknow::base::String token = IkStringEncoding::UTF8ToBase(lexrep.Token); // Set token = $ZCONVERT(lexrep.Token, "I", "UTF8")
Expand Down
18 changes: 7 additions & 11 deletions modules/compiler/iKnowLanguageCompiler/CSV_DataGenerator.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#pragma once
#ifdef WIN32
#pragma warning (disable: 4251)
#endif

#include "iKnow_KB_Metadata.h"
#include "iKnow_KB_Acronym.h"
#include "iKnow_KB_Regex.h"
Expand Down Expand Up @@ -64,7 +68,7 @@ namespace iknow {
static std::vector<iKnow_KB_PreprocessFilter> kb_prepro;
static std::vector<iKnow_KB_Rule> kb_rules;
static const std::vector<std::pair<int, std::string>> kb_properties;
typedef std::map<std::string, int> labelIndexTable_type;
typedef std::unordered_map<std::string, int> labelIndexTable_type;
labelIndexTable_type labelIndexTable; // mapping of labels to indexes

size_t LabelCount() {
Expand All @@ -85,17 +89,9 @@ namespace iknow {
std::string csv_path_;
std::string aho_path_;
std::string language_data_path_; // C:\P4\projects\ikNLP_DEV\modules\iknow\standalone\language_data
void CompileLexrepDictionaryPhase(/*kb As %iKnow.KB.Knowledgebase,*/ std::string phase, Predicate *phasePredicate, std::string& outputDir_);
std::vector<int> CreateLabelsIndexVector(iKnow_KB_Lexrep& lexrep, std::map<std::string, int>& table);
void CompileLexrepDictionaryPhase(/*kb As %iKnow.KB.Knowledgebase,*/ std::string phase, bool phasePredicate /*Predicate *phasePredicate*/, std::string& outputDir_);
std::vector<int> CreateLabelsIndexVector(iKnow_KB_Lexrep& lexrep, std::unordered_map<std::string, int>& table);
};

class testje {
public:
testje(void) {}


};

}
}

3 changes: 2 additions & 1 deletion modules/compiler/iKnowLanguageCompiler/GotoFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "LexrepStateOutputFunc.h"
#include "IkStringEncoding.h"
#include "Util.h"
#include "utlExceptionFrom.h"

#include <fstream>
#include <map>
Expand Down Expand Up @@ -337,7 +338,7 @@ void GotoFunction::ToC(std::string dir)
String regexName = String(regex.begin() + 1, regex.end() - 1); // Set regexName = $E(regex, 2, *-1) //remove {}
String pattern = RegexDictionary->Lookup(regexName); // Set pattern = ..RegexDictionary.Lookup(regexName)

// If pattern = "" Throw ##class(%Exception.SystemException).%New("Unknown regex specified.")
if (pattern.empty()) throw ExceptionFrom<GotoFunction>("Unknown regex specified."); // If pattern = "" Throw ##class(%Exception.SystemException).%New("Unknown regex specified.")
ofs.o() << "static const Char Regex" << i << "Str[] = {"; // Write "static const Char Regex"_i_"Str[] = {"
for (int j = 1; j <= pattern.length(); j++) { // For j = 1 :1 : $L(pattern) {
ofs.o() << static_cast<int>(pattern[j - 1]) << ", "; // W $A(pattern, j)_", "
Expand Down
2 changes: 1 addition & 1 deletion modules/compiler/iKnowLanguageCompiler/GotoFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <map>
#include <vector>

#include "ikTypes.h"
#include "IkTypes.h"
#include "StateOutputFunction.h"
#include "KnowledgebaseRegexDictionary.h"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#pragma once
#ifdef WIN32
#pragma warning (disable: 4251)
#endif

#include "IkTypes.h"

namespace iknow {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ int main(int argc, char* argv[])
string repo_root("C:/Users/jdenys/source/repos/iknow/");

string exe_path(argv[0]);
#ifdef WIN32
size_t kit_pos = exe_path.find("\\kit\\");
#else
size_t kit_pos = exe_path.find("/kit/");
#endif
if (kit_pos != string::npos) {
repo_root = string(exe_path.begin(), exe_path.begin() + kit_pos + 1);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,14 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WIN32;_CONSOLE;_DEBUG</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>..\..\base\src\headers;..\..\core\src\headers;..\..\shell\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>iKnowBase.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>iKnowBase.lib;iKnowCore.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(OutDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
Expand All @@ -80,7 +80,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WIN32;_CONSOLE;NDEBUG</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>..\..\base\src\headers;..\..\core\src\headers;..\..\shell\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
Expand All @@ -89,7 +89,7 @@
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>iKnowBase.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>iKnowBase.lib;iKnowCore.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(OutDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
Expand Down
19 changes: 5 additions & 14 deletions modules/compiler/iKnowLanguageCompiler/iKnow_KB_Acronym.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ using namespace iknow::csvdata;
using namespace std;

// ClassMethod ImportFromStream(stream As %CharacterStream, kb As Knowledgebase)
void iKnow_KB_Acronym::ImportFromCSV(std::string acro_csv, CSV_DataGenerator& kb)
bool iKnow_KB_Acronym::ImportFromCSV(std::string acro_csv, CSV_DataGenerator& kb)
{
ifstream ifs = ifstream(acro_csv, ifstream::in);
if (ifs.is_open()) {
Expand All @@ -33,18 +33,9 @@ void iKnow_KB_Acronym::ImportFromCSV(std::string acro_csv, CSV_DataGenerator& kb
kb.kb_acronyms.push_back(acronym); // Set sc = acronym.%Save()
// $$$IKModelCheck(sc, stream.Filename, count, line)
}
ifs.close();
return true;
}
else {
cerr << "Error opening file: " << acro_csv << " Language=\"" << kb.GetName() << "\"" << endl;
}
ifs.close();
}

iKnow_KB_Acronym::iKnow_KB_Acronym()
{
}


iKnow_KB_Acronym::~iKnow_KB_Acronym()
{
cerr << "Error opening file: " << acro_csv << " Language=\"" << kb.GetName() << "\"" << endl;
return false;
}
Loading

0 comments on commit 9c2fc4f

Please sign in to comment.