Skip to content

Commit

Permalink
Remove dependency on c++ regex library
Browse files Browse the repository at this point in the history
  • Loading branch information
Gabor Angeli committed Dec 1, 2015
1 parent 2ab425b commit 1d3d10a
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 157 deletions.
2 changes: 1 addition & 1 deletion etc/mkGraph.sh
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ PRIVATIVE_WORDS=`echo "^(" \
"proposed" \
") [0-9]+" | sed -e 's/| /|/g'`

cat "$DIR/$GRAPH_DATA/edge_*.txt" |\
cat "$DIR/$GRAPH_DATA/"edge_*.txt |\
sed -e 's/_/ /g' |\
awk -F' ' '{ print $1 "\t" $2 }' |\
egrep "$PRIVATIVE_WORDS" |\
Expand Down
4 changes: 0 additions & 4 deletions pub/acl2015/talk/clause.tex
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,3 @@
\item POS tag of governor; dependent.
\end{itemize}
\end{frame}




7 changes: 1 addition & 6 deletions pub/ai2/motivation.tex
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,8 @@
\item Grounding 3,800 rules takes 7 hours (Alchemy).
\pause
\item Add Chris R\'{e} $+$ decades of DB research: 106 seconds.
\item $\dots$ but relation extraction over 1M sentences needs 500GB memory.
\item $\dots$ but still needs lots of memory.
\end{itemize}
\vspace{1ex}
\pause

\hh{We want to run inference over 300M premises}

\end{frame}

Expand Down Expand Up @@ -102,7 +98,6 @@
\pause
\item But, can still infer: \w{Most students learned it at a school.}
\end{itemize}

\end{frame}


109 changes: 13 additions & 96 deletions pub/figures.tex
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@


%
% -----
% The full Finite State Automata over inference states
Expand Down Expand Up @@ -710,28 +711,28 @@

\def\exampleStepOne{\exampleTree{ROOT \\ $~$}
{No carnivores eat animals}
{\textbf{The} carnivores eat animals}
{No \textbf{cats} eat animals}}
{\darkblue{The} carnivores eat animals}
{No \darkblue{animals} eat animals}}
\def\exampleStepTwo{\exampleTree{No carnivores eat animals}
{The carnivores eat animals}
{The \textbf{feline} eats animals}
{\textbf{All} carnivores eat animals}}
{The \darkblue{feline} eats animals}
{\darkblue{All} carnivores eat animals}}
\def\exampleStepThree{\exampleTree{The carnivores eat animals}
{The feline eats animals}
{The \textbf{cat} eats animals}
{The cat eats \textbf{chordate}}}
{The \darkblue{cat} eats animals}
{The cat eats \darkblue{chordate}}}
\def\exampleStepFour{\exampleTree{The feline eats animals}
{The cat eats animals}
{The cat eats \textbf{chordates}}
{The \textbf{kitty} eats animals}}
{The cat eats \darkblue{chordates}}
{The \darkblue{kitty} eats animals}}
\def\exampleStepFive{\exampleTree{The cat eats animals}
{The cat eats chordates}
{The cat eats \textbf{mice}}
{The cat eats \textbf{dogs}}}
{The cat eats \darkblue{mice}}
{The cat eats \darkblue{dogs}}}
\def\exampleStepSix{\exampleTreeEnd{The cat eats chordates}
{The cat eats mice}
{The cat ate \textbf{a} mouse}
{The \textbf{kitty} eats mice}}
{The cat ate \darkblue{a} mouse}
{The \darkblue{kitty} eats mice}}
\def\exampleStepEnd{
\setstyles
\begin{tikzpicture}[grow=down, sloped]
Expand Down Expand Up @@ -859,90 +860,6 @@



%
% -----
% MIML-RE Figures
% -----
%

% Node styles shared by the MIML-RE figures:
%   var  -- thick circle, dark-gray outline, unfilled (black!0), at least 10mm wide
%   ovar -- the same circle, filled with light gray (gray!20)
% NOTE(review): presumably var = latent and ovar = observed variable -- confirm.
\def\mimlStyles{
\tikzstyle{var}=[circle,thick,draw=black!75,fill=black!0,minimum size=10mm]
\tikzstyle{ovar}=[circle,thick,draw=black!75,fill=gray!20,minimum size=10mm]
}

% Two-node figure: a shaded node $y$ with a shaded node $x$ placed below it,
% joined by a single directed edge from x to y. Takes no arguments.
\newcommand\dsPlate{
\begin{tikzpicture}[->,>=stealth,shorten >=1pt,auto,node distance=1.5cm and 0.75cm,
semithick]
\mimlStyles
\node[ovar] (y) [] {$y$};
\node[ovar] (x) [below =of y] {$x$};

% single arrow x -> y
\path (x) edge (y);
\end{tikzpicture}
}

% Three-layer figure: one shaded node $y$ on top, a row of unshaded nodes
% $z_1..z_3$ beneath it, and a row of shaded nodes $x_1..x_3$ at the bottom.
% Each x_i points to z_i, and every z_i points to the single y. Takes no arguments.
\newcommand\miPlate{
\begin{tikzpicture}[->,>=stealth,shorten >=1pt,auto,node distance=1.5cm and 0.75cm,
semithick]
\mimlStyles
\node[ovar] (y) [] {$y$};

% middle column is anchored under y; the other columns are placed relative to it
\node[var] (zTwo) [below =of y] {$z_2$};
\node[ovar] (xTwo) [below =of zTwo] {$x_2$};

\node[var] (zOne) [left =of zTwo] {$z_1$};
\node[ovar] (xOne) [left =of xTwo] {$x_1$};

\node[var] (zThree) [right =of zTwo] {$z_3$};
\node[ovar] (xThree) [right =of xTwo] {$x_3$};

% arrows x_i -> z_i, then z_i -> y
\path (xOne) edge (zOne)
(xTwo) edge (zTwo)
(xThree) edge (zThree)
(zOne) edge (y)
(zTwo) edge (y)
(zThree) edge (y);
\end{tikzpicture}
}

% Three-layer figure with several top nodes: a row $y_1, y_2, \dots, y_{n-1}, y_n$
% on top, a row of unshaded nodes $z_1..z_3$ beneath it, and a row of shaded nodes
% $x_1..x_3$ at the bottom. Each x_i points to z_i, and every z_i points to each of
% the four drawn y nodes (the $\dots$ placeholder receives no edges). Takes no arguments.
\newcommand\mimlPlate{
\begin{tikzpicture}[->,>=stealth,shorten >=1pt,auto,node distance=1.5cm and 0.75cm,
semithick]
\mimlStyles
\node[ovar] (yOne) {$y_1$};
\node[ovar] (yTwo) [right =of yOne] {$y_2$};
\node[] (yDots) [right =of yTwo] {$\dots$};
\node[ovar] (yNMin) [right =of yDots] {$y_{n-1}$};
\node[ovar] (yN) [right =of yNMin] {$y_n$};

% middle column is anchored under the \dots placeholder, i.e. the centre of the y row
\node[var] (zTwo) [below =of yDots] {$z_2$};
\node[ovar] (xTwo) [below =of zTwo] {$x_2$};

\node[var] (zOne) [left =of zTwo] {$z_1$};
\node[ovar] (xOne) [left =of xTwo] {$x_1$};

\node[var] (zThree) [right =of zTwo] {$z_3$};
\node[ovar] (xThree) [right =of xTwo] {$x_3$};

% arrows x_i -> z_i, then each z_i fans out to y_1, y_2, y_{n-1} and y_n
\path (xOne) edge (zOne)
(xTwo) edge (zTwo)
(xThree) edge (zThree)
(zOne) edge (yOne)
edge (yTwo)
edge (yNMin)
edge (yN)
(zTwo) edge (yOne)
edge (yTwo)
edge (yNMin)
edge (yN)
(zThree) edge (yOne)
edge (yTwo)
edge (yNMin)
edge (yN);
\end{tikzpicture}
}


%
% Standalone figures
%
Expand Down
18 changes: 13 additions & 5 deletions pub/macros.tex
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@
\newcommand\sys[1]{\textsc{#1}}
\newcommand\w[1]{\textit{\darkgreen{#1}}}
\newcommand\ww[1]{\textit{#1}}
\newcommand\true[1]{\darkgreen{\textit{#1}}}
\newcommand\false[1]{\darkred{\textit{#1}}}
\newcommand\unknown[1]{\orange{\textit{#1}}}
\newcommand{\indentitem}{\setlength\itemindent{25pt}}

\newcommand\denote[1]{\ensuremath{\llbracket#1\rrbracket}}
Expand Down Expand Up @@ -54,6 +51,10 @@
\newcommand{\noop}{\textcolor{white}{NOOP}}
\newcommand{\noopTab}{\begin{tabular}{c} \textcolor{white}{NOOP} \\ \textcolor{white}{NOOP} \end{tabular}}

\newcommand\true[1]{\darkgreen{\checkmark\textit{#1}}}
\newcommand\false[1]{\darkred{\xmark$~\,$\textit{#1}}}
\newcommand\unknown[1]{?\orange{\textit{#1}}}

\newcommand{\verticalcenter}[1]{\begingroup
\setbox0=\hbox{#1}%
\parbox{\wd0}{\box0}\endgroup}
Expand All @@ -78,11 +79,10 @@
% KBP Specific
% An entity
\def\ent#1{\text{\small{\textsc{#1}}}}
% A phrase
\def\w#1{\textit{#1}}

% An extraction, e.g., "Obama born_in Hawaii"
\newcommand\extr[3]{\mbox{\ent{#1}\ $~$\rel{#2}\ $~$\ent{#3}}}
\newcommand\triple[3]{(\mbox{\ent{#1}; $~$\rel{#2}; $~$\ent{#3}})}
% A clause in a logical form, e.g., "born_in(Obama, Hawaii)"
\newcommand\clause[3]{\mbox{\rel{#2}\ensuremath{(#1, #3)}}}

Expand All @@ -109,3 +109,11 @@
\end{tabular}
\vspace{2mm}
}

% TikZ keys for overlay-dependent styling:
%   invisible         -- sets opacity=0
%   visible on=<spec> -- applies `invisible` on every overlay except <spec> (implemented via `alt`)
%   alt=<spec>{A}{B}  -- applies keys A where \alt<spec> selects its first branch, keys B otherwise
% NOTE(review): \alt is not defined in this file; presumably beamer's overlay command -- confirm.
\tikzset{
invisible/.style={opacity=0},
visible on/.style={alt=#1{}{invisible}},
alt/.code args={<#1>#2#3}{%
\alt<#1>{\pgfkeysalso{#2}}{\pgfkeysalso{#3}} % \pgfkeysalso doesn't change the path
},
}
1 change: 1 addition & 0 deletions src/JavaBridge.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ JavaBridge::JavaBridge() {
snprintf(classpath, 1024, "%s/naturalli_preprocess.jar:%s/jaws.jar:%s/../lib/jaws.jar:%s:%s",
thisDir.c_str(), thisDir.c_str(), thisDir.c_str(),
CORENLP, CORENLP_MODELS);
printf("%s\n", classpath);

// Start program
dup2(pipeIn[0], STDIN_FILENO);
Expand Down
66 changes: 48 additions & 18 deletions src/NaturalLIIO.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <sys/types.h>
#include <sys/resource.h>
#include <signal.h>
#include <regex>
#include <regex.h>
#include <thread>
#include <unistd.h>

Expand Down Expand Up @@ -208,31 +208,51 @@ bool to_bool(std::string str) {
return b;
}

/** The regex for a key@specifier=value triple */
regex regexSetValue("([^ ]+) *@ *([^ ]+) *= *([^ ]+)",
std::regex_constants::egrep);
/** The regex for a key=value pair */
regex regexSetFlag("([^ ]+) *= *([^ ]+) *", std::regex_constants::egrep);

//
// Get the gloss of a regex group matched against a string.
//
// pmatch is one entry of the array filled in by regexec(); source is the
// string that regexec() was run against. Returns the substring
// [rm_so, rm_eo) of source. Returns an empty string when the group did not
// participate in the match (regexec() reports such groups with offsets of
// -1) or when the offsets do not describe a valid span of source.
//
std::string regexGroup(const regmatch_t& pmatch, const std::string& source) {
// Check the signed offsets before converting: -1 would otherwise wrap
// around to a huge unsigned index.
if (pmatch.rm_so < 0 || pmatch.rm_eo < pmatch.rm_so) {
return std::string();
}
const std::string::size_type start =
static_cast<std::string::size_type>(pmatch.rm_so);
const std::string::size_type end =
static_cast<std::string::size_type>(pmatch.rm_eo);
if (end > source.size()) {
return std::string();
}
// Copy straight out of source; this avoids the non-standard
// variable-length stack array and the extra memcpy pass.
return source.substr(start, end - start);
}

//
// parseMetadata()
//
bool parseMetadata(const char *rawLine, SynSearchCosts **costs,
vector<AlignmentSimilarity>* alignments,
syn_search_options *opts) {
// Compile the regexes
regex_t regexSetValue;
if (regcomp(&regexSetValue, "([^ ]+) *@ *([^ ]+) *= *([^ ]+)", REG_EXTENDED)) {
fprintf(stderr, "Could not compile regex set_value");
exit(1);
}
regex_t regexSetFlag;
if (regcomp(&regexSetFlag, "([^ ]+) *= *([^ ]+) *", REG_EXTENDED)) {
fprintf(stderr, "Could not compile regex set_flag");
exit(1);
}
regmatch_t pmatch[4];

// Error checks
assert(rawLine[0] != '\0');
assert(rawLine[0] == '%');

// Run regex
string line(&(rawLine[1]));
smatch result;
if (regex_search(line, result, regexSetValue)) {
if (result.size() != 4) {
return false;
}
string toSet = result[1].str();
string key = result[2].str();
string value = result[3].str();
uint32_t ivalue = atoi(result[3].str().c_str());
if (!regexec(&regexSetValue, line.c_str(), 4, pmatch, 0)) {
string toSet = regexGroup(pmatch[1], line);
string key = regexGroup(pmatch[2], line);
string value = regexGroup(pmatch[3], line);


uint32_t ivalue = atoi(value.c_str());
if (toSet == "mutationLexicalCost") {
(*costs)->mutationLexicalCost[indexEdgeType(key)] = atof(value.c_str());
} else if (toSet == "insertionLexicalCost") {
Expand All @@ -243,12 +263,16 @@ bool parseMetadata(const char *rawLine, SynSearchCosts **costs,
(*costs)->transitionCostFromFalse[indexNatlogRelation(key)] = atof(value.c_str());
} else {
fprintf(stderr, "Unknown key: '%s'\n", toSet.c_str());
regfree(&regexSetValue);
regfree(&regexSetFlag);
return false;
}
regfree(&regexSetValue);
regfree(&regexSetFlag);
return true;
} else if (regex_search(line, result, regexSetFlag)) {
string toSet = result[1].str();
string value = result[2].str();
} else if (!regexec(&regexSetFlag, line.c_str(), 3, pmatch, 0)) {
string toSet = regexGroup(pmatch[1], line);
string value = regexGroup(pmatch[2], line);
if (toSet == "maxTicks") {
opts->maxTicks = atof(value.c_str());
fprintf(stderr, "set maxTicks to %u\n", opts->maxTicks);
Expand Down Expand Up @@ -284,12 +308,18 @@ bool parseMetadata(const char *rawLine, SynSearchCosts **costs,
}
} else {
fprintf(stderr, "Unknown flag: '%s'\n", toSet.c_str());
regfree(&regexSetValue);
regfree(&regexSetFlag);
return false;
}
regfree(&regexSetValue);
regfree(&regexSetFlag);
return true;
}
fprintf(stderr, "WARNING line NOT parsed as a directive: '%s'\n",
line.c_str());
regfree(&regexSetValue);
regfree(&regexSetFlag);
return false; // By default, no metadata
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ public class NaturalLIClassifier implements EntailmentClassifier {
public static boolean USE_LUCENE = true;

@Execution.Option(name="naturalli.weight", gloss="The weight to incorporate NaturalLI with")
public static double ALIGNMENT_WEIGHT = 6.0;
public static double CLASSIFIER_WEIGHT = 1.0 / 4.0;
public static double ALIGNMENT_WEIGHT = 0.5;
public static double CLASSIFIER_WEIGHT = 0.5;

@Execution.Option(name="naturalli.incache", gloss="The cache to read from")
private static String NATURALLI_INCACHE = "logs/train_all.cache";
Expand Down
Loading

0 comments on commit 1d3d10a

Please sign in to comment.