From deb206b7d1c1e7ce87d6e33c7a1dff91b3adb82b Mon Sep 17 00:00:00 2001 From: Nandish Jayaram Date: Wed, 27 Jun 2018 11:25:46 -0700 Subject: [PATCH 1/2] Bugfix: Fix failing dev check in CRF This commit has the following changes: - A couple of dev check files in CRF did not have the label table creation in it. But the label table was consumed by one of the queries that led to dev-check failure. - Run dev check on Jenkins build instead of install check. --- .../modules/crf/test/crf_test_small.sql_in | 10 +-- .../modules/crf/test/crf_train_large.sql_in | 84 +++++++++++-------- tool/jenkins/jenkins_build.sh | 14 ++-- 3 files changed, 61 insertions(+), 47 deletions(-) diff --git a/src/ports/postgres/modules/crf/test/crf_test_small.sql_in b/src/ports/postgres/modules/crf/test/crf_test_small.sql_in index 1e9533ba6..5d1f4bc40 100644 --- a/src/ports/postgres/modules/crf/test/crf_test_small.sql_in +++ b/src/ports/postgres/modules/crf/test/crf_test_small.sql_in @@ -14,7 +14,7 @@ (2,E'But analysts reckon underlying support for sterling has been eroded by the chancellor ''s failure to announce any new policy measures in his Mansion House speech last Thursday .'), (3,E'His actions prevent disaster.'); analyze crf_document; - + -- Features table CREATE TABLE crf_feature_test (id integer,name text,prev_label_id integer,label_id integer,weight float); INSERT INTO crf_feature_test VALUES @@ -90,7 +90,7 @@ (18,'PRP$'),(19,'RB'), (20,'RBR'), (21,'RBS'), (22,'RP'), (23,'SYM'), (24,'TO'), (25,'UH'), (26,'VB'), (27,'VBD'), (28,'VBG'),(29,'VBN'), (30,'VBP'), (31,'VBZ'),(32,'WDT'), (33,'WP'), (34,'WP$'),(35,'WRB'), (36,'$'), (37,'#'), (38,''''''), (39,'``'), (40,'('), (41,')'), (42,','), (43,'.'), (44,':'); - analyze crf_label; + analyze test_crf_label; -- Segment table CREATE TABLE test_segmenttbl (start_pos integer,doc_id integer,seg_text text,max_pos integer); @@ -106,15 +106,15 @@ (13,2,'''s',28), (14,2,'failure',28), (15,2,'to',28), (16,2,'announce',28), (17,2,'any',28), (18,2,'new',28), (19,2,'policy',28), (20,2,'measures',28),(21,2,'in',28), (22,2,'his',28), (23,2,'mansion',28), (24,2,'house',28), (25,2,'speech',28), (26,2,'last',28), (27,2,'thursday',28), - (28,2,'.',28), (0,3,'his',4), (1,3,'actions',4), (2,3,'prevent',4), (3,3,'disaster',4), + (28,2,'.',28), (0,3,'his',4), (1,3,'actions',4), (2,3,'prevent',4), (3,3,'disaster',4), (4,3,'.',4); analyze test_segmenttbl; - -- extract features for tokens stored in segmenttbl + -- extract features for tokens stored in segmenttbl SELECT crf_test_fgen('test_segmenttbl','crf_dictionary','test_crf_label','crf_regex','crf_feature_test','viterbi_mtbl','viterbi_rtbl'); -- Expected viterbi labeling result - -- The result is produced from Dr. Sunita's CRF java package with the same input + -- The result is produced from Dr. Sunita's CRF java package with the same input CREATE TABLE expected_extraction(doc_id integer, start_pos integer, seg_text text, label character varying); INSERT INTO expected_extraction VALUES (1,0,'chancellor','NNP'),(1,1,'of','IN'), (1,2,'the','DT'), (1,3,'exchequer','NNP'), (1,4,'nigel','NNP'), diff --git a/src/ports/postgres/modules/crf/test/crf_train_large.sql_in b/src/ports/postgres/modules/crf/test/crf_train_large.sql_in index d4a2d866f..b6d4e7c44 100644 --- a/src/ports/postgres/modules/crf/test/crf_train_large.sql_in +++ b/src/ports/postgres/modules/crf/test/crf_train_large.sql_in @@ -234,26 +234,40 @@ INSERT INTO train_new_segmenttbl VALUES (30, 7, 'years', 13, 31), (31, 7, '.', 44, 31); -CREATE TABLE train_new_regex(pattern text,name text); +CREATE TABLE train_new_regex(pattern text,name text); INSERT INTO train_new_regex VALUES -('^[A-Z][a-z]+$','InitCapital'), ('^[A-Z]+$','isAllCapital'), + ('^[A-Z][a-z]+$','InitCapital'), ('^[A-Z]+$','isAllCapital'), ('^.*[0-9]+.*$','containsDigit'),('^.+[.]$','endsWithDot'), ('^.+[,]$','endsWithComma'), ('^.+er$','endsWithER'), ('^.+est$','endsWithEst'), ('^.+ed$','endsWithED'), ('^.+s$','endsWithS'), ('^.+ing$','endsWithIng'), ('^.+ly$','endsWithly'), ('^.+-.+$','isDashSeparatedWords'), ('^.*@.*$','isEmailId'); - analyze train_new_regex; +analyze train_new_regex; - SELECT crf_train_fgen('train_new_segmenttbl', 'train_new_regex', 'crf_label', 'train_new_dictionary', 'train_new_featuretbl','train_new_featureset'); +CREATE TABLE crf_label_new (id integer,label character varying); +INSERT INTO crf_label_new VALUES + (0,'CC'), (1,'CD'), (2,'DT'), (3,'EX'), (4,'FW'), (5,'IN'), (6,'JJ'), (7,'JJR'), (8,'JJS'), + (9,'LS'), (10,'MD'), (11,'NN'), (12,'NNS'), (13,'NNP'),(14,'NNPS'),(15,'PDT'),(16,'POS'),(17,'PRP'), + (18,'PRP$'),(19,'RB'), (20,'RBR'), (21,'RBS'), (22,'RP'), (23,'SYM'), (24,'TO'), (25,'UH'), (26,'VB'), + (27,'VBD'), (28,'VBG'),(29,'VBN'), (30,'VBP'), (31,'VBZ'),(32,'WDT'), (33,'WP'), (34,'WP$'),(35,'WRB'), + (36,'$'), (37,'#'), (38,''''''); +INSERT INTO crf_label_new VALUES + (39,); +m4_changequote(,) +INSERT INTO crf_label_new VALUES + (40,'('), (41,')'), (42,','), (43,'.'), (44,':'); +analyze crf_label_new; - SELECT lincrf_train('train_new_featuretbl', 'train_new_featureset', 'crf_label', 'train_new_stats', 'train_new_crf_feature', 30); +SELECT crf_train_fgen('train_new_segmenttbl', 'train_new_regex', 'crf_label_new', 'train_new_dictionary', 'train_new_featuretbl','train_new_featureset'); - -- Expected feature table - -- The result is produced from Dr. Sunita's CRF java package with the same input - CREATE TABLE expected_crf_feature_new(id integer,name text,prev_label integer,label integer,weight float); +SELECT lincrf_train('train_new_featuretbl', 'train_new_featureset', 'crf_label_new', 'train_new_stats', 'train_new_crf_feature', 30); - INSERT INTO expected_crf_feature_new VALUES +-- Expected feature table +-- The result is produced from Dr. Sunita's CRF java package with the same input +CREATE TABLE expected_crf_feature_new(id integer,name text,prev_label integer,label integer,weight float); + +INSERT INTO expected_crf_feature_new VALUES (0, 'S.', -1, 12, 0.5516753522178934), (1, 'W_freight', -1, 12, 5.959241076198326), (2, 'E.', 12, 13, 2.0789747316372034), @@ -545,33 +559,33 @@ INSERT INTO train_new_regex VALUES (288, 'E.', 27, 13, 0.6748848167259296), (289, 'W_past', -1, 7, 2.852378831268221); - SELECT assert( - SUM(abs(c1.weight-c2.weight)) < 0.1, - 'Total difference between extracted feature weights and expected feature weights is > 0.1.') - FROM expected_crf_feature_new c1, train_new_crf_feature c2 - WHERE c1.name = c2.name AND c1.prev_label = c2.prev_label_id AND c1.label = c2.label_id;; +SELECT assert( + SUM(abs(c1.weight-c2.weight)) < 0.1, + 'Total difference between extracted feature weights and expected feature weights is > 0.1.') +FROM expected_crf_feature_new c1, train_new_crf_feature c2 +WHERE c1.name = c2.name AND c1.prev_label = c2.prev_label_id AND c1.label = c2.label_id;; - -- Compare the expected features and the extraction features. It fails - -- if the features do not match. - SELECT assert(s1.count+s2.count = 0, 'Features extracted do not match expected features.') - FROM ( - SELECT count(*) FROM( - SELECT name, prev_label, label - FROM expected_crf_feature_new - EXCEPT ALL - SELECT name, prev_label_id, label_id - FROM train_new_crf_feature - ) AS U - )s1, - ( - SELECT count(*) FROM( - SELECT name, prev_label_id, label_id - FROM train_new_crf_feature - EXCEPT ALL - SELECT name, prev_label, label - FROM expected_crf_feature_new - ) AS U - )s2; +-- Compare the expected features and the extraction features. It fails +-- if the features do not match. +SELECT assert(s1.count+s2.count = 0, 'Features extracted do not match expected features.') +FROM ( + SELECT count(*) FROM( + SELECT name, prev_label, label + FROM expected_crf_feature_new + EXCEPT ALL + SELECT name, prev_label_id, label_id + FROM train_new_crf_feature + ) AS U +)s1, +( + SELECT count(*) FROM( + SELECT name, prev_label_id, label_id + FROM train_new_crf_feature + EXCEPT ALL + SELECT name, prev_label, label + FROM expected_crf_feature_new + ) AS U +)s2; !>) m4_changequote(,) diff --git a/tool/jenkins/jenkins_build.sh b/tool/jenkins/jenkins_build.sh index 0dce94373..c159b5e49 100644 --- a/tool/jenkins/jenkins_build.sh +++ b/tool/jenkins/jenkins_build.sh @@ -66,8 +66,8 @@ docker exec madlib bash -c 'rm -rf /build; mkdir /build; cd /build; cmake ../mad EOF docker exec madlib bash -c 'rm -rf /build; mkdir /build; cd /build; cmake ../madlib; make clean; make; make install; make package' | tee $workdir/logs/madlib_compile.log -echo "---------- Installing and running install-check --------------------" -# Install MADlib and run install check +echo "---------- Installing and running dev-check --------------------" +# Install MADlib and run dev check cat < Date: Wed, 27 Jun 2018 14:50:19 -0700 Subject: [PATCH 2/2] Remove jenkins build script changes from this commit --- tool/jenkins/jenkins_build.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tool/jenkins/jenkins_build.sh b/tool/jenkins/jenkins_build.sh index c159b5e49..0dce94373 100644 --- a/tool/jenkins/jenkins_build.sh +++ b/tool/jenkins/jenkins_build.sh @@ -66,8 +66,8 @@ docker exec madlib bash -c 'rm -rf /build; mkdir /build; cd /build; cmake ../mad EOF docker exec madlib bash -c 'rm -rf /build; mkdir /build; cd /build; cmake ../madlib; make clean; make; make install; make package' | tee $workdir/logs/madlib_compile.log -echo "---------- Installing and running dev-check --------------------" -# Install MADlib and run dev check +echo "---------- Installing and running install-check --------------------" +# Install MADlib and run install check cat <