Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Uwe Schmidt
committed
Jun 24, 2013
1 parent
b3df797
commit ed14d32
Showing
28 changed files
with
6,071 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
-- ------------------------------------------------------------ | ||
|
||
{- | | ||
HXmlParser - Minimal Validating XML Parser of the Haskell XML Toolbox, no HTTP supported | ||
XML well-formed checker and validator. | ||
this program may be used as example main program for the | ||
API of the Haskell XML Toolbox | ||
commandline parameter evaluation | ||
and return code handling are the most complicated part | ||
of this example application | ||
-} | ||
|
||
-- ------------------------------------------------------------ | ||
|
||
module Main | ||
where | ||
|
||
import Text.XML.HXT.Core9 | ||
|
||
import System.Console.GetOpt | ||
import System.Environment | ||
import System.Exit | ||
import System.IO | ||
|
||
-- ------------------------------------------------------------ | ||
|
||
-- |
-- the main program of the Haskell XML Validating Parser
--
-- Evaluates the command line, runs the parser arrow and turns the
-- resulting error status into the process exit code.
main :: IO ()
main
    = do
      argv      <- getArgs                    -- get the commandline arguments
      (al, src) <- cmdlineOpts argv           -- and evaluate them, return a key-value list
      rcs       <- runX (parser al src)       -- run the parser arrow
      -- runX returns a list of results; do not rely on a partial
      -- single-element pattern. A run that delivers no status at all is
      -- reported as a failure instead of dying with a pattern match error.
      exitProg (null rcs || any (>= c_err) rcs)
|
||
-- ------------------------------------------------------------ | ||
|
||
-- | Terminate the program. 'True' signals an error and exits with
-- @ExitFailure (-1)@, 'False' exits with 'ExitSuccess'.
exitProg :: Bool -> IO a
exitProg failed
    = exitWith status
    where
    status
        | failed    = ExitFailure (-1)
        | otherwise = ExitSuccess
|
||
-- ------------------------------------------------------------ | ||
|
||
-- |
-- the /real/ main program
--
-- gets the well-formed document, validates it, propagates and checks
-- namespaces and controls the output
--
-- All global config options are set first; the output file and the other
-- user options are stored as key-value pairs in the system state and are
-- read back with 'getSysAttr'.
parser :: SysConfigList -> String -> IOSArrow b Int
parser config src
    = configSysVars config
      >>> readDocument [] src                     -- no more special read options needed
      >>> ( processIfOk `when` documentStatusOk ) -- only process documents read without errors
      >>> traceSource
      >>> traceTree
      >>> ( emitDocument `whenNot` outputSuppressed )
      >>> getErrStatus
    where
    -- run the action selected by the "action" entry of the user defined values
    processIfOk
        = traceMsg 1 "start processing document"
          >>>
          ( processDocument $< getSysAttr "action" )
          >>>
          traceMsg 1 "document processing finished"

    -- write the document to the output file stored in the system configuration
    emitDocument
        = writeDocument [] $< getSysAttr "output-file"

    -- succeeds when the "no-output" attribute in the key-value list is "1"
    outputSuppressed
        = getSysAttr "no-output" >>> isA (== "1")
|
||
-- simple example of a processing arrow, selected by a command line option
--
-- "only-text" keeps only the text nodes, "indent" indents the document,
-- every other action name leaves the document unchanged

processDocument :: String -> IOSArrow XmlTree XmlTree
processDocument action
    = case action of
      "only-text" -> traceMsg 1 "selecting plain text"
                     >>>
                     processChildren (deep isText)

      "indent"    -> traceMsg 1 "indent document"
                     >>>
                     indentDoc

      _           -> traceMsg 1 "default action: do nothing"
                     >>>
                     this
|
||
-- ------------------------------------------------------------ | ||
-- | ||
-- the options definition part | ||
-- see doc for System.Console.GetOpt | ||
|
||
-- | The program name shown in the \"Usage:\" line of the help text.
progName :: String
progName
    = "HXmlParser"
|
||
-- | All command line options: the predefined HXT option groups followed by
-- the two options specific to this program.
--
-- The values of the two program specific options are stored by
-- 'withSysAttr' in the system key-value list and can be read back with
-- @getSysAttr key@.
options :: [OptDescr SysConfig]
options
    = concat
      [ generalOptions
      , inputOptions
      , outputOptions
      , showOptions
      , userOptions
      ]
    where
    userOptions
        = [ Option "q" ["no-output"] (NoArg $ withSysAttr "no-output" "1") "no output of resulting document"
          , Option "x" ["action"]    (ReqArg (withSysAttr "action") "ACTION") "actions are: only-text, indent, no-op"
          ]
|
||
-- | Print the usage text and terminate the program.
--
-- With an empty error list the text goes to stdout and the program exits
-- successfully; otherwise the concatenated errors followed by the usage
-- text go to stderr and the program exits with a failure code.
usage :: [String] -> IO a
usage errl
    = case errl of
      [] -> do
            putStrLn usageText
            exitProg False
      _  -> do
            hPutStrLn stderr (concat errl ++ "\n" ++ usageText)
            exitProg True
    where
    usageText
        = usageInfo banner options
    banner
        = concat
          [ "HXmlParser - Validating XML Parser of the Haskell XML Toolbox with Monadic Interface\n"
          , "XML well-formed checker, DTD validator, HTML parser.\n\n"
          , "Usage: " ++ progName ++ " [OPTION...] [URI or FILE]"
          ]
|
||
-- | Evaluate the command line arguments.
--
-- Returns the list of configuration options and the single input URI or
-- file name. Option errors, more than one input, a missing input and the
-- help option all end in 'usage', which terminates the program.
cmdlineOpts :: [String] -> IO (SysConfigList, String)
cmdlineOpts argv
    | null errs
        = do
          sa <- inputSource
          checkHelp (getConfigAttr a_help scfg) sa
          return (scfg, sa)
    | otherwise
        = usage errs
    where
    (scfg, rest, errs)
        = getOpt Permute options argv

    -- at most one non-option argument is accepted as input
    inputSource
        = case rest of
          []    -> return []
          [uri] -> return uri
          _     -> usage ["only one input uri or file allowed\n"]

    -- the help request is checked before the missing input
    checkHelp "1" _  = usage []
    checkHelp _   [] = usage ["no input uri or file given\n"]
    checkHelp _   _  = return ()
|
||
-- ------------------------------------------------------------ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
prog = dist/build/core9-hparser/core9-hparser | ||
|
||
all : $(prog) | ||
|
||
# Build the executable with cabal.
# The Haskell sources and the cabal package description in this directory are
# prerequisites, so the rule runs again when one of them changes; without
# prerequisites make never rebuilds a binary that already exists.
$(prog) : $(wildcard *.hs *.cabal)
	cabal configure && cabal build
|
||
force : | ||
cabal clean && $(MAKE) $(prog) | ||
|
||
test : $(prog) | ||
@echo "===> run a few simple test cases with the validating parser" | ||
@echo "===> first see all command line options" | ||
$(prog) --help | ||
@echo | ||
$(MAKE) test0 test1 test2 test3 test4 | ||
|
||
EX1 = ../monad-hparser/example1.xml | ||
EX1a = ../monad-hparser/example1CRLF.xml | ||
EXi = ../monad-hparser/invalid.xml | ||
EX2 = ../../xhtml/xhtml.xml | ||
EX3 = ../monad-hparser/namespace0.xml | ||
EX3a = ../monad-hparser/namespace1.xml | ||
EX4 = ../monad-hparser/lousy.html | ||
EX4a = ../monad-hparser/emptyElements.html | ||
|
||
EX = $(wildcard example*.xml) $(wildcard lousy*.html) $(wildcard empty*.html) $(wildcard *valid*.xml *valid*.rng) $(wildcard namespace*.xml) | ||
|
||
# Basic checks: a valid document, its tree output, an invalid document
# (validated and well-formedness only) and validation without output.
# Depends on $(prog) so that "make test0" also works before the first build.
test0 : $(prog)
	@echo "===> a 1. simple valid document"
	$(prog) $(EX1)
	@echo
	@echo "===> the dom tree of the same document (without any redundant whitespace)"
	$(prog) --show-tree --remove-whitespace $(EX1)
	@echo
	@echo "===> the next test case contains validation erors, it must fail"
	$(prog) --verbose $(EXi) || true
	@echo
	@echo "===> same source, but only wellformed check"
	$(prog) --do-not-validate $(EXi) || true
	@echo
	@echo "===> only validation, no output of an XHTML source"
	$(prog) --verbose --no-output $(EX2)
	@echo
|
||
# Output encoding demo on the simple example document, plus tree output,
# text-only output and a CRLF variant of the same source.
# Depends on $(prog) so that "make test1" also works before the first build.
test1 : $(prog)
	@echo "===> the source of a very simple valid document" ; echo ; sleep 2
	cat $(EX1)
	@sleep 2 ; echo ; echo "===> parser will emit UTF-8" ; echo ; sleep 2
	$(prog) --output-encoding=UTF-8 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again with ISO-8859-1 (latin1) output" ; echo ; sleep 2
	$(prog) --output-encoding=ISO-8859-1 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again with US-ASCII output" ; echo ; sleep 2
	$(prog) --output-encoding=US-ASCII $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again with hdom tree output" ; echo ; sleep 2
	$(prog) --show-tree --output-encoding=ISO-8859-1 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again, but without any markup" ; echo ; sleep 2
	$(prog) --action=only-text --output-encoding=ISO-8859-1 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> same source, but with CRLF, parser will emit UTF-8" ; echo ; sleep 2
	$(prog) --output-encoding=UTF-8 $(EX1a)
	@echo
|
||
# XHTML document demo: show the source, then indented, text-only and tree output.
# Depends on $(prog) so that "make test2" also works before the first build.
test2 : $(prog)
	@echo "===> the source of a xhtml document" ; echo ; sleep 2
	cat $(EX2)
	@echo "that document has" `cat $(EX2) | wc -l` "lines"
	@sleep 2 ; echo ; echo "===> parser will validate this document and try to indent the output" ; echo ; sleep 2
	$(prog) --indent $(EX2)
	@sleep 2 ; echo ; echo "===> once again, but remove all markup" ; echo ; sleep 2
	$(prog) --action=only-text --remove-whitespace $(EX2)
	@sleep 2 ; echo ; echo "===> once again with hdom tree output" ; echo ; sleep 2
	$(prog) --show-tree --remove-whitespace $(EX2)
|
||
# Namespace processing demo; the last run is allowed to fail ("|| true")
# because its input produces namespace errors.
# Depends on $(prog) so that "make test3" also works before the first build.
test3 : $(prog)
	@echo "===> namespace processing examples" ; echo ; sleep 2
	@echo "===> namespace propagation test" ; echo ; sleep 2
	$(prog) --verbose --check-namespaces --indent --output-encoding=UTF-8 $(EX3)
	@echo
	@echo ; sleep 2 ; echo "===> namespace propagation test: tree output with attached namespaces" ; echo ; sleep 2
	$(prog) --verbose --check-namespaces --remove-whitespace --show-tree --output-encoding=ISO-8859-1 $(EX3)
	@echo
	@echo ; sleep 2 ; echo "===> namespace validation test: this test produces namespace errors" ; echo ; sleep 2
	$(prog) --verbose --do-not-validate --check-namespaces --indent --output-encoding=ISO-8859-1 $(EX3a) || true
	@echo
|
||
|
||
# HTML parsing demo on two HTML example documents.
# Depends on $(prog) so that "make test4" also works before the first build.
test4 : $(prog)
	@echo "===> HTML parsing examples" ; echo ; sleep 2
	@echo "===> the source of a lousy html document" ; echo ; sleep 2
	cat $(EX4)
	@sleep 2 ; echo ; echo "===> parser accepts this document and tries to build a document tree" ; echo ; sleep 2
	$(prog) --indent --preserve-comment --parse-html $(EX4)
	@echo "===> the source of another lousy html document containing empty elements" ; echo ; sleep 2
	cat $(EX4a)
	@sleep 2 ; echo ; echo "===> parser accepts this document and tries to format this as a HTML document without any dangarous empty elements" ; echo ; sleep 2
	$(prog) --indent --preserve-comment --parse-html --output-xhtml $(EX4a)
	@echo
|
||
clean : | ||
cabal clean | ||
|
||
.PHONY : all test test0 test1 test2 test3 test4 dist clean prof local force |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
name: core9-hparser | ||
version: 0.1.0.0 | ||
synopsis: XML/HTML parser with monadic version of HXT and HXT.9 compatibility mode | ||
description: XML/HTML parser with monadic version of HXT and HXT.9 compatibility mode | ||
license: MIT | ||
author: Uwe Schmidt | ||
maintainer: uwe@fh-wedel.de | ||
copyright: 2013, Uwe Schmidt | ||
category: Testing | ||
build-type: Simple | ||
cabal-version: >=1.8 | ||
|
||
executable core9-hparser | ||
main-is: Main.hs | ||
build-depends: base, | ||
hxt-monad ==1.0.* |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
prog = dist/build/monad-hparser/monad-hparser | ||
|
||
all : $(prog) | ||
|
||
# Build the executable with cabal.
# The Haskell sources and the cabal package description in this directory are
# prerequisites, so the rule runs again when one of them changes; without
# prerequisites make never rebuilds a binary that already exists.
$(prog) : $(wildcard *.hs *.cabal)
	cabal configure && cabal build
|
||
force : | ||
cabal clean && $(MAKE) $(prog) | ||
|
||
test : $(prog) | ||
@echo "===> run a few simple test cases with the validating parser" | ||
@echo "===> first see all command line options" | ||
$(prog) --help | ||
@echo | ||
$(MAKE) test0 test1 test2 test3 test4 | ||
|
||
EX1 = ../monad-hparser/example1.xml | ||
EX1a = ../monad-hparser/example1CRLF.xml | ||
EXi = ../monad-hparser/invalid.xml | ||
EX2 = ../../xhtml/xhtml.xml | ||
EX3 = ../monad-hparser/namespace0.xml | ||
EX3a = ../monad-hparser/namespace1.xml | ||
EX4 = ../monad-hparser/lousy.html | ||
EX4a = ../monad-hparser/emptyElements.html | ||
|
||
EX = $(wildcard example*.xml) $(wildcard lousy*.html) $(wildcard empty*.html) $(wildcard *valid*.xml *valid*.rng) $(wildcard namespace*.xml) | ||
|
||
# Basic checks: a valid document, its tree output, an invalid document
# (validated and well-formedness only) and validation without output.
# Depends on $(prog) so that "make test0" also works before the first build.
test0 : $(prog)
	@echo "===> a 1. simple valid document"
	$(prog) $(EX1)
	@echo
	@echo "===> the dom tree of the same document (without any redundant whitespace)"
	$(prog) --show-tree --remove-whitespace $(EX1)
	@echo
	@echo "===> the next test case contains validation erors, it must fail"
	$(prog) --verbose $(EXi) || true
	@echo
	@echo "===> same source, but only wellformed check"
	$(prog) --do-not-validate $(EXi) || true
	@echo
	@echo "===> only validation, no output of an XHTML source"
	$(prog) --verbose --no-output $(EX2)
	@echo
|
||
# Output encoding demo on the simple example document, plus tree output,
# text-only output and a CRLF variant of the same source.
# Depends on $(prog) so that "make test1" also works before the first build.
test1 : $(prog)
	@echo "===> the source of a very simple valid document" ; echo ; sleep 2
	cat $(EX1)
	@sleep 2 ; echo ; echo "===> parser will emit UTF-8" ; echo ; sleep 2
	$(prog) --output-encoding=UTF-8 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again with ISO-8859-1 (latin1) output" ; echo ; sleep 2
	$(prog) --output-encoding=ISO-8859-1 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again with US-ASCII output" ; echo ; sleep 2
	$(prog) --output-encoding=US-ASCII $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again with hdom tree output" ; echo ; sleep 2
	$(prog) --show-tree --output-encoding=ISO-8859-1 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again, but without any markup" ; echo ; sleep 2
	$(prog) --action=only-text --output-encoding=ISO-8859-1 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> same source, but with CRLF, parser will emit UTF-8" ; echo ; sleep 2
	$(prog) --output-encoding=UTF-8 $(EX1a)
	@echo
|
||
# XHTML document demo: show the source, then indented, text-only and tree output.
# Depends on $(prog) so that "make test2" also works before the first build.
test2 : $(prog)
	@echo "===> the source of a xhtml document" ; echo ; sleep 2
	cat $(EX2)
	@echo "that document has" `cat $(EX2) | wc -l` "lines"
	@sleep 2 ; echo ; echo "===> parser will validate this document and try to indent the output" ; echo ; sleep 2
	$(prog) --indent $(EX2)
	@sleep 2 ; echo ; echo "===> once again, but remove all markup" ; echo ; sleep 2
	$(prog) --action=only-text --remove-whitespace $(EX2)
	@sleep 2 ; echo ; echo "===> once again with hdom tree output" ; echo ; sleep 2
	$(prog) --show-tree --remove-whitespace $(EX2)
|
||
# Namespace processing demo; the last run is allowed to fail ("|| true")
# because its input produces namespace errors.
# Depends on $(prog) so that "make test3" also works before the first build.
test3 : $(prog)
	@echo "===> namespace processing examples" ; echo ; sleep 2
	@echo "===> namespace propagation test" ; echo ; sleep 2
	$(prog) --verbose --check-namespaces --indent --output-encoding=UTF-8 $(EX3)
	@echo
	@echo ; sleep 2 ; echo "===> namespace propagation test: tree output with attached namespaces" ; echo ; sleep 2
	$(prog) --verbose --check-namespaces --remove-whitespace --show-tree --output-encoding=ISO-8859-1 $(EX3)
	@echo
	@echo ; sleep 2 ; echo "===> namespace validation test: this test produces namespace errors" ; echo ; sleep 2
	$(prog) --verbose --do-not-validate --check-namespaces --indent --output-encoding=ISO-8859-1 $(EX3a) || true
	@echo
|
||
|
||
# HTML parsing demo on two HTML example documents.
# Depends on $(prog) so that "make test4" also works before the first build.
test4 : $(prog)
	@echo "===> HTML parsing examples" ; echo ; sleep 2
	@echo "===> the source of a lousy html document" ; echo ; sleep 2
	cat $(EX4)
	@sleep 2 ; echo ; echo "===> parser accepts this document and tries to build a document tree" ; echo ; sleep 2
	$(prog) --indent --preserve-comment --parse-html $(EX4)
	@echo "===> the source of another lousy html document containing empty elements" ; echo ; sleep 2
	cat $(EX4a)
	@sleep 2 ; echo ; echo "===> parser accepts this document and tries to format this as a HTML document without any dangarous empty elements" ; echo ; sleep 2
	$(prog) --indent --preserve-comment --parse-html --output-xhtml $(EX4a)
	@echo
|
||
# Copy the example documents, this Makefile and the program source into
# $(DIST_DIR), creating the directory when it does not exist.
# DIST_DIR is not defined in this Makefile and has to be supplied by the
# caller (e.g. "make dist DIST_DIR=/some/dir"). With an empty value the
# directory test succeeds trivially and cp would treat its last source
# argument as the destination, so stop early with a clear message instead.
# NOTE(review): $(prog).hs expands to dist/build/monad-hparser/monad-hparser.hs,
# which looks like a leftover from an older layout -- confirm the intended
# source file.
dist :
	@[ -n "$(DIST_DIR)" ] || { echo "DIST_DIR is not set" >&2 ; exit 1 ; }
	[ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR)
	cp $(EX) Makefile $(prog).hs $(DIST_DIR)
|
||
clean : | ||
cabal clean | ||
|
||
.PHONY : all test test0 test1 test2 test3 test4 dist clean prof local force |
Oops, something went wrong.