Skip to content

Commit

Permalink
test for hxt-monad
Browse files Browse the repository at this point in the history
  • Loading branch information
Uwe Schmidt committed Jun 24, 2013
1 parent b3df797 commit ed14d32
Show file tree
Hide file tree
Showing 28 changed files with 6,071 additions and 3 deletions.
160 changes: 160 additions & 0 deletions hxt-monad/example/core9-hparser/Main.hs
@@ -0,0 +1,160 @@
-- ------------------------------------------------------------

{- |
HXmlParser - Minimal Validating XML Parser of the Haskell XML Toolbox, no HTTP supported
XML well-formed checker and validator.
this program may be used as example main program for the
API of the Haskell XML Toolbox
commandline parameter evaluation and
return code handling are the most complicated parts
of this example application
-}

-- ------------------------------------------------------------

module Main
where

import Text.XML.HXT.Core9

import System.Console.GetOpt
import System.Environment
import System.Exit
import System.IO

-- ------------------------------------------------------------

-- |
-- the main program of the Haskell XML Validating Parser
--
-- evaluates the command line, runs the parser arrow and turns the
-- resulting error level(s) into the exit code of the process

main :: IO ()
main
    = do
      argv      <- getArgs                      -- get the commandline arguments
      (al, src) <- cmdlineOpts argv             -- and evaluate them, return a key-value list
      rcs       <- runX (parser al src)         -- run the parser arrow
                                                -- the whole result list is bound (no partial [rc] pattern):
                                                -- no result at all or any level >= c_err counts as failure
      exitProg (null rcs || any (>= c_err) rcs) -- set return code and terminate

-- ------------------------------------------------------------

-- | terminate the program: 'True' exits with @ExitFailure (-1)@,
-- 'False' exits with 'ExitSuccess'

exitProg :: Bool -> IO a
exitProg failed
    = exitWith rc
    where
      rc
          | failed    = ExitFailure (-1)
          | otherwise = ExitSuccess

-- ------------------------------------------------------------

-- |
-- the /real/ main program
--
-- stores the configuration, reads the document, runs the selected action
-- if the document status is ok, traces source and tree, writes the result
-- unless output is switched off, and finally delivers the error status

parser :: SysConfigList -> String -> IOSArrow b Int
parser config src
    = configSysVars config              -- set all global config options; the output file and the
      >>>                               -- other user options are stored as key-value pairs in the system state
      readDocument [] src               -- no more special read options needed
      >>>
      processIfOk
      >>>
      traceSource
      >>>
      traceTree
      >>>
      writeUnlessSuppressed
      >>>
      getErrStatus
    where
      -- run the user selected action, but only when documentStatusOk holds
      processIfOk :: IOSArrow XmlTree XmlTree
      processIfOk
          = ( traceMsg 1 "start processing document"
              >>>
              ( processDocument $< getSysAttr "action" )        -- the action is looked up in the key-value list of user defined values
              >>>
              traceMsg 1 "document processing finished"
            )
            `when`
            documentStatusOk

      -- write to the configured output file unless the "no-output" attribute is "1"
      writeUnlessSuppressed :: IOSArrow XmlTree XmlTree
      writeUnlessSuppressed
          = ( writeDocument [] $< getSysAttr "output-file" )    -- the output file is looked up in the system configuration
            `whenNot`
            ( getSysAttr "no-output" >>> isA (== "1") )         -- the no-output value is looked up in the system key-value list

-- simple example of a processing arrow, selected by a command line option
--
-- every action first emits a level 1 trace message naming itself;
-- unknown action names fall through to the identity arrow

processDocument :: String -> IOSArrow XmlTree XmlTree
processDocument action
    = case action of
        "only-text" -> announce "selecting plain text"       (processChildren (deep isText))
        "indent"    -> announce "indent document"            indentDoc
        _           -> announce "default action: do nothing" this
    where
      -- trace the message, then run the actual processing step
      announce msg step
          = traceMsg 1 msg >>> step

-- ------------------------------------------------------------
--
-- the options definition part
-- see doc for System.Console.GetOpt

-- | program name as shown in the \"Usage:\" line of the help text
progName :: String
progName = "HXmlParser"

-- | all command line options: the general, input, output and show option
-- lists followed by the two options defined for this program

options :: [OptDescr SysConfig]
options
    = concat
      [ generalOptions
      , inputOptions
      , outputOptions
      , showOptions
      , localOptions
      ]
    where
      -- the values of these 2 options are stored by withSysAttr in the system key-value list
      -- and can be read by getSysAttr key
      localOptions :: [OptDescr SysConfig]
      localOptions
          = [ Option "q" ["no-output"] (NoArg (withSysAttr "no-output" "1"))     "no output of resulting document"
            , Option "x" ["action"]    (ReqArg (withSysAttr "action") "ACTION") "actions are: only-text, indent, no-op"
            ]

-- | print the usage text and terminate
--
-- with an empty error list the usage text goes to stdout and the program
-- exits successfully; otherwise the errors followed by the usage text go
-- to stderr and the program exits with the failure code

usage :: [String] -> IO a
usage errl
    = if null errl
      then finish stdout use False
      else finish stderr (concat errl ++ "\n" ++ use) True
    where
      -- emit the message on the given handle, then leave the program
      finish out msg failed
          = hPutStrLn out msg >> exitProg failed

      use
          = usageInfo header options

      header
          = concat
            [ "HXmlParser - Validating XML Parser of the Haskell XML Toolbox with Monadic Interface\n"
            , "XML well-formed checker, DTD validator, HTML parser.\n\n"
            , "Usage: ", progName, " [OPTION...] [URI or FILE]"
            ]

-- | evaluate the command line
--
-- returns the collected configuration together with the single input
-- uri or file; option errors, more than one input, a help request or a
-- missing input all end in 'usage', which terminates the program

cmdlineOpts :: [String] -> IO (SysConfigList, String)
cmdlineOpts argv
    = case getOpt Permute options argv of
        (config, rest, [])
            -> do
               input <- singleInput rest
               checkHelp (getConfigAttr a_help config) input
               return (config, input)
        (_, _, problems)
            -> usage problems
    where
      -- at most one non-option argument is accepted
      singleInput args
          = case args of
              []    -> return []
              [uri] -> return uri
              _     -> usage ["only one input uri or file allowed\n"]

      -- a help request wins over the check for a missing input
      checkHelp helpFlag input
          | helpFlag == "1" = usage []
          | null input      = usage ["no input uri or file given\n"]
          | otherwise       = return ()

-- ------------------------------------------------------------
107 changes: 107 additions & 0 deletions hxt-monad/example/core9-hparser/Makefile
@@ -0,0 +1,107 @@
# path of the executable produced by "cabal build"
prog = dist/build/core9-hparser/core9-hparser

# Haskell sources and package description in this directory; used as
# prerequisites so that an edited source triggers a rebuild
src = $(wildcard *.hs *.cabal)

all : $(prog)

# without prerequisites an existing executable would never be rebuilt
$(prog) : $(src)
	cabal configure && cabal build

# unconditional rebuild from a clean state
force :
	cabal clean && $(MAKE) $(prog)

# test: needs the built executable, shows its command line help and then
# runs the targets test0 .. test4 through a recursive make
test : $(prog)
	@echo "===> run a few simple test cases with the validating parser"
	@echo "===> first see all command line options"
	$(prog) --help
	@echo
	$(MAKE) test0 test1 test2 test3 test4

# input documents for the test targets; all but EX2 live in the sibling
# directory ../monad-hparser
EX1 = ../monad-hparser/example1.xml
EX1a = ../monad-hparser/example1CRLF.xml
EXi = ../monad-hparser/invalid.xml
EX2 = ../../xhtml/xhtml.xml
EX3 = ../monad-hparser/namespace0.xml
EX3a = ../monad-hparser/namespace1.xml
EX4 = ../monad-hparser/lousy.html
EX4a = ../monad-hparser/emptyElements.html

# example files matched in the current directory
# NOTE(review): no rule in this Makefile references $(EX) - confirm whether it is still needed
EX = $(wildcard example*.xml) $(wildcard lousy*.html) $(wildcard empty*.html) $(wildcard *valid*.xml *valid*.rng) $(wildcard namespace*.xml)

# test0: basic runs - parse $(EX1), show its tree without whitespace,
# run $(EXi) with and without validation (a failing exit code is
# tolerated by "|| true"), and process $(EX2) with --no-output
test0 :
	@echo "===> a 1. simple valid document"
	$(prog) $(EX1)
	@echo
	@echo "===> the dom tree of the same document (without any redundant whitespace)"
	$(prog) --show-tree --remove-whitespace $(EX1)
	@echo
	@echo "===> the next test case contains validation erors, it must fail"
	$(prog) --verbose $(EXi) || true
	@echo
	@echo "===> same source, but only wellformed check"
	$(prog) --do-not-validate $(EXi) || true
	@echo
	@echo "===> only validation, no output of an XHTML source"
	$(prog) --verbose --no-output $(EX2)
	@echo

# test1: prints $(EX1), then runs the parser on it with different output
# encodings, with tree output and with the only-text action, and finally
# on the CRLF variant $(EX1a); the "sleep 2" calls pause between the
# steps (presumably so the output can be followed interactively)
test1 :
	@echo "===> the source of a very simple valid document" ; echo ; sleep 2
	cat $(EX1)
	@sleep 2 ; echo ; echo "===> parser will emit UTF-8" ; echo ; sleep 2
	$(prog) --output-encoding=UTF-8 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again with ISO-8859-1 (latin1) output" ; echo ; sleep 2
	$(prog) --output-encoding=ISO-8859-1 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again with US-ASCII output" ; echo ; sleep 2
	$(prog) --output-encoding=US-ASCII $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again with hdom tree output" ; echo ; sleep 2
	$(prog) --show-tree --output-encoding=ISO-8859-1 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again, but without any markup" ; echo ; sleep 2
	$(prog) --action=only-text --output-encoding=ISO-8859-1 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> same source, but with CRLF, parser will emit UTF-8" ; echo ; sleep 2
	$(prog) --output-encoding=UTF-8 $(EX1a)
	@echo

# test2: prints the XHTML document $(EX2) and its line count, then runs
# the parser on it with --indent, with the only-text action and with
# tree output
test2 :
	@echo "===> the source of a xhtml document" ; echo ; sleep 2
	cat $(EX2)
	@echo "that document has" `cat $(EX2) | wc -l` "lines"
	@sleep 2 ; echo ; echo "===> parser will validate this document and try to indent the output" ; echo ; sleep 2
	$(prog) --indent $(EX2)
	@sleep 2 ; echo ; echo "===> once again, but remove all markup" ; echo ; sleep 2
	$(prog) --action=only-text --remove-whitespace $(EX2)
	@sleep 2 ; echo ; echo "===> once again with hdom tree output" ; echo ; sleep 2
	$(prog) --show-tree --remove-whitespace $(EX2)

# test3: namespace runs with --check-namespaces on $(EX3) (indented and
# as tree) and on $(EX3a); the last run is announced as producing
# namespace errors, so its exit code is ignored via "|| true"
test3 :
	@echo "===> namespace processing examples" ; echo ; sleep 2
	@echo "===> namespace propagation test" ; echo ; sleep 2
	$(prog) --verbose --check-namespaces --indent --output-encoding=UTF-8 $(EX3)
	@echo
	@echo ; sleep 2 ; echo "===> namespace propagation test: tree output with attached namespaces" ; echo ; sleep 2
	$(prog) --verbose --check-namespaces --remove-whitespace --show-tree --output-encoding=ISO-8859-1 $(EX3)
	@echo
	@echo ; sleep 2 ; echo "===> namespace validation test: this test produces namespace errors" ; echo ; sleep 2
	$(prog) --verbose --do-not-validate --check-namespaces --indent --output-encoding=ISO-8859-1 $(EX3a) || true
	@echo


# test4: HTML runs - prints $(EX4) and $(EX4a) and parses both with
# --parse-html; the second run additionally passes --output-xhtml
test4 :
	@echo "===> HTML parsing examples" ; echo ; sleep 2
	@echo "===> the source of a lousy html document" ; echo ; sleep 2
	cat $(EX4)
	@sleep 2 ; echo ; echo "===> parser accepts this document and tries to build a document tree" ; echo ; sleep 2
	$(prog) --indent --preserve-comment --parse-html $(EX4)
	@echo "===> the source of another lousy html document containing empty elements" ; echo ; sleep 2
	cat $(EX4a)
	@sleep 2 ; echo ; echo "===> parser accepts this document and tries to format this as a HTML document without any dangarous empty elements" ; echo ; sleep 2
	$(prog) --indent --preserve-comment --parse-html --output-xhtml $(EX4a)
	@echo

# clean: delegates the removal of build products to cabal
clean :
	cabal clean

# targets that do not name files
# NOTE(review): dist, prof and local are listed here but have no rule in this Makefile
.PHONY : all test test0 test1 test2 test3 test4 dist clean prof local force
16 changes: 16 additions & 0 deletions hxt-monad/example/core9-hparser/core9-hparser.cabal
@@ -0,0 +1,16 @@
-- package description of the core9-hparser example program
name: core9-hparser
version: 0.1.0.0
synopsis: XML/HTML parser with monadic version of HXT and HXT.9 compatibility mode
description: XML/HTML parser with monadic version of HXT and HXT.9 compatibility mode
license: MIT
author: Uwe Schmidt
maintainer: uwe@fh-wedel.de
copyright: 2013, Uwe Schmidt
category: Testing
build-type: Simple
cabal-version: >=1.8

-- a single executable built from Main.hs; base carries no version bound,
-- hxt-monad is restricted to the 1.0 series
executable core9-hparser
  main-is: Main.hs
  build-depends: base,
                 hxt-monad ==1.0.*
File renamed without changes.
111 changes: 111 additions & 0 deletions hxt-monad/example/monad-hparser/Makefile
@@ -0,0 +1,111 @@
# path of the executable produced by "cabal build"
prog = dist/build/monad-hparser/monad-hparser

# Haskell sources and package description in this directory; used as
# prerequisites so that an edited source triggers a rebuild
src = $(wildcard *.hs *.cabal)

all : $(prog)

# without prerequisites an existing executable would never be rebuilt
$(prog) : $(src)
	cabal configure && cabal build

# unconditional rebuild from a clean state
force :
	cabal clean && $(MAKE) $(prog)

# test: needs the built executable, shows its command line help and then
# runs the targets test0 .. test4 through a recursive make
test : $(prog)
	@echo "===> run a few simple test cases with the validating parser"
	@echo "===> first see all command line options"
	$(prog) --help
	@echo
	$(MAKE) test0 test1 test2 test3 test4

# input documents for the test targets, addressed via ../monad-hparser
# (except EX2, which lives under ../../xhtml)
EX1 = ../monad-hparser/example1.xml
EX1a = ../monad-hparser/example1CRLF.xml
EXi = ../monad-hparser/invalid.xml
EX2 = ../../xhtml/xhtml.xml
EX3 = ../monad-hparser/namespace0.xml
EX3a = ../monad-hparser/namespace1.xml
EX4 = ../monad-hparser/lousy.html
EX4a = ../monad-hparser/emptyElements.html

# example files matched in the current directory; copied by the dist rule
EX = $(wildcard example*.xml) $(wildcard lousy*.html) $(wildcard empty*.html) $(wildcard *valid*.xml *valid*.rng) $(wildcard namespace*.xml)

# test0: basic runs - parse $(EX1), show its tree without whitespace,
# run $(EXi) with and without validation (a failing exit code is
# tolerated by "|| true"), and process $(EX2) with --no-output
test0 :
	@echo "===> a 1. simple valid document"
	$(prog) $(EX1)
	@echo
	@echo "===> the dom tree of the same document (without any redundant whitespace)"
	$(prog) --show-tree --remove-whitespace $(EX1)
	@echo
	@echo "===> the next test case contains validation erors, it must fail"
	$(prog) --verbose $(EXi) || true
	@echo
	@echo "===> same source, but only wellformed check"
	$(prog) --do-not-validate $(EXi) || true
	@echo
	@echo "===> only validation, no output of an XHTML source"
	$(prog) --verbose --no-output $(EX2)
	@echo

# test1: prints $(EX1), then runs the parser on it with different output
# encodings, with tree output and with the only-text action, and finally
# on the CRLF variant $(EX1a); the "sleep 2" calls pause between the
# steps (presumably so the output can be followed interactively)
test1 :
	@echo "===> the source of a very simple valid document" ; echo ; sleep 2
	cat $(EX1)
	@sleep 2 ; echo ; echo "===> parser will emit UTF-8" ; echo ; sleep 2
	$(prog) --output-encoding=UTF-8 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again with ISO-8859-1 (latin1) output" ; echo ; sleep 2
	$(prog) --output-encoding=ISO-8859-1 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again with US-ASCII output" ; echo ; sleep 2
	$(prog) --output-encoding=US-ASCII $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again with hdom tree output" ; echo ; sleep 2
	$(prog) --show-tree --output-encoding=ISO-8859-1 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> once again, but without any markup" ; echo ; sleep 2
	$(prog) --action=only-text --output-encoding=ISO-8859-1 $(EX1)
	@echo
	@sleep 2 ; echo ; echo "===> same source, but with CRLF, parser will emit UTF-8" ; echo ; sleep 2
	$(prog) --output-encoding=UTF-8 $(EX1a)
	@echo

# test2: prints the XHTML document $(EX2) and its line count, then runs
# the parser on it with --indent, with the only-text action and with
# tree output
test2 :
	@echo "===> the source of a xhtml document" ; echo ; sleep 2
	cat $(EX2)
	@echo "that document has" `cat $(EX2) | wc -l` "lines"
	@sleep 2 ; echo ; echo "===> parser will validate this document and try to indent the output" ; echo ; sleep 2
	$(prog) --indent $(EX2)
	@sleep 2 ; echo ; echo "===> once again, but remove all markup" ; echo ; sleep 2
	$(prog) --action=only-text --remove-whitespace $(EX2)
	@sleep 2 ; echo ; echo "===> once again with hdom tree output" ; echo ; sleep 2
	$(prog) --show-tree --remove-whitespace $(EX2)

# test3: namespace runs with --check-namespaces on $(EX3) (indented and
# as tree) and on $(EX3a); the last run is announced as producing
# namespace errors, so its exit code is ignored via "|| true"
test3 :
	@echo "===> namespace processing examples" ; echo ; sleep 2
	@echo "===> namespace propagation test" ; echo ; sleep 2
	$(prog) --verbose --check-namespaces --indent --output-encoding=UTF-8 $(EX3)
	@echo
	@echo ; sleep 2 ; echo "===> namespace propagation test: tree output with attached namespaces" ; echo ; sleep 2
	$(prog) --verbose --check-namespaces --remove-whitespace --show-tree --output-encoding=ISO-8859-1 $(EX3)
	@echo
	@echo ; sleep 2 ; echo "===> namespace validation test: this test produces namespace errors" ; echo ; sleep 2
	$(prog) --verbose --do-not-validate --check-namespaces --indent --output-encoding=ISO-8859-1 $(EX3a) || true
	@echo


# test4: HTML runs - prints $(EX4) and $(EX4a) and parses both with
# --parse-html; the second run additionally passes --output-xhtml
test4 :
	@echo "===> HTML parsing examples" ; echo ; sleep 2
	@echo "===> the source of a lousy html document" ; echo ; sleep 2
	cat $(EX4)
	@sleep 2 ; echo ; echo "===> parser accepts this document and tries to build a document tree" ; echo ; sleep 2
	$(prog) --indent --preserve-comment --parse-html $(EX4)
	@echo "===> the source of another lousy html document containing empty elements" ; echo ; sleep 2
	cat $(EX4a)
	@sleep 2 ; echo ; echo "===> parser accepts this document and tries to format this as a HTML document without any dangarous empty elements" ; echo ; sleep 2
	$(prog) --indent --preserve-comment --parse-html --output-xhtml $(EX4a)
	@echo

# dist: copy the example files, this Makefile and the program source into
# $(DIST_DIR), creating the directory when it does not exist yet.
# DIST_DIR is not defined in this Makefile and must be supplied by the
# caller (make dist DIST_DIR=...); with an empty value "[ -d ]" would
# succeed and cp would take its last source argument as destination,
# therefore the rule stops early in that case.
# NOTE(review): with the current value of prog, $(prog).hs expands to
# dist/build/monad-hparser/monad-hparser.hs - confirm the intended source file
dist :
	@[ -n "$(DIST_DIR)" ] || { echo "DIST_DIR is not set" >&2 ; exit 1 ; }
	[ -d $(DIST_DIR) ] || mkdir -p $(DIST_DIR)
	cp $(EX) Makefile $(prog).hs $(DIST_DIR)

# clean: delegates the removal of build products to cabal
clean :
	cabal clean

# targets that do not name files
# NOTE(review): prof and local are listed here but have no rule in this Makefile
.PHONY : all test test0 test1 test2 test3 test4 dist clean prof local force

0 comments on commit ed14d32

Please sign in to comment.