Permalink
Browse files

Refactor code for cleanlyness and readability

Ignore-this: c447878184a26a10ab41677a75e8490a

darcs-hash:20110202225706-1786f-f7ba6604db014649365161d027c9180e97ca3a37.gz
  • Loading branch information...
1 parent 438c176 commit 9a402046da639061dacf54dfecd6ece433f14fc7 @amccausl committed Feb 2, 2011
Showing with 91 additions and 59 deletions.
  1. +91 −59 src/Text/RDF/RDF4H/XmlParser.hs
@@ -19,22 +19,26 @@ import Data.String.Utils
-- |Global state for the parser
data GParseState = GParseState { stateGenId :: Int
}
+ deriving(Show)
-- |Local state for the parser (dependant on the parent xml elements)
data LParseState = LParseState { stateBaseUrl :: BaseUrl
, stateLang :: Maybe String
, stateSubject :: Subject
}
+ deriving(Show)
-- |Parse a xml ByteString to an RDF representation
parseXmlRDF :: forall rdf. (RDF rdf)
=> Maybe BaseUrl -- ^ The base URL for the RDF if required
-> Maybe ByteString -- ^ DocUrl: The request URL for the RDF if available
-> ByteString -- ^ The contents to parse
-> Either ParseFailure rdf -- ^ The RDF representation of the triples or ParseFailure
-parseXmlRDF bUrl dUrl xmlStr = case runSLA (xread >>> addMetaData bUrl dUrl >>> getRDF) (GParseState { stateGenId = 0 }) (b2s xmlStr) of
+parseXmlRDF bUrl dUrl xmlStr = case runParseArrow of
(_,r:_) -> Right r
_ -> Left (ParseFailure "XML parsing failed")
+ where runParseArrow = runSLA (xread >>> addMetaData bUrl dUrl >>> getRDF) initState (b2s xmlStr)
+ initState = GParseState { stateGenId = 0 }
-- |Add a root tag to a given XmlTree to appear as if it was read from a readDocument function
addMetaData :: (ArrowXml a) => Maybe BaseUrl -> Maybe ByteString -> a XmlTree XmlTree
@@ -70,30 +74,32 @@ parseDescription' = proc (bUrl, rdf) -> do
-- |Read an rdf:Description tag to its corresponding Triples
parseDescription :: forall a. (ArrowXml a, ArrowState GParseState a) => a (LParseState, XmlTree) Triple
-parseDescription = (updateState
+parseDescription = updateState
>>> (arr2A parsePredicatesFromAttr
<+> (second (getChildren >>> isElem) >>> parsePredicatesFromChildren)
- <+> (second (neg (hasName "rdf:Description")) >>> arr2A readTypeTriple))) -- If the rdf:Description element has another name, that is it's type
- >>. (replaceLiElems [] (1 :: Int))
+ <+> (second (neg (hasName "rdf:Description")) >>> arr2A readTypeTriple))
+ >>. replaceLiElems [] (1 :: Int)
where readTypeTriple :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Triple
- readTypeTriple state = getName >>> arr ((Triple (stateSubject state) ((unode . s2b) "rdf:type")) . unode . s2b)
- replaceLiElems acc n (Triple s p o : rest) | p == (unode . s2b) "rdf:li" = replaceLiElems (Triple s ((unode . s2b) ("rdf:_" ++ show n)) o : acc) (n + 1) rest
+ readTypeTriple state = getName >>> arr ((Triple (stateSubject state) rdfType) . unode . s2b)
+ replaceLiElems acc n (Triple s p o : rest) | p == (unode . s2b) "rdf:li" =
+ replaceLiElems (Triple s ((unode . s2b) ("rdf:_" ++ show n)) o : acc) (n + 1) rest
replaceLiElems acc n (Triple s p o : rest) = replaceLiElems (Triple s p o : acc) n rest
replaceLiElems acc _ [] = acc
-- |Parse the current predicate element as a rdf:Description element (used when rdf:parseType = "Resource")
parseAsResource :: forall a. (ArrowXml a, ArrowState GParseState a) => Node -> a (LParseState, XmlTree) Triple
-parseAsResource n = (updateState
- >>> (arr2A parsePredicatesFromAttr
- <+> (second getName >>> arr (\(s, p) -> Triple (stateSubject s) ((unode . s2b) p) n))
- <+> (arr (\s -> s { stateSubject = n }) *** (getChildren >>> isElem) >>> parsePredicatesFromChildren))) -- If the rdf:Description element has another name, that is it's type
+parseAsResource n = updateState
+ >>> (arr2A parsePredicatesFromAttr
+ <+> (second getName >>> arr (\(s, p) -> Triple (stateSubject s) ((unode . s2b) p) n))
+ <+> (arr (\s -> s { stateSubject = n }) *** (getChildren >>> isElem) >>> parsePredicatesFromChildren))
-- |Read the attributes of an rdf:Description element. These correspond to the Predicate Object pairs of the Triple
parsePredicatesFromAttr :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Triple
-parsePredicatesFromAttr s = getAttrl >>> ((getName >>> neg (isMetaAttr)
- >>> (arr (unode . s2b)))
- &&& (getChildren >>> getText >>> arr (lnode . plainL . s2b))) >>> arr (attachSubject (stateSubject s))
+parsePredicatesFromAttr state = getAttrl
+ >>> (getName >>> neg (isMetaAttr) >>> mkUNode) &&& (getChildren >>> getText >>> arr (lnode . plainL . s2b))
+ >>> arr (attachSubject (stateSubject state))
+-- | Arrow to determine if special processing is required for an attribute
isMetaAttr :: forall a. (ArrowXml a, ArrowState GParseState a) => a String String
isMetaAttr = isA (== "rdf:about")
<+> isA (== "rdf:nodeID")
@@ -102,30 +108,34 @@ isMetaAttr = isA (== "rdf:about")
<+> isA (== "rdf:parseType")
-- |Read a children of an rdf:Description element. These correspond to the Predicate portion of the Triple
-parsePredicatesFromChildren :: forall a. (ArrowXml a, ArrowState GParseState a) => a (LParseState, XmlTree) Triple
+parsePredicatesFromChildren :: forall a. (ArrowXml a, ArrowState GParseState a)
+ => a (LParseState, XmlTree) Triple
parsePredicatesFromChildren = updateState
- >>> choiceA [ second (hasAttrValue "rdf:parseType" (== "Literal")) :-> arr2A parseAsLiteralTriple
- , second (hasAttrValue "rdf:parseType" (== "Resource")) :-> (defaultA
- <+> (mkBlankNode &&& arr id >>> arr2A parseAsResource))
- , second (hasAttrValue "rdf:parseType" (== "Collection")) :-> (listA (defaultA >>> arr id &&& mkBlankNode) >>> mkCollectionTriples >>> unlistA)
- , second (hasAttr "rdf:datatype") :-> arr2A getTypedTriple
- , second (hasAttr "rdf:resource") :-> arr2A getResourceTriple
- , second (hasAttr "rdf:nodeID") :-> arr2A getNodeIdTriple
- , second (hasAttr "rdf:ID") :-> (arr2A mkRelativeNode &&& defaultA >>> arr2A reifyTriple >>> unlistA)
- , this :-> (defaultA
- <+> ((second getAttrl &&& (neg . second) (getAttrl >>> getName >>> isMetaAttr)) `guards` (mkBlankNode &&& arr id >>> arr2A parsePredicateAttr)))
- ]
+ >>> choiceA
+ [ second (hasAttrValue "rdf:parseType" (== "Literal")) :-> arr2A parseAsLiteralTriple
+ , second (hasAttrValue "rdf:parseType" (== "Resource")) :-> (defaultA <+> (mkBlankNode &&& arr id >>> arr2A parseAsResource))
+ , second (hasAttrValue "rdf:parseType" (== "Collection")) :-> (listA (defaultA >>> arr id &&& mkBlankNode) >>> mkCollectionTriples >>> unlistA)
+ , second (hasAttr "rdf:datatype") :-> arr2A getTypedTriple
+ , second (hasAttr "rdf:resource") :-> arr2A getResourceTriple
+ , second (hasAttr "rdf:nodeID") :-> arr2A getNodeIdTriple
+ , second (hasAttr "rdf:ID") :-> (arr2A mkRelativeNode &&& defaultA >>> arr2A reifyTriple >>> unlistA)
+ , second (hasPredicateAttr) :-> (defaultA <+> (mkBlankNode &&& arr id >>> arr2A parsePredicateAttr))
+ , this :-> defaultA
+ ]
where defaultA = proc (state, predXml) -> do
p <- arr(unode . s2b) <<< getName -< predXml
t <- arr2A (\s -> arr2A (parseObjectsFromChildren s)) <<< second (second getChildren) -< (state, (p, predXml))
returnA -< t
parsePredicateAttr n = (second getName >>> arr (\(s, p) -> Triple (stateSubject s) ((unode . s2b) p) n))
<+> (first (arr (\s -> s { stateSubject = n })) >>> arr2A parsePredicatesFromAttr)
+ hasPredicateAttr = getAttrl >>> neg (getName >>> isMetaAttr)
parseObjectsFromChildren :: forall a. (ArrowXml a, ArrowState GParseState a)
=> LParseState -> Predicate -> a XmlTree Triple
-parseObjectsFromChildren s p = (isText >>> getText >>> arr ((Triple (stateSubject s) p) . mkLiteralNode s))
- <+> (isElem >>> hasName "rdf:Description" >>> parseObjectDescription) -- TODO: include ability to alias for rdf:type
+parseObjectsFromChildren s p = choiceA
+ [ isText :-> (getText >>> arr ((Triple (stateSubject s) p) . mkLiteralNode s))
+ , isElem :-> (hasName "rdf:Description" >>> parseObjectDescription)
+ ]
where parseObjectDescription = proc desc -> do
o <- mkNode s -< desc
t0 <- arr (\(sub, (p, o)) -> Triple sub p o) -< (stateSubject s, (p, o))
@@ -142,11 +152,6 @@ reifyTriple node = arr (\(Triple s p o) -> [ Triple s p o
, Triple node rdfPredicate p
, Triple node rdfObject o
])
- where rdfType = (unode . s2b) "rdf:type"
- rdfStatement = (unode . s2b) "rdf:Statement"
- rdfSubject = (unode . s2b) "rdf:subject"
- rdfPredicate = (unode . s2b) "rdf:predicate"
- rdfObject = (unode . s2b) "rdf:object"
-- |Updates the local state at a given node
updateState :: forall a. (ArrowXml a, ArrowState GParseState a)
@@ -158,47 +163,73 @@ updateState = (ifA (second (hasAttr "xml:lang")) (arr2A readLang) (arr id))
-- |Read a Triple with an rdf:parseType of Literal
parseAsLiteralTriple :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Triple
-parseAsLiteralTriple state = ((getName >>> arr (unode . s2b)) &&& (xshow ( getChildren ) >>> arr (mkTypedLiteralNode state nodeType))) >>> arr (attachSubject (stateSubject state))
- where nodeType = mkFastString (s2b "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral")
+parseAsLiteralTriple state = (nameToUNode &&& (xshow ( getChildren ) >>> arr (mkTypedLiteralNode rdfXmlLiteral)))
+ >>> arr (attachSubject (stateSubject state))
mkCollectionTriples :: forall a. (ArrowXml a, ArrowState GParseState a) => a [(Triple, Node)] Triples
mkCollectionTriples = arr (mkCollectionTriples' [])
- where mkCollectionTriples' [] ((Triple s1 p1 o1, n1):rest) = mkCollectionTriples' [Triple s1 p1 n1] ((Triple s1 p1 o1, n1):rest)
- mkCollectionTriples' acc ((Triple _ _ o1, n1):(t2, n2):rest) = mkCollectionTriples' (Triple n1 headNode o1 : Triple n1 tailNode n2 : acc) ((t2, n2):rest)
- mkCollectionTriples' acc [(Triple _ _ o1, n1)] = Triple n1 headNode o1 : Triple n1 tailNode nilNode : acc
+ where mkCollectionTriples' [] ((Triple s1 p1 o1, n1):rest) =
+ mkCollectionTriples' [Triple s1 p1 n1] ((Triple s1 p1 o1, n1):rest)
+ mkCollectionTriples' acc ((Triple _ _ o1, n1):(t2, n2):rest) =
+ mkCollectionTriples' (Triple n1 rdfFirst o1 : Triple n1 rdfRest n2 : acc) ((t2, n2):rest)
+ mkCollectionTriples' acc [(Triple _ _ o1, n1)] =
+ Triple n1 rdfFirst o1 : Triple n1 rdfRest rdfNil : acc
mkCollectionTriples' _ [] = []
- headNode = (unode . s2b) "rdf:first"
- tailNode = (unode . s2b) "rdf:rest"
- nilNode = (unode . s2b) "rdf:nil"
-- |Read a Triple and it's type when rdf:datatype is available
getTypedTriple :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Triple
-getTypedTriple state = ((getName >>> arr (unode . s2b)) &&& ((getAttrValue "rdf:datatype" &&& baseUrl >>> expandURI) &&& xshow (getChildren) >>> arr (\(t, v) -> lnode (typedL (s2b v) (mkFastString (s2b t)))))) >>> arr (attachSubject (stateSubject state))
- where baseUrl = constA (case stateBaseUrl state of BaseUrl b -> b2s b)
+getTypedTriple state = nameToUNode &&& (attrExpandURI state "rdf:datatype" &&& xshow getChildren >>> arr (\(t, v) -> mkTypedLiteralNode (mkFastString (s2b t)) v))
+ >>> arr (attachSubject (stateSubject state))
-getResourceTriple :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Triple
-getResourceTriple state = ((getName >>> arr (unode . s2b)) &&& ((getAttrValue "rdf:resource" &&& baseUrl) >>> expandURI >>> arr (unode . s2b))) >>> arr (attachSubject (stateSubject state))
- where baseUrl = constA (case stateBaseUrl state of BaseUrl b -> b2s b)
+getResourceTriple :: forall a. (ArrowXml a, ArrowState GParseState a)
+ => LParseState -> a XmlTree Triple
+getResourceTriple state = nameToUNode &&& (attrExpandURI state "rdf:resource" >>> mkUNode)
+ >>> arr (attachSubject (stateSubject state))
-getNodeIdTriple :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Triple
-getNodeIdTriple state = ((getName >>> arr (unode . s2b)) &&& (getAttrValue "rdf:nodeID" >>> arr (bnode . s2b))) >>> arr (attachSubject (stateSubject state))
+getNodeIdTriple :: forall a. (ArrowXml a, ArrowState GParseState a)
+ => LParseState -> a XmlTree Triple
+getNodeIdTriple state = nameToUNode &&& (getAttrValue "rdf:nodeID" >>> arr (bnode . s2b))
+ >>> arr (attachSubject (stateSubject state))
-- |Read a Node from the "rdf:about" property or generate a blank node
mkNode :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Node
-mkNode s = choiceA [ hasAttr "rdf:about" :-> (getAttrValue "rdf:about" &&& baseUrl >>> expandURI >>> arr (unode . s2b))
- , hasAttr "rdf:resource" :-> (getAttrValue "rdf:resource" &&& baseUrl >>> expandURI >>> arr (unode . s2b))
- , hasAttr "rdf:nodeID" :-> (getAttrValue "rdf:nodeID" >>> arr (bnode . s2b))
- , hasAttr "rdf:ID" :-> mkRelativeNode s
- , this :-> mkBlankNode
- ]
- where baseUrl = constA (case stateBaseUrl s of BaseUrl b -> b2s b)
+mkNode state = choiceA [ hasAttr "rdf:about" :-> (attrExpandURI state "rdf:about" >>> mkUNode)
+ , hasAttr "rdf:resource" :-> (attrExpandURI state "rdf:resource" >>> mkUNode)
+ , hasAttr "rdf:nodeID" :-> (getAttrValue "rdf:nodeID" >>> arr (bnode . s2b))
+ , hasAttr "rdf:ID" :-> mkRelativeNode state
+ , this :-> mkBlankNode
+ ]
+
+rdfXmlLiteral = (mkFastString . s2b) "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"
+rdfFirst = (unode . s2b) "rdf:first"
+rdfRest = (unode . s2b) "rdf:rest"
+rdfNil = (unode . s2b) "rdf:nil"
+rdfType = (unode . s2b) "rdf:type"
+rdfStatement = (unode . s2b) "rdf:Statement"
+rdfSubject = (unode . s2b) "rdf:subject"
+rdfPredicate = (unode . s2b) "rdf:predicate"
+rdfObject = (unode . s2b) "rdf:object"
+
+nameToUNode :: forall a. (ArrowXml a) => a XmlTree Node
+nameToUNode = getName >>> mkUNode
+
+attrExpandURI :: forall a. (ArrowXml a) => LParseState -> String -> a XmlTree String
+attrExpandURI state attr = getAttrValue attr &&& baseUrl >>> expandURI
+ where baseUrl = constA (case stateBaseUrl state of BaseUrl b -> b2s b)
+
+-- |Make a UNode from an absolute string
+mkUNode :: forall a. (Arrow a) => a String Node
+mkUNode = arr (unode . s2b)
---mkRelativeNode :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Node
-mkRelativeNode s = (getAttrValue "rdf:ID" >>> arr (\x -> '#':x)) &&& baseUrl >>> expandURI >>> arr (unode . s2b)
+-- |Make a UNode from a rdf:ID element, expanding relative URIs
+mkRelativeNode :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Node
+mkRelativeNode s = (getAttrValue "rdf:ID" >>> arr (\x -> '#':x)) &&& baseUrl
+ >>> expandURI >>> arr (unode . s2b)
where baseUrl = constA (case stateBaseUrl s of BaseUrl b -> b2s b)
-mkTypedLiteralNode :: LParseState -> FastString -> String -> Node
-mkTypedLiteralNode (LParseState _ _ _) t content = lnode (typedL (s2b content) t)
+-- |Make a literal node with the given type and content
+mkTypedLiteralNode :: FastString -> String -> Node
+mkTypedLiteralNode t content = lnode (typedL (s2b content) t)
-- |Use the given state to create a literal node
mkLiteralNode :: LParseState -> String -> Node
@@ -207,5 +238,6 @@ mkLiteralNode (LParseState _ Nothing _) content = (lnode . plainL . s2b) content
-- |Generate an RDF blank node with incrementing IDs from the arrow state
mkBlankNode :: forall a b. (ArrowState GParseState a) => a b Node
-mkBlankNode = nextState (\gState -> gState { stateGenId = stateGenId gState + 1 } ) >>> arr (BNodeGen . stateGenId)
+mkBlankNode = nextState (\gState -> gState { stateGenId = stateGenId gState + 1 })
+ >>> arr (BNodeGen . stateGenId)

0 comments on commit 9a40204

Please sign in to comment.