diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 446a1d1..09e5a68 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE Arrows, RankNTypes #-} +{-# LANGUAGE Arrows, RankNTypes, FlexibleContexts #-} -- |An parser for the RDF/XML format -- . @@ -26,20 +26,53 @@ parseXmlRDF bUrl dUrl xmlStr = Left (ParseFailure "XML parsing not implemented") where xml = runLA xread (b2s xmlStr) -- |Arrow that translates HXT XmlTree to an RDF representation -getRDF :: forall rdf a. (RDF rdf, ArrowXml a) => a XmlTree rdf +getRDF :: forall rdf a. (RDF rdf, ArrowXml a, ArrowState Int a) => a XmlTree rdf getRDF = proc xml -> do rdf <- hasName "rdf:RDF" <<< isElem <<< getChildren -< xml bUrl <- arr (Just . BaseUrl . s2b) <<< getAttrValue "transfer-URI" -< xml prefixMap <- arr toPrefixMap <<< toAttrMap -< rdf - triples <- (parseDescription <<< hasName "rdf:Description" <<< isElem <<< getChildren) >. id -< rdf + triples <- (parseDescription <<< isElem <<< getChildren) >. id -< rdf returnA -< mkRdf triples bUrl prefixMap where toAttrMap = (getAttrl >>> (getName &&& (getChildren >>> getText))) >. id toPrefixMap = PrefixMappings . Map.fromList . map (\(n, m) -> (s2b (drop 6 n), s2b m)) . filter (startswith "xmlns:" . fst) - toUNode = unode . s2b parseDescription = proc desc -> do - s <- getAttrValue "rdf:about" -< desc - (p, o) <- ((getName >>> isA (/= "rdf:about")) &&& (getChildren >>> getText)) <<< getAttrl -< desc - returnA -< Triple (toUNode s) (toUNode p) (toUNode o) + s0 <- mkNode -< desc + (s, p, o) <- (arr2A getPredicatesFromAttr <+> arr2A getPredicatesFromChildren) -< (s0, desc) + returnA -< Triple s p o + +getPredicatesFromChildren :: forall a. (ArrowXml a, ArrowState Int a) => Subject -> a XmlTree (Subject, Predicate, Object) +getPredicatesFromChildren s0 = proc rdf -> do + cp <- isElem <<< getChildren -< rdf + p0 <- arr (unode . s2b) <<< getName -< cp + co <- getChildren -< cp + o0 <- mkNode -< co + (s, p, o) <- recursiveParse -< ((s0, p0, o0), (o0, co)) + returnA -< (s, p, o) + +recursiveParse :: forall a. (ArrowXml a, ArrowState Int a) => a ((Subject, Predicate, Object), (Object, XmlTree)) (Subject, Predicate, Object) +recursiveParse = arr2A constA <+> (arr (snd) >>> wrapKnownDescription) + +wrapKnownDescription :: forall a. (ArrowXml a, ArrowState Int a) => a (Object, XmlTree) (Subject, Predicate, Object) +wrapKnownDescription = arr2A parseKnownDescription + +parseKnownDescription :: forall a. (ArrowXml a, ArrowState Int a) => Subject -> a XmlTree (Subject, Predicate, Object) +parseKnownDescription s0 = proc desc -> do + (s, p, o) <- arr2A getPredicatesFromAttr <+> arr2A getPredicatesFromChildren -< (s0, desc) + returnA -< (s, p, o) + +getPredicatesFromAttr :: forall a. (ArrowXml a, ArrowState Int a) => Subject -> a XmlTree (Subject, Predicate, Object) +getPredicatesFromAttr s = getAttrl >>> ((getName >>> isA (/= "rdf:about") >>> (arr (unode . s2b))) &&& (getChildren >>> getText >>> arr (lnode . plainL . s2b))) >>> arr (attachSubject s) + +attachSubject :: Subject -> (Predicate, Object) -> (Subject, Predicate, Object) +attachSubject s (p, o) = (s, p, o) + +-- |Read a Node from the "rdf:about" property or generate a blank node +mkNode :: forall a b. (ArrowXml a, ArrowState Int a) => a XmlTree Node +mkNode = (getAttrValue0 "rdf:about" >>> arr (unode . s2b)) `orElse` mkBlankNode + +-- |Generate an RDF blank node with incrementing IDs from the arrow state +mkBlankNode :: forall a b. (ArrowState Int a) => a b Node +mkBlankNode = nextState (+1) >>> arr (BNodeGen) fakeDoc :: ArrowXml a => a XmlTree XmlTree fakeDoc = mkelem "/"