Skip to content

Commit

Permalink
Xml parsing almost working (problem duplicating triples)
Browse files Browse the repository at this point in the history
Ignore-this: 6edd7d618daa91b8408ad933e036aa83

darcs-hash:20110125161343-1786f-03499e960af0e0da29854a810c70bd861b3f9718.gz
  • Loading branch information
amccausl committed Jan 25, 2011
1 parent bc4aa3a commit 0b55908
Showing 1 changed file with 40 additions and 7 deletions.
47 changes: 40 additions & 7 deletions src/Text/RDF/RDF4H/XmlParser.hs
@@ -1,4 +1,4 @@
{-# LANGUAGE Arrows, RankNTypes #-}
{-# LANGUAGE Arrows, RankNTypes, FlexibleContexts #-}
-- |An parser for the RDF/XML format
-- <http://www.w3.org/TR/REC-rdf-syntax/>.

Expand Down Expand Up @@ -26,20 +26,53 @@ parseXmlRDF bUrl dUrl xmlStr = Left (ParseFailure "XML parsing not implemented")
where xml = runLA xread (b2s xmlStr)

-- |Arrow that translates HXT XmlTree to an RDF representation
getRDF :: forall rdf a. (RDF rdf, ArrowXml a) => a XmlTree rdf
getRDF :: forall rdf a. (RDF rdf, ArrowXml a, ArrowState Int a) => a XmlTree rdf
getRDF = proc xml -> do
rdf <- hasName "rdf:RDF" <<< isElem <<< getChildren -< xml
bUrl <- arr (Just . BaseUrl . s2b) <<< getAttrValue "transfer-URI" -< xml
prefixMap <- arr toPrefixMap <<< toAttrMap -< rdf
triples <- (parseDescription <<< hasName "rdf:Description" <<< isElem <<< getChildren) >. id -< rdf
triples <- (parseDescription <<< isElem <<< getChildren) >. id -< rdf
returnA -< mkRdf triples bUrl prefixMap
where toAttrMap = (getAttrl >>> (getName &&& (getChildren >>> getText))) >. id
toPrefixMap = PrefixMappings . Map.fromList . map (\(n, m) -> (s2b (drop 6 n), s2b m)) . filter (startswith "xmlns:" . fst)
toUNode = unode . s2b
parseDescription = proc desc -> do
s <- getAttrValue "rdf:about" -< desc
(p, o) <- ((getName >>> isA (/= "rdf:about")) &&& (getChildren >>> getText)) <<< getAttrl -< desc
returnA -< Triple (toUNode s) (toUNode p) (toUNode o)
s0 <- mkNode -< desc
(s, p, o) <- (arr2A getPredicatesFromAttr <+> arr2A getPredicatesFromChildren) -< (s0, desc)
returnA -< Triple s p o

getPredicatesFromChildren :: forall a. (ArrowXml a, ArrowState Int a) => Subject -> a XmlTree (Subject, Predicate, Object)
getPredicatesFromChildren s0 = proc rdf -> do
cp <- isElem <<< getChildren -< rdf
p0 <- arr (unode . s2b) <<< getName -< cp
co <- getChildren -< cp
o0 <- mkNode -< co
(s, p, o) <- recursiveParse -< ((s0, p0, o0), (o0, co))
returnA -< (s, p, o)

recursiveParse :: forall a. (ArrowXml a, ArrowState Int a) => a ((Subject, Predicate, Object), (Object, XmlTree)) (Subject, Predicate, Object)
recursiveParse = arr2A constA <+> (arr (snd) >>> wrapKnownDescription)

wrapKnownDescription :: forall a. (ArrowXml a, ArrowState Int a) => a (Object, XmlTree) (Subject, Predicate, Object)
wrapKnownDescription = arr2A parseKnownDescription

parseKnownDescription :: forall a. (ArrowXml a, ArrowState Int a) => Subject -> a XmlTree (Subject, Predicate, Object)
parseKnownDescription s0 = proc desc -> do
(s, p, o) <- arr2A getPredicatesFromAttr <+> arr2A getPredicatesFromChildren -< (s0, desc)
returnA -< (s, p, o)

getPredicatesFromAttr :: forall a. (ArrowXml a, ArrowState Int a) => Subject -> a XmlTree (Subject, Predicate, Object)
getPredicatesFromAttr s = getAttrl >>> ((getName >>> isA (/= "rdf:about") >>> (arr (unode . s2b))) &&& (getChildren >>> getText >>> arr (lnode . plainL . s2b))) >>> arr (attachSubject s)

attachSubject :: Subject -> (Predicate, Object) -> (Subject, Predicate, Object)
attachSubject s (p, o) = (s, p, o)

-- |Read a Node from the "rdf:about" property or generate a blank node
mkNode :: forall a b. (ArrowXml a, ArrowState Int a) => a XmlTree Node
mkNode = (getAttrValue0 "rdf:about" >>> arr (unode . s2b)) `orElse` mkBlankNode

-- |Generate an RDF blank node with incrementing IDs from the arrow state
mkBlankNode :: forall a b. (ArrowState Int a) => a b Node
mkBlankNode = nextState (+1) >>> arr (BNodeGen)

fakeDoc :: ArrowXml a => a XmlTree XmlTree
fakeDoc = mkelem "/"
Expand Down

0 comments on commit 0b55908

Please sign in to comment.