Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Xml parsing almost working (problem duplicating triples)

Ignore-this: 6edd7d618daa91b8408ad933e036aa83

darcs-hash:20110125161343-1786f-03499e960af0e0da29854a810c70bd861b3f9718.gz
  • Loading branch information...
commit 0b55908b2f5f9fe7cfdfe412bf39ed1cac1a188b 1 parent bc4aa3a
@amccausl authored
Showing with 40 additions and 7 deletions.
  1. +40 −7 src/Text/RDF/RDF4H/XmlParser.hs
View
47 src/Text/RDF/RDF4H/XmlParser.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE Arrows, RankNTypes #-}
+{-# LANGUAGE Arrows, RankNTypes, FlexibleContexts #-}
-- |An parser for the RDF/XML format
-- <http://www.w3.org/TR/REC-rdf-syntax/>.
@@ -26,20 +26,53 @@ parseXmlRDF bUrl dUrl xmlStr = Left (ParseFailure "XML parsing not implemented")
where xml = runLA xread (b2s xmlStr)
-- |Arrow that translates HXT XmlTree to an RDF representation
-getRDF :: forall rdf a. (RDF rdf, ArrowXml a) => a XmlTree rdf
+getRDF :: forall rdf a. (RDF rdf, ArrowXml a, ArrowState Int a) => a XmlTree rdf
getRDF = proc xml -> do
rdf <- hasName "rdf:RDF" <<< isElem <<< getChildren -< xml
bUrl <- arr (Just . BaseUrl . s2b) <<< getAttrValue "transfer-URI" -< xml
prefixMap <- arr toPrefixMap <<< toAttrMap -< rdf
- triples <- (parseDescription <<< hasName "rdf:Description" <<< isElem <<< getChildren) >. id -< rdf
+ triples <- (parseDescription <<< isElem <<< getChildren) >. id -< rdf
returnA -< mkRdf triples bUrl prefixMap
where toAttrMap = (getAttrl >>> (getName &&& (getChildren >>> getText))) >. id
toPrefixMap = PrefixMappings . Map.fromList . map (\(n, m) -> (s2b (drop 6 n), s2b m)) . filter (startswith "xmlns:" . fst)
- toUNode = unode . s2b
parseDescription = proc desc -> do
- s <- getAttrValue "rdf:about" -< desc
- (p, o) <- ((getName >>> isA (/= "rdf:about")) &&& (getChildren >>> getText)) <<< getAttrl -< desc
- returnA -< Triple (toUNode s) (toUNode p) (toUNode o)
+ s0 <- mkNode -< desc
+ (s, p, o) <- (arr2A getPredicatesFromAttr <+> arr2A getPredicatesFromChildren) -< (s0, desc)
+ returnA -< Triple s p o
+
+getPredicatesFromChildren :: forall a. (ArrowXml a, ArrowState Int a) => Subject -> a XmlTree (Subject, Predicate, Object)
+getPredicatesFromChildren s0 = proc rdf -> do
+ cp <- isElem <<< getChildren -< rdf
+ p0 <- arr (unode . s2b) <<< getName -< cp
+ co <- getChildren -< cp
+ o0 <- mkNode -< co
+ (s, p, o) <- recursiveParse -< ((s0, p0, o0), (o0, co))
+ returnA -< (s, p, o)
+
+recursiveParse :: forall a. (ArrowXml a, ArrowState Int a) => a ((Subject, Predicate, Object), (Object, XmlTree)) (Subject, Predicate, Object)
+recursiveParse = arr2A constA <+> (arr (snd) >>> wrapKnownDescription)
+
+wrapKnownDescription :: forall a. (ArrowXml a, ArrowState Int a) => a (Object, XmlTree) (Subject, Predicate, Object)
+wrapKnownDescription = arr2A parseKnownDescription
+
+parseKnownDescription :: forall a. (ArrowXml a, ArrowState Int a) => Subject -> a XmlTree (Subject, Predicate, Object)
+parseKnownDescription s0 = proc desc -> do
+ (s, p, o) <- arr2A getPredicatesFromAttr <+> arr2A getPredicatesFromChildren -< (s0, desc)
+ returnA -< (s, p, o)
+
+getPredicatesFromAttr :: forall a. (ArrowXml a, ArrowState Int a) => Subject -> a XmlTree (Subject, Predicate, Object)
+getPredicatesFromAttr s = getAttrl >>> ((getName >>> isA (/= "rdf:about") >>> (arr (unode . s2b))) &&& (getChildren >>> getText >>> arr (lnode . plainL . s2b))) >>> arr (attachSubject s)
+
+attachSubject :: Subject -> (Predicate, Object) -> (Subject, Predicate, Object)
+attachSubject s (p, o) = (s, p, o)
+
+-- |Read a Node from the "rdf:about" property or generate a blank node
+mkNode :: forall a b. (ArrowXml a, ArrowState Int a) => a XmlTree Node
+mkNode = (getAttrValue0 "rdf:about" >>> arr (unode . s2b)) `orElse` mkBlankNode
+
+-- |Generate an RDF blank node with incrementing IDs from the arrow state
+mkBlankNode :: forall a b. (ArrowState Int a) => a b Node
+mkBlankNode = nextState (+1) >>> arr (BNodeGen)
fakeDoc :: ArrowXml a => a XmlTree XmlTree
fakeDoc = mkelem "/"
Please sign in to comment.
Something went wrong with that request. Please try again.