Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Refactor code for cleanlyness and readability

Ignore-this: c447878184a26a10ab41677a75e8490a

darcs-hash:20110202225706-1786f-f7ba6604db014649365161d027c9180e97ca3a37.gz
  • Loading branch information...
commit 9a402046da639061dacf54dfecd6ece433f14fc7 1 parent 438c176
Alex McCausland authored February 02, 2011

Showing 1 changed file with 91 additions and 59 deletions. Show diff stats Hide diff stats

  1. 150  src/Text/RDF/RDF4H/XmlParser.hs
150  src/Text/RDF/RDF4H/XmlParser.hs
@@ -19,12 +19,14 @@ import Data.String.Utils
19 19
 -- |Global state for the parser
20 20
 data GParseState = GParseState { stateGenId :: Int
21 21
                                }
  22
+  deriving(Show)
22 23
 
23 24
 -- |Local state for the parser (dependant on the parent xml elements)
24 25
 data LParseState = LParseState { stateBaseUrl :: BaseUrl
25 26
                                , stateLang :: Maybe String
26 27
                                , stateSubject :: Subject
27 28
                                }
  29
+  deriving(Show)
28 30
 
29 31
 -- |Parse a xml ByteString to an RDF representation
30 32
 parseXmlRDF :: forall rdf. (RDF rdf)
@@ -32,9 +34,11 @@ parseXmlRDF :: forall rdf. (RDF rdf)
32 34
             -> Maybe ByteString        -- ^ DocUrl: The request URL for the RDF if available
33 35
             -> ByteString              -- ^ The contents to parse
34 36
             -> Either ParseFailure rdf -- ^ The RDF representation of the triples or ParseFailure
35  
-parseXmlRDF bUrl dUrl xmlStr = case runSLA (xread >>> addMetaData bUrl dUrl >>> getRDF) (GParseState { stateGenId = 0 }) (b2s xmlStr) of
  37
+parseXmlRDF bUrl dUrl xmlStr = case runParseArrow of
36 38
                                 (_,r:_) -> Right r
37 39
                                 _ -> Left (ParseFailure "XML parsing failed")
  40
+  where runParseArrow = runSLA (xread >>> addMetaData bUrl dUrl >>> getRDF) initState (b2s xmlStr)
  41
+        initState = GParseState { stateGenId = 0 }
38 42
 
39 43
 -- |Add a root tag to a given XmlTree to appear as if it was read from a readDocument function
40 44
 addMetaData :: (ArrowXml a) => Maybe BaseUrl -> Maybe ByteString -> a XmlTree XmlTree
@@ -70,30 +74,32 @@ parseDescription' = proc (bUrl, rdf) -> do
70 74
 
71 75
 -- |Read an rdf:Description tag to its corresponding Triples
72 76
 parseDescription :: forall a. (ArrowXml a, ArrowState GParseState a) => a (LParseState, XmlTree) Triple
73  
-parseDescription = (updateState
  77
+parseDescription = updateState
74 78
                >>> (arr2A parsePredicatesFromAttr
75 79
                    <+> (second (getChildren >>> isElem) >>> parsePredicatesFromChildren)
76  
-                   <+> (second (neg (hasName "rdf:Description")) >>> arr2A readTypeTriple))) -- If the rdf:Description element has another name, that is it's type
77  
-               >>. (replaceLiElems [] (1 :: Int))
  80
+                   <+> (second (neg (hasName "rdf:Description")) >>> arr2A readTypeTriple))
  81
+               >>. replaceLiElems [] (1 :: Int)
78 82
   where readTypeTriple :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Triple
79  
-        readTypeTriple state = getName >>> arr ((Triple (stateSubject state) ((unode . s2b) "rdf:type")) . unode . s2b)
80  
-        replaceLiElems acc n (Triple s p o : rest) | p == (unode . s2b) "rdf:li" = replaceLiElems (Triple s ((unode . s2b) ("rdf:_" ++ show n)) o : acc) (n + 1) rest
  83
+        readTypeTriple state = getName >>> arr ((Triple (stateSubject state) rdfType) . unode . s2b)
  84
+        replaceLiElems acc n (Triple s p o : rest) | p == (unode . s2b) "rdf:li" =
  85
+            replaceLiElems (Triple s ((unode . s2b) ("rdf:_" ++ show n)) o : acc) (n + 1) rest
81 86
         replaceLiElems acc n (Triple s p o : rest) = replaceLiElems (Triple s p o : acc) n rest
82 87
         replaceLiElems acc _ [] = acc
83 88
 
84 89
 -- |Parse the current predicate element as a rdf:Description element (used when rdf:parseType = "Resource")
85 90
 parseAsResource :: forall a. (ArrowXml a, ArrowState GParseState a) => Node -> a (LParseState, XmlTree) Triple
86  
-parseAsResource n = (updateState
87  
-               >>> (arr2A parsePredicatesFromAttr
88  
-                   <+> (second getName >>> arr (\(s, p) -> Triple (stateSubject s) ((unode . s2b) p) n))
89  
-                   <+> (arr (\s -> s { stateSubject = n }) *** (getChildren >>> isElem) >>> parsePredicatesFromChildren))) -- If the rdf:Description element has another name, that is it's type
  91
+parseAsResource n = updateState
  92
+    >>>     (arr2A parsePredicatesFromAttr
  93
+        <+> (second getName >>> arr (\(s, p) -> Triple (stateSubject s) ((unode . s2b) p) n))
  94
+        <+> (arr (\s -> s { stateSubject = n }) *** (getChildren >>> isElem) >>> parsePredicatesFromChildren))
90 95
 
91 96
 -- |Read the attributes of an rdf:Description element.  These correspond to the Predicate Object pairs of the Triple
92 97
 parsePredicatesFromAttr :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Triple
93  
-parsePredicatesFromAttr s = getAttrl >>> ((getName >>> neg (isMetaAttr)
94  
-                                            >>> (arr (unode . s2b)))
95  
-                                        &&& (getChildren >>> getText >>> arr (lnode . plainL . s2b))) >>> arr (attachSubject (stateSubject s))
  98
+parsePredicatesFromAttr state = getAttrl
  99
+    >>> (getName >>> neg (isMetaAttr) >>> mkUNode) &&& (getChildren >>> getText >>> arr (lnode . plainL . s2b))
  100
+    >>> arr (attachSubject (stateSubject state))
96 101
 
  102
+-- | Arrow to determine if special processing is required for an attribute
97 103
 isMetaAttr :: forall a. (ArrowXml a, ArrowState GParseState a) => a String String
98 104
 isMetaAttr = isA (== "rdf:about")
99 105
          <+> isA (== "rdf:nodeID")
@@ -102,30 +108,34 @@ isMetaAttr = isA (== "rdf:about")
102 108
          <+> isA (== "rdf:parseType")
103 109
 
104 110
 -- |Read a children of an rdf:Description element.  These correspond to the Predicate portion of the Triple
105  
-parsePredicatesFromChildren :: forall a. (ArrowXml a, ArrowState GParseState a) => a (LParseState, XmlTree) Triple
  111
+parsePredicatesFromChildren :: forall a. (ArrowXml a, ArrowState GParseState a)
  112
+                            => a (LParseState, XmlTree) Triple
106 113
 parsePredicatesFromChildren = updateState
107  
-                          >>> choiceA [ second (hasAttrValue "rdf:parseType" (== "Literal")) :-> arr2A parseAsLiteralTriple
108  
-                                      , second (hasAttrValue "rdf:parseType" (== "Resource")) :-> (defaultA
109  
-                                                                                               <+> (mkBlankNode &&& arr id >>> arr2A parseAsResource))
110  
-                                      , second (hasAttrValue "rdf:parseType" (== "Collection")) :-> (listA (defaultA >>> arr id &&& mkBlankNode) >>> mkCollectionTriples >>> unlistA)
111  
-                                      , second (hasAttr "rdf:datatype") :-> arr2A getTypedTriple
112  
-                                      , second (hasAttr "rdf:resource") :-> arr2A getResourceTriple
113  
-                                      , second (hasAttr "rdf:nodeID") :-> arr2A getNodeIdTriple
114  
-                                      , second (hasAttr "rdf:ID") :-> (arr2A mkRelativeNode &&& defaultA >>> arr2A reifyTriple >>> unlistA)
115  
-                                      , this :-> (defaultA
116  
-                                              <+> ((second getAttrl &&& (neg . second) (getAttrl >>> getName >>> isMetaAttr)) `guards` (mkBlankNode &&& arr id >>> arr2A parsePredicateAttr)))
117  
-                                      ]
  114
+    >>> choiceA
  115
+        [ second (hasAttrValue "rdf:parseType" (== "Literal")) :-> arr2A parseAsLiteralTriple
  116
+        , second (hasAttrValue "rdf:parseType" (== "Resource")) :-> (defaultA <+> (mkBlankNode &&& arr id >>> arr2A parseAsResource))
  117
+        , second (hasAttrValue "rdf:parseType" (== "Collection")) :-> (listA (defaultA >>> arr id &&& mkBlankNode) >>> mkCollectionTriples >>> unlistA)
  118
+        , second (hasAttr "rdf:datatype") :-> arr2A getTypedTriple
  119
+        , second (hasAttr "rdf:resource") :-> arr2A getResourceTriple
  120
+        , second (hasAttr "rdf:nodeID") :-> arr2A getNodeIdTriple
  121
+        , second (hasAttr "rdf:ID") :-> (arr2A mkRelativeNode &&& defaultA >>> arr2A reifyTriple >>> unlistA)
  122
+        , second (hasPredicateAttr) :-> (defaultA <+> (mkBlankNode &&& arr id >>> arr2A parsePredicateAttr))
  123
+        , this :-> defaultA
  124
+        ]
118 125
   where defaultA = proc (state, predXml) -> do
119 126
                          p <- arr(unode . s2b) <<< getName -< predXml
120 127
                          t <- arr2A (\s -> arr2A (parseObjectsFromChildren s)) <<< second (second getChildren) -< (state, (p, predXml))
121 128
                          returnA -< t
122 129
         parsePredicateAttr n = (second getName >>> arr (\(s, p) -> Triple (stateSubject s) ((unode . s2b) p) n))
123 130
                            <+> (first (arr (\s -> s { stateSubject = n })) >>> arr2A parsePredicatesFromAttr)
  131
+        hasPredicateAttr = getAttrl >>> neg (getName >>> isMetaAttr)
124 132
 
125 133
 parseObjectsFromChildren :: forall a. (ArrowXml a, ArrowState GParseState a)
126 134
                          => LParseState -> Predicate -> a XmlTree Triple
127  
-parseObjectsFromChildren s p = (isText >>> getText >>> arr ((Triple (stateSubject s) p) . mkLiteralNode s))
128  
-                           <+> (isElem >>> hasName "rdf:Description" >>> parseObjectDescription) -- TODO: include ability to alias for rdf:type
  135
+parseObjectsFromChildren s p = choiceA
  136
+    [ isText :-> (getText >>> arr ((Triple (stateSubject s) p) . mkLiteralNode s))
  137
+    , isElem :-> (hasName "rdf:Description" >>> parseObjectDescription)
  138
+    ]
129 139
   where parseObjectDescription = proc desc -> do
130 140
                                       o <- mkNode s -< desc
131 141
                                       t0 <- arr (\(sub, (p, o)) -> Triple sub p o) -< (stateSubject s, (p, o))
@@ -142,11 +152,6 @@ reifyTriple node = arr (\(Triple s p o) -> [ Triple s p o
142 152
                                            , Triple node rdfPredicate p
143 153
                                            , Triple node rdfObject o
144 154
                                            ])
145  
-  where rdfType = (unode . s2b) "rdf:type"
146  
-        rdfStatement = (unode . s2b) "rdf:Statement"
147  
-        rdfSubject = (unode . s2b) "rdf:subject"
148  
-        rdfPredicate = (unode . s2b) "rdf:predicate"
149  
-        rdfObject = (unode . s2b) "rdf:object"
150 155
 
151 156
 -- |Updates the local state at a given node
152 157
 updateState :: forall a. (ArrowXml a, ArrowState GParseState a)
@@ -158,47 +163,73 @@ updateState = (ifA (second (hasAttr "xml:lang")) (arr2A readLang) (arr id))
158 163
 
159 164
 -- |Read a Triple with an rdf:parseType of Literal
160 165
 parseAsLiteralTriple :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Triple
161  
-parseAsLiteralTriple state = ((getName >>> arr (unode . s2b)) &&& (xshow ( getChildren ) >>> arr (mkTypedLiteralNode state nodeType))) >>> arr (attachSubject (stateSubject state))
162  
-  where nodeType = mkFastString (s2b "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral")
  166
+parseAsLiteralTriple state = (nameToUNode &&& (xshow ( getChildren ) >>> arr (mkTypedLiteralNode rdfXmlLiteral)))
  167
+    >>> arr (attachSubject (stateSubject state))
163 168
 
164 169
 mkCollectionTriples :: forall a. (ArrowXml a, ArrowState GParseState a) => a [(Triple, Node)] Triples
165 170
 mkCollectionTriples = arr (mkCollectionTriples' [])
166  
-  where mkCollectionTriples' [] ((Triple s1 p1 o1, n1):rest) = mkCollectionTriples' [Triple s1 p1 n1] ((Triple s1 p1 o1, n1):rest)
167  
-        mkCollectionTriples' acc ((Triple _ _ o1, n1):(t2, n2):rest) = mkCollectionTriples' (Triple n1 headNode o1 : Triple n1 tailNode n2 : acc) ((t2, n2):rest)
168  
-        mkCollectionTriples' acc [(Triple _ _ o1, n1)] = Triple n1 headNode o1 : Triple n1 tailNode nilNode : acc
  171
+  where mkCollectionTriples' [] ((Triple s1 p1 o1, n1):rest) =
  172
+            mkCollectionTriples' [Triple s1 p1 n1] ((Triple s1 p1 o1, n1):rest)
  173
+        mkCollectionTriples' acc ((Triple _ _ o1, n1):(t2, n2):rest) =
  174
+            mkCollectionTriples' (Triple n1 rdfFirst o1 : Triple n1 rdfRest n2 : acc) ((t2, n2):rest)
  175
+        mkCollectionTriples' acc [(Triple _ _ o1, n1)] =
  176
+            Triple n1 rdfFirst o1 : Triple n1 rdfRest rdfNil : acc
169 177
         mkCollectionTriples' _ [] = []
170  
-        headNode = (unode . s2b) "rdf:first"
171  
-        tailNode = (unode . s2b) "rdf:rest"
172  
-        nilNode = (unode . s2b) "rdf:nil"
173 178
 
174 179
 -- |Read a Triple and it's type when rdf:datatype is available
175 180
 getTypedTriple :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Triple
176  
-getTypedTriple state = ((getName >>> arr (unode . s2b)) &&& ((getAttrValue "rdf:datatype" &&& baseUrl >>> expandURI) &&& xshow (getChildren) >>> arr (\(t, v) -> lnode (typedL (s2b v) (mkFastString (s2b t)))))) >>> arr (attachSubject (stateSubject state))
177  
-  where baseUrl = constA (case stateBaseUrl state of BaseUrl b -> b2s b)
  181
+getTypedTriple state = nameToUNode &&& (attrExpandURI state "rdf:datatype" &&& xshow getChildren >>> arr (\(t, v) -> mkTypedLiteralNode (mkFastString (s2b t)) v))
  182
+    >>> arr (attachSubject (stateSubject state))
178 183
 
179  
-getResourceTriple :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Triple
180  
-getResourceTriple state = ((getName >>> arr (unode . s2b)) &&& ((getAttrValue "rdf:resource" &&& baseUrl) >>> expandURI >>> arr (unode . s2b))) >>> arr (attachSubject (stateSubject state))
181  
-  where baseUrl = constA (case stateBaseUrl state of BaseUrl b -> b2s b)
  184
+getResourceTriple :: forall a. (ArrowXml a, ArrowState GParseState a)
  185
+                  => LParseState -> a XmlTree Triple
  186
+getResourceTriple state = nameToUNode &&& (attrExpandURI state "rdf:resource" >>> mkUNode)
  187
+    >>> arr (attachSubject (stateSubject state))
182 188
 
183  
-getNodeIdTriple :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Triple
184  
-getNodeIdTriple state = ((getName >>> arr (unode . s2b)) &&& (getAttrValue "rdf:nodeID" >>> arr (bnode . s2b))) >>> arr (attachSubject (stateSubject state))
  189
+getNodeIdTriple :: forall a. (ArrowXml a, ArrowState GParseState a)
  190
+                => LParseState -> a XmlTree Triple
  191
+getNodeIdTriple state = nameToUNode &&& (getAttrValue "rdf:nodeID" >>> arr (bnode . s2b))
  192
+    >>> arr (attachSubject (stateSubject state))
185 193
 
186 194
 -- |Read a Node from the "rdf:about" property or generate a blank node
187 195
 mkNode :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Node
188  
-mkNode s = choiceA [ hasAttr "rdf:about" :-> (getAttrValue "rdf:about" &&& baseUrl >>> expandURI >>> arr (unode . s2b))
189  
-                   , hasAttr "rdf:resource" :-> (getAttrValue "rdf:resource" &&& baseUrl >>> expandURI >>> arr (unode . s2b))
190  
-                   , hasAttr "rdf:nodeID" :-> (getAttrValue "rdf:nodeID" >>> arr (bnode . s2b))
191  
-                   , hasAttr "rdf:ID" :-> mkRelativeNode s
192  
-                   , this :-> mkBlankNode
193  
-                   ]
194  
-  where baseUrl = constA (case stateBaseUrl s of BaseUrl b -> b2s b)
  196
+mkNode state = choiceA [ hasAttr "rdf:about" :-> (attrExpandURI state "rdf:about" >>> mkUNode)
  197
+                       , hasAttr "rdf:resource" :-> (attrExpandURI state "rdf:resource" >>> mkUNode)
  198
+                       , hasAttr "rdf:nodeID" :-> (getAttrValue "rdf:nodeID" >>> arr (bnode . s2b))
  199
+                       , hasAttr "rdf:ID" :-> mkRelativeNode state
  200
+                       , this :-> mkBlankNode
  201
+                       ]
  202
+
  203
+rdfXmlLiteral = (mkFastString . s2b) "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"
  204
+rdfFirst = (unode . s2b) "rdf:first"
  205
+rdfRest = (unode . s2b) "rdf:rest"
  206
+rdfNil = (unode . s2b) "rdf:nil"
  207
+rdfType = (unode . s2b) "rdf:type"
  208
+rdfStatement = (unode . s2b) "rdf:Statement"
  209
+rdfSubject = (unode . s2b) "rdf:subject"
  210
+rdfPredicate = (unode . s2b) "rdf:predicate"
  211
+rdfObject = (unode . s2b) "rdf:object"
  212
+
  213
+nameToUNode :: forall a. (ArrowXml a) => a XmlTree Node
  214
+nameToUNode = getName >>> mkUNode
  215
+
  216
+attrExpandURI :: forall a. (ArrowXml a) => LParseState -> String -> a XmlTree String
  217
+attrExpandURI state attr = getAttrValue attr &&& baseUrl >>> expandURI
  218
+  where baseUrl = constA (case stateBaseUrl state of BaseUrl b -> b2s b)
  219
+
  220
+-- |Make a UNode from an absolute string
  221
+mkUNode :: forall a. (Arrow a) => a String Node
  222
+mkUNode = arr (unode . s2b)
195 223
 
196  
---mkRelativeNode :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Node
197  
-mkRelativeNode s = (getAttrValue "rdf:ID" >>> arr (\x -> '#':x)) &&& baseUrl >>> expandURI >>> arr (unode . s2b)
  224
+-- |Make a UNode from a rdf:ID element, expanding relative URIs
  225
+mkRelativeNode :: forall a. (ArrowXml a, ArrowState GParseState a) => LParseState -> a XmlTree Node
  226
+mkRelativeNode s = (getAttrValue "rdf:ID" >>> arr (\x -> '#':x)) &&& baseUrl
  227
+    >>> expandURI >>> arr (unode . s2b)
198 228
   where baseUrl = constA (case stateBaseUrl s of BaseUrl b -> b2s b)
199 229
 
200  
-mkTypedLiteralNode :: LParseState -> FastString -> String -> Node
201  
-mkTypedLiteralNode (LParseState _ _ _) t content = lnode (typedL (s2b content) t)
  230
+-- |Make a literal node with the given type and content
  231
+mkTypedLiteralNode :: FastString -> String -> Node
  232
+mkTypedLiteralNode t content = lnode (typedL (s2b content) t)
202 233
 
203 234
 -- |Use the given state to create a literal node
204 235
 mkLiteralNode :: LParseState -> String -> Node
@@ -207,5 +238,6 @@ mkLiteralNode (LParseState _ Nothing _) content = (lnode . plainL . s2b) content
207 238
 
208 239
 -- |Generate an RDF blank node with incrementing IDs from the arrow state
209 240
 mkBlankNode :: forall a b. (ArrowState GParseState a) => a b Node
210  
-mkBlankNode = nextState (\gState -> gState { stateGenId = stateGenId gState + 1 } ) >>> arr (BNodeGen . stateGenId)
  241
+mkBlankNode = nextState (\gState -> gState { stateGenId = stateGenId gState + 1 })
  242
+    >>> arr (BNodeGen . stateGenId)
211 243
 

0 notes on commit 9a40204

Please sign in to comment.
Something went wrong with that request. Please try again.