Skip to content

Commit

Permalink
Introduced a type class 'RdfParser' with the 3 parse methods, and cre…
Browse files Browse the repository at this point in the history
…ated instances TurtleParser and NTriplesParser and updated all dependent code to work with the type classes now.

This change was introduced to make it easier to treat parsers polymorphically in functions that should work with any parser (e.g., the coming Interact module).

darcs-hash:20081208030204-ce9ea-6ba34e14357b65d7b0927e034e7cea3aa28b6785.gz
  • Loading branch information
eukaryote committed Dec 8, 2008
1 parent 1aadd1e commit 376877b
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 54 deletions.
18 changes: 9 additions & 9 deletions Rdf4hParseMain.hs
Expand Up @@ -2,9 +2,9 @@ module Main where

import Text.RDF.Core
import Text.RDF.TriplesGraph
import qualified Text.RDF.NTriplesParser as NP
import Text.RDF.NTriplesParser
import qualified Text.RDF.NTriplesSerializer as NS
import qualified Text.RDF.TurtleParser as TP
import Text.RDF.TurtleParser
import qualified Text.RDF.TurtleSerializer as TS

import Data.ByteString.Lazy.Char8(ByteString)
Expand Down Expand Up @@ -45,20 +45,20 @@ main =
hPrintf stderr " OUTPUT-FORMAT: %s\n" outputFormat >>
hPrintf stderr "OUTPUT-BASE-URI: %s\n\n" outputBaseUri)
let mInputUri = if inputBaseUri == "-" then Nothing else Just (BaseUrl $ s2b inputBaseUri)
docUri = inputUri
docUri = Just inputUri
case (inputFormat, isUri $ s2b inputUri) of
-- we use TriplesGraph in all cases, since it preserves the ordering of triples
("turtle", True) -> TP.parseURL mInputUri docUri inputUri
("turtle", True) -> parseURL (TurtleParser mInputUri docUri) inputUri
>>= \(res :: Either ParseFailure TriplesGraph) -> write res outputFormat
("turtle", False) -> (if inputUri /= "-"
then TP.parseFile mInputUri docUri inputUri
else B.getContents >>= return . TP.parseString mInputUri docUri)
then parseFile (TurtleParser mInputUri docUri) inputUri
else B.getContents >>= return . parseString (TurtleParser mInputUri docUri))
>>= \(res :: Either ParseFailure TriplesGraph) -> write res outputFormat
("ntriples", True) -> NP.parseURL inputUri
("ntriples", True) -> parseURL NTriplesParser inputUri
>>= \(res :: Either ParseFailure TriplesGraph) -> write res outputFormat
("ntriples", False) -> (if inputUri /= "-"
then NP.parseFile inputUri
else B.getContents >>= return . NP.parseString)
then parseFile NTriplesParser inputUri
else B.getContents >>= return . parseString NTriplesParser)
>>= \(res :: Either ParseFailure TriplesGraph) -> write res outputFormat
(str , _ ) -> putStrLn ("Invalid format: " ++ str) >> exitFailure

Expand Down
25 changes: 22 additions & 3 deletions Text/RDF/Core.hs
@@ -1,5 +1,8 @@
{- LANGUAGE EmptyDataDecls -}

module Text.RDF.Core (
Graph(empty, mkGraph, triplesOf, select, query, baseUrl, prefixMappings),
RdfParser(parseString, parseFile, parseURL),
BaseUrl(BaseUrl),
PrefixMappings(PrefixMappings), toPMList, PrefixMapping(PrefixMapping),
Triple(Triple), triple, Triples, sortTriples,
Expand Down Expand Up @@ -58,9 +61,8 @@ class Graph gr where
-- |Return an empty graph.
empty :: gr

-- |Return a graph containing all the given triples. Duplicate triples
-- are permitted in the input, but the resultant graph will contains only
-- unique triples.
-- |Return a graph containing all the given triples. Handling of duplicates
-- in the input depends on the particular graph implementation.
mkGraph :: Triples -> Maybe BaseUrl -> PrefixMappings -> gr

-- |Return all triples in the graph, as a list.
Expand Down Expand Up @@ -101,6 +103,23 @@ class Graph gr where
-- regardless of the predicate of the triple.
query :: gr -> Maybe Node -> Maybe Node -> Maybe Node -> Triples

-- |A parser for exactly one RDF serialization format. An instance can read a
-- document of its format from an in-memory bytestring, from a local file, or
-- from a URL. Any configuration a format requires is carried by the value of
-- the instance type @p@, so the required options vary between instances.
class RdfParser p where

  -- |Parse the RDF document held in the given bytestring, yielding either a
  -- failure with an error message or the resulting graph.
  parseString :: forall gr. Graph gr => p -> ByteString -> Either ParseFailure gr

  -- |Parse the RDF document stored in the local file at the given path,
  -- yielding, in the IO monad, either a failure with an error message or the
  -- resulting graph.
  parseFile :: forall gr. Graph gr => p -> String -> IO (Either ParseFailure gr)

  -- |Parse the RDF document located at the given HTTP URL (https is not
  -- supported), yielding, in the IO monad, either a failure with an error
  -- message or the resulting graph.
  parseURL :: forall gr. Graph gr => p -> String -> IO (Either ParseFailure gr)

-- |An RDF node, which may be either a URIRef node ('UNode'), a blank
-- node ('BNode'), or a literal node ('LNode').
data Node =
Expand Down
38 changes: 23 additions & 15 deletions Text/RDF/NTriplesParser.hs
@@ -1,5 +1,5 @@
module Text.RDF.NTriplesParser(
parseFile, parseURL, parseString, ParseFailure
NTriplesParser(NTriplesParser), ParseFailure
)

where
Expand All @@ -19,6 +19,19 @@ import Text.Parsec.ByteString.Lazy
import Data.ByteString.Lazy.Char8(ByteString)
import qualified Data.ByteString.Lazy.Char8 as B

-- |The 'RdfParser' implementation for RDF in the NTriples format. NTriples
-- parsing needs no configuration options, so the type has a single nullary
-- constructor. To parse, pass the 'NTriplesParser' value as the first
-- argument of 'parseString', 'parseFile', or 'parseURL' from the 'RdfParser'
-- type class.
data NTriplesParser = NTriplesParser

-- |'RdfParser' instance for NTriples. The parser value carries no settings and
-- is ignored; every method defers to the module-local primed implementation.
instance RdfParser NTriplesParser where
  parseString _ = parseString'
  parseFile   _ = parseFile'
  parseURL    _ = parseURL'

-- We define or redefine all here using same names as the spec, but with an
-- 'nt_' prefix in order to avoid name clashes (e.g., ntripleDoc becomes
-- nt_ntripleDoc).
Expand All @@ -27,11 +40,6 @@ import qualified Data.ByteString.Lazy.Char8 as B
nt_ntripleDoc :: GenParser ByteString () [Maybe Triple]
nt_ntripleDoc = manyTill nt_line eof

--many :: GenParser ByteString () a -> GenParser ByteString () [a]
--many p =
-- do{ x <- p; xs <- many p; return (x:xs) }


nt_line :: GenParser ByteString () (Maybe Triple)
nt_line =
skipMany nt_space >>
Expand Down Expand Up @@ -192,18 +200,18 @@ b_quote = B.singleton '"'
between_chars :: Char -> Char -> GenParser ByteString () ByteString -> GenParser ByteString () ByteString
between_chars start end parser = char start >> parser >>= \res -> char end >> return res

parseURL :: Graph gr => String -> IO (Either ParseFailure gr)
parseURL url = _parseURL parseString url
-- |Parse an NTriples document held in a bytestring. The document parser is
-- run with an empty source name, and its outcome is converted by
-- 'handleParse' into either a 'ParseFailure' or a graph built with 'mkGraph'.
parseString' :: forall gr. Graph gr => ByteString -> Either ParseFailure gr
parseString' = handleParse mkGraph . runParser nt_ntripleDoc () ""

parseFile :: Graph gr => String -> IO (Either ParseFailure gr)
parseFile path = B.readFile path >>= return . runParser nt_ntripleDoc () path >>= return . handleParse mkGraph
-- |Parse the NTriples document at the given URL. All of the work is handed to
-- '_parseURL' together with 'parseString''.
-- NOTE(review): '_parseURL' is defined outside this view; presumably it
-- fetches the document and applies the supplied parser to it -- confirm.
parseURL' :: forall gr. Graph gr => String -> IO (Either ParseFailure gr)
parseURL' = _parseURL parseString'

parseString :: Graph gr => ByteString -> Either ParseFailure gr
parseString bs = handleParse mkGraph (runParser nt_ntripleDoc () "" bs)
-- |Parse the NTriples document stored in the file at @path@. The file
-- contents are read with 'B.readFile', parsed with @path@ as the source name
-- handed to the parser, and the outcome is converted by 'handleParse' into
-- either a 'ParseFailure' or a graph built with 'mkGraph'.
parseFile' :: forall gr. Graph gr => String -> IO (Either ParseFailure gr)
parseFile' path = fmap (handleParse mkGraph . runParser nt_ntripleDoc () path) (B.readFile path)

handleParse :: Graph gr => (Triples -> Maybe BaseUrl -> PrefixMappings -> gr) ->
Either ParseError [Maybe Triple] ->
(Either ParseFailure gr)
handleParse :: forall gr. (Graph gr) => (Triples -> Maybe BaseUrl -> PrefixMappings -> gr) ->
Either ParseError [Maybe Triple] ->
(Either ParseFailure gr)
handleParse _mkGraph result
-- | B.length rem /= 0 = (Left $ ParseFailure $ "Invalid Document. Unparseable end of document: " ++ B.unpack rem)
| otherwise =
Expand Down
52 changes: 34 additions & 18 deletions Text/RDF/TurtleParser.hs
@@ -1,5 +1,5 @@
module Text.RDF.TurtleParser(
parseFile, parseURL, parseString, ParseFailure
TurtleParser(TurtleParser), ParseFailure
)

where
Expand All @@ -11,6 +11,7 @@ import Text.RDF.ParserUtils
import Text.Parsec
import Text.Parsec.ByteString.Lazy

--import Data.Map(Map)
import qualified Data.Map as Map

import Data.ByteString.Lazy.Char8(ByteString)
Expand All @@ -30,9 +31,25 @@ _trace = trace

-- http://www.w3.org/TeamSubmission/turtle/

-- |The 'RdfParser' implementation for RDF in the Turtle format. It carries two
-- optional settings:
--
-- * the base URL used to resolve relative URLs in the document (the document
--   itself may override it with an @base directive), and
--
-- * the URL of the document itself, used to resolve references to <> in the
--   document.
--
-- To parse, pass a 'TurtleParser' value as the first argument of
-- 'parseString', 'parseFile', or 'parseURL' from the 'RdfParser' type class.
data TurtleParser = TurtleParser (Maybe BaseUrl) (Maybe String)

-- |'RdfParser' instance for Turtle. Each method unpacks the two settings and
-- forwards them to the module-local primed implementation.
instance RdfParser TurtleParser where
  parseString (TurtleParser base doc) = parseString' base doc
  parseFile   (TurtleParser base doc) = parseFile' base doc
  parseURL    (TurtleParser base doc) = parseURL' base doc

type ParseState =
(Maybe BaseUrl, -- the current BaseUrl, may be Nothing initially, but not after it is once set
Maybe ByteString, -- the docUrl, which never changes but may or may not be used
Maybe ByteString, -- the docUrl, which never changes and is used to resolve <> in the document.
Int, -- the id counter, containing the value of the next id to be used
PrefixMappings, -- the mappings from prefix to URI that are encountered while parsing
[Subject], -- stack of current subject nodes, if we have parsed a subject but not finished the triple
Expand Down Expand Up @@ -535,34 +552,33 @@ addTripleForObject obj =
-- to @\<>@ within the document is expanded to the value given here. Additionally, if no @BaseUrl@ is
-- given and no @\@base@ directive has appeared before a relative URI occurs, this value is used as the
-- base URI against which the relative URI is resolved.
--
--
-- Returns either a @ParseFailure@ or a new graph containing the parsed triples.
parseURL :: Graph gr =>
Maybe BaseUrl -- ^ The optional base URI of the document.
-> String -- ^ The document URI (i.e., the URI of the document itself).
-> String -- ^ The location URI from which to retrieve the Turtle document.
parseURL' :: forall gr. (Graph gr) =>
Maybe BaseUrl -- ^ The optional base URI of the document.
-> Maybe String -- ^ The document URI (i.e., the URI of the document itself); if Nothing, use location URI.
-> String -- ^ The location URI from which to retrieve the Turtle document.
-> IO (Either ParseFailure gr)
-- ^ The parse result, which is either a @ParseFailure@ or the graph
-- corresponding to the Turtle document.
parseURL bUrl docUrl locUrl = _parseURL (parseString bUrl docUrl) locUrl
-- ^ The parse result, which is either a @ParseFailure@ or the graph
-- corresponding to the Turtle document.
parseURL' bUrl docUrl locUrl = _parseURL (parseString' bUrl docUrl) locUrl

-- |Parse the given file as a Turtle document. The arguments and return type have the same semantics
-- as 'parseURL', except that the last @String@ argument corresponds to a filesystem location rather
-- than a location URI.
--
-- Returns either a @ParseFailure@ or a new graph containing the parsed triples.
parseFile :: Graph gr => Maybe BaseUrl -> String -> String -> IO (Either ParseFailure gr)
parseFile bUrl docUrl fpath =
B.readFile fpath >>= \bs -> return $ handleResult bUrl (runParser t_turtleDoc initialState docUrl bs)
where initialState = (bUrl, Just (s2b docUrl), 1, PrefixMappings Map.empty, [], [], [], Seq.empty)
-- |Parse the file at @fpath@ as a Turtle document, given the optional base URL
-- and the optional document URL. Returns, in the IO monad, whatever
-- 'handleResult' makes of the parser outcome.
parseFile' :: forall gr. Graph gr => Maybe BaseUrl -> Maybe String -> String -> IO (Either ParseFailure gr)
parseFile' bUrl docUrl fpath = fmap parseContents (B.readFile fpath)
  where
    -- Source name handed to the parser: the document URL, or "" if none was given.
    srcName = maybe "" id docUrl
    parseContents bs = handleResult bUrl (runParser t_turtleDoc startState srcName bs)
    -- Initial parser state: the base URL, the packed document URL, the id
    -- counter starting at 1, and empty prefix mappings and collections.
    startState = (bUrl, fmap B.pack docUrl, 1, PrefixMappings Map.empty, [], [], [], Seq.empty)

-- |Parse the given string as a Turtle document. The arguments and return type have the same semantics
-- as 'parseURL', except that the last argument is the Turtle document itself, as
-- a 'ByteString', rather than a location URI.
parseString :: Graph gr => Maybe BaseUrl -> String -> ByteString -> Either ParseFailure gr
parseString bUrl docUrl ttlStr = handleResult bUrl (runParser t_turtleDoc initialState "" (ttlStr))
where initialState = (bUrl, Just (s2b docUrl), 1, PrefixMappings Map.empty, [], [], [], Seq.empty)

-- |Parse the bytestring @ttlStr@ as a Turtle document, given the optional base
-- URL and the optional document URL. The parser is run with an empty source
-- name and its outcome is passed to 'handleResult'.
parseString' :: forall gr. Graph gr => Maybe BaseUrl -> Maybe String -> ByteString -> Either ParseFailure gr
parseString' bUrl docUrl ttlStr = handleResult bUrl parsed
  where
    parsed = runParser t_turtleDoc startState "" ttlStr
    -- Initial parser state: the base URL, the packed document URL, the id
    -- counter starting at 1, and empty prefix mappings and collections.
    startState = (bUrl, fmap B.pack docUrl, 1, PrefixMappings Map.empty, [], [], [], Seq.empty)

handleResult :: Graph gr => Maybe BaseUrl -> Either ParseError (Seq Triple, PrefixMappings) -> Either ParseFailure gr
handleResult bUrl result =
Expand Down
18 changes: 9 additions & 9 deletions Text/RDF/TurtleParser_ConformanceTest.hs
Expand Up @@ -3,7 +3,7 @@ module Main where

import Text.RDF.Core
import Text.RDF.TurtleParser
import qualified Text.RDF.NTriplesParser as NT
import Text.RDF.NTriplesParser
import Text.RDF.TriplesGraph
import Text.RDF.GraphTestUtils

Expand Down Expand Up @@ -114,11 +114,11 @@ equivalent (Right gr1) (Right gr2) = {- _debug (show (length gr1ts, length gr2
loadInputGraph :: String -> Int -> IO (Either ParseFailure TriplesGraph)
loadInputGraph name n =
B.readFile (fpath name n "ttl") >>=
return . parseString mtestBaseUri (mkDocUrl testBaseUri name n) >>= return . handleLoad
return . parseString (TurtleParser mtestBaseUri (mkDocUrl testBaseUri name n)) >>= return . handleLoad
loadInputGraph1 :: String -> String -> IO (Either ParseFailure TriplesGraph)
loadInputGraph1 dir fname =
B.readFile (printf "%s/%s.ttl" dir fname :: String) >>=
return . parseString mtestBaseUri (mkDocUrl1 testBaseUri fname) >>= return . handleLoad
return . parseString (TurtleParser mtestBaseUri (mkDocUrl1 testBaseUri fname)) >>= return . handleLoad

handleLoad :: Either ParseFailure TriplesGraph -> Either ParseFailure TriplesGraph
handleLoad res =
Expand All @@ -138,7 +138,7 @@ normalizeN n = n
loadExpectedGraph :: String -> Int -> IO (Either ParseFailure TriplesGraph)
loadExpectedGraph name n = loadExpectedGraph1 (fpath name n "out")
loadExpectedGraph1 :: String -> IO (Either ParseFailure TriplesGraph)
loadExpectedGraph1 filename = B.readFile filename >>= return . NT.parseString
loadExpectedGraph1 filename = B.readFile filename >>= return . parseString NTriplesParser

assertLoadSuccess, assertLoadFailure :: String -> Either ParseFailure TriplesGraph -> T.Assertion
assertLoadSuccess idStr (Left (ParseFailure err)) = T.assertFailure $ idStr ++ err
Expand All @@ -161,16 +161,16 @@ _test testGood testNum = B.readFile fpath >>= f
name = if testGood then "test" else "bad" :: String
docUrl = mkDocUrl testBaseUri name testNum
f :: B.ByteString -> IO ()
f s = let result = parseString mtestBaseUri docUrl s
f s = let result = parseString (TurtleParser mtestBaseUri docUrl) s
in case result of
(Left err) -> putStrLn $ "ERROR:" ++ show err
(Right gr) -> mapM_ (putStrLn . show) (triplesOf (gr :: TriplesGraph))

mkDocUrl :: String -> String -> Int -> String
mkDocUrl baseDocUrl fname testNum = printf "%s%s-%02d.ttl" baseDocUrl fname testNum
-- |Build the document URL of a numbered test: the base URL, the test name, a
-- dash, the test number zero-padded to two digits, and the @.ttl@ suffix. The
-- result is always a 'Just'.
mkDocUrl :: String -> String -> Int -> Maybe String
mkDocUrl baseDocUrl fname testNum = Just url
  where
    url :: String
    url = printf "%s%s-%02d.ttl" baseDocUrl fname testNum

mkDocUrl1 :: String -> String -> String
mkDocUrl1 baseDocUrl fname = printf "%s%s.ttl" baseDocUrl fname
-- |Build the document URL of a named test: the base URL, the file name, and
-- the @.ttl@ suffix. The result is always a 'Just'.
mkDocUrl1 :: String -> String -> Maybe String
mkDocUrl1 baseDocUrl fname = Just url
  where
    url :: String
    url = printf "%s%s.ttl" baseDocUrl fname

doTest :: Bool -> Int -> IO (T.Counts, Int)
doTest True testNum = checkGoodConformanceTest testNum >>= T.runTestText (T.putTextToHandle stdout True)
Expand Down

0 comments on commit 376877b

Please sign in to comment.