Permalink
Browse files

0.9.0.0: Finish tidying up the API, add type-safe replacement templates

  * Flip the order of the arguments to replace (fixes #78)
  * Add type-safe replacement templates (fixes #60)
  * Finish tidying up the API (fixes #80)
  * Make `regex` compatible w/ TH-less GHCs (fixes #70)
  * Declare extensions the compiler must support (fixes #83)
  * Fix curl for AppVeyor build (fixes #79)
  * Fix AppVeyor badge (fixes #81)
  * Remove QQ from code coverage stats (fixes #82)
  * Rename Options, Context and Replace methods (fixes #84)
  • Loading branch information...
cdornan committed Mar 25, 2017
1 parent a7d4a2b commit 9f24a3f874e328a5b4fc11a9ccf0c939b41a1ba0
Showing with 3,373 additions and 177,207 deletions.
  1. +2 −2 .travis.yml
  2. +1 −1 README.md
  3. +18 −61 Text/RE.hs
  4. +1 −0 Text/RE/Internal/AddCaptureNames.hs
  5. +4 −0 Text/RE/Internal/EscapeREString.hs
  6. +54 −32 Text/RE/Internal/NamedCaptures.lhs
  7. +9 −1 Text/RE/Internal/PreludeMacros.hs
  8. +5 −2 Text/RE/Internal/QQ.hs
  9. +40 −22 Text/RE/PCRE.hs
  10. +38 −25 Text/RE/PCRE/ByteString.hs
  11. +38 −25 Text/RE/PCRE/ByteString/Lazy.hs
  12. +326 −121 Text/RE/PCRE/RE.hs
  13. +38 −25 Text/RE/PCRE/Sequence.hs
  14. +38 −25 Text/RE/PCRE/String.hs
  15. +99 −0 Text/RE/SearchReplace.hs
  16. +20 −0 Text/RE/Summa.hs
  17. +40 −23 Text/RE/TDFA.hs
  18. +38 −25 Text/RE/TDFA/ByteString.hs
  19. +38 −25 Text/RE/TDFA/ByteString/Lazy.hs
  20. +335 −119 Text/RE/TDFA/RE.hs
  21. +38 −25 Text/RE/TDFA/Sequence.hs
  22. +38 −25 Text/RE/TDFA/String.hs
  23. +38 −25 Text/RE/TDFA/Text.hs
  24. +38 −25 Text/RE/TDFA/Text/Lazy.hs
  25. +5 −1 Text/RE/TestBench.lhs
  26. +9 −9 Text/RE/TestBench/Parsers.hs
  27. +37 −26 Text/RE/Tools/Edit.lhs
  28. +4 −1 Text/RE/Tools/Lex.lhs
  29. +2 −1 Text/RE/Tools/Sed.lhs
  30. +27 −0 Text/RE/Types.hs
  31. +2 −2 Text/RE/Types/CaptureID.hs
  32. +21 −6 Text/RE/Types/IsRegex.lhs
  33. +6 −3 Text/RE/Types/LineNo.hs
  34. +2 −1 Text/RE/Types/Match.lhs
  35. +3 −4 Text/RE/Types/Matches.lhs
  36. +0 −69 Text/RE/Types/Options.lhs
  37. +94 −0 Text/RE/Types/REOptions.lhs
  38. +136 −102 Text/RE/Types/Replace.lhs
  39. +33 −0 Text/RE/Types/SearchReplace.hs
  40. +3 −0 appveyor.yml
  41. BIN bin/shc.xz
  42. +11 −0 changelog
  43. +0 −175,526 check-plan.yaml
  44. +39 −28 docs/Edit.html
  45. +4 −4 docs/IsRegex.html
  46. +4 −1 docs/Lex.html
  47. +58 −36 docs/NamedCaptures.html
  48. +128 −0 docs/REOptions.html
  49. +136 −102 docs/Replace.html
  50. +4 −3 docs/Sed.html
  51. +22 −18 docs/TestBench.html
  52. +13 −12 docs/TestKit.html
  53. +6 −1 docs/about.html
  54. +2 −2 docs/badges/hackage.svg
  55. +25 −2 docs/build-status.html
  56. +11 −0 docs/changelog.html
  57. +11 −1 docs/contact.html
  58. +6 −1 docs/directory.html
  59. +13 −3 docs/index.html
  60. +11 −11 docs/installation.html
  61. +60 −27 docs/re-gen-cabals.html
  62. +12 −11 docs/re-gen-modules.html
  63. +10 −7 docs/re-include.html
  64. +74 −72 docs/re-nginx-log-processor.html
  65. +95 −92 docs/re-prep.html
  66. +180 −57 docs/re-tests.html
  67. +73 −70 docs/re-tutorial.html
  68. +1 −1 docs/roadmap.html
  69. +5 −5 docs/tutorial.html
  70. +5 −4 examples/TestKit.lhs
  71. +45 −12 examples/re-gen-cabals.lhs
  72. +8 −7 examples/re-gen-modules.lhs
  73. +5 −2 examples/re-include.lhs
  74. +11 −9 examples/re-nginx-log-processor.lhs
  75. +19 −16 examples/re-prep.lhs
  76. +218 −63 examples/re-tests.lhs
  77. +23 −20 examples/re-tutorial-master.lhs
  78. +23 −20 examples/re-tutorial.lhs
  79. +3 −5 lib/README-regex-examples.md
  80. +3 −5 lib/README-regex.md
  81. +8 −8 lib/cabal-masters/executables-incl.cabal
  82. +10 −2 lib/cabal-masters/library-incl.cabal
  83. +1 −1 lib/cabal-masters/mega-regex.cabal
  84. +1 −1 lib/cabal-masters/regex-examples.cabal
  85. +1 −1 lib/cabal-masters/regex-incl.cabal
  86. +1 −1 lib/cabal-masters/regex-with-pcre.cabal
  87. +6 −2 lib/cabal-masters/regex.cabal
  88. +10 −2 lib/md/build-status.md
  89. +1 −1 lib/md/roadmap-incl.md
  90. +68 −20 lib/mega-regex.cabal
  91. +37 −22 lib/regex-examples.cabal
  92. +30 −4 lib/regex-with-pcre.cabal
  93. +34 −5 lib/regex.cabal
  94. +1 −1 lib/version.txt
  95. +68 −20 regex.cabal
  96. BIN releases/regex-0.9.0.0.tar.gz
  97. BIN releases/regex-examples-0.9.0.0.tar.gz
  98. BIN releases/regex-with-pcre-0.9.0.0.tar.gz
  99. +1 −1 stack-nightly.yaml
View
@@ -212,6 +212,6 @@ script:
set +ex
after_script:
- - travis_retry curl -L https://github.com/rubik/stack-hpc-coveralls/releases/download/v0.0.4.0/shc-linux-x64-$GHCVER.tar.bz2 | tar -xj
+ - travis_retry curl -L https://lainepress.github.io/shc/shc.tar.xz | tar xJ
- |
- [ "$BUILD" == stack -a "$GHCVER" == 7.10.3 ] && ./shc --partial-coverage regex re-gen-cabals-test re-gen-modules-test re-include-test re-nginx-log-processor-test re-prep-test re-tests-test re-tutorial-test
+ [ "$BUILD" == stack -a "$GHCVER" == 7.10.3 ] && ./shc --exclude-files=Text/RE/Internal/QQ.hs --partial-coverage regex re-gen-cabals-test re-gen-modules-test re-include-test re-nginx-log-processor-test re-prep-test re-tests-test re-tutorial-test
View
@@ -48,7 +48,7 @@ two packages:
- [X] 2017-03-13 v0.6.0.1 [Fix .travis.yml release-stack script](https://github.com/iconnect/regex/issues/67)
- [X] 2017-03-15 v0.7.0.0 [Better organization of API](https://github.com/iconnect/regex/milestone/8)
- [X] 2017-03-16 v0.8.0.0 [Tidy up the API](https://github.com/iconnect/regex/milestone/10)
-- [ ] 2017-03-18 v0.9.0.0 [Add type-safe replacement templates and use TemplateHaskellQuotes](https://github.com/iconnect/regex/milestone/9)
+- [X] 2017-03-18 v0.9.0.0 [Finish tidying up the API, Add type-safe replacement templates and exploit TemplateHaskellQuotes](https://github.com/iconnect/regex/milestone/9)
- [ ] 2017-03-31 v1.0.0.0 [First stable release](https://github.com/iconnect/regex/milestone/3)
- [ ] 2017-08-31 v2.0.0.0 [Fast text replacement with benchmarks](https://github.com/iconnect/regex/milestone/4)
View
@@ -18,76 +18,32 @@ module Text.RE
-- ** The Match Operators
-- $operators
- -- * Matches, Match & Capture
- Matches(..)
- , Match(..)
- , Capture(..)
- , noMatch
- -- ** Matches functions
+ -- * Matches
+ Matches
+ , matchesSource
+ , allMatches
, anyMatches
, countMatches
, matches
- , mainCaptures
- -- ** Match functions
+ -- * Match
+ , Match
+ , matchSource
, matched
, matchedText
- , matchCapture
- , matchCaptures
- , (!$$)
- , captureText
- , (!$$?)
- , captureTextMaybe
- , (!$)
- , capture
- , (!$?)
- , captureMaybe
- -- ** Capture functions
- , hasCaptured
- , capturePrefix
- , captureSuffix
- -- * Options
- , SimpleRegexOptions(..)
- -- * CaptureID
- , CaptureID
- -- * Replace
- , Replace(..)
- , ReplaceMethods(..)
- , replaceMethods
- , Context(..)
- , Location(..)
- , isTopLocation
- , replace
- , replaceAll
- , replaceAllCaptures
- , replaceAllCaptures_
- , replaceAllCapturesM
- , replaceCaptures
- , replaceCaptures_
- , replaceCapturesM
- , expandMacros
- , expandMacros'
) where
-import Text.RE.Types.Capture
-import Text.RE.Types.CaptureID
import Text.RE.Types.Match
import Text.RE.Types.Matches
-import Text.RE.Types.Options
-import Text.RE.Types.Replace
-- $tutorial
--- We have a regex tutorial at <http://tutorial.regex.uk>. These API
--- docs are mainly for reference.
+--
+-- We have a regex tutorial at <http://tutorial.regex.uk>.
-- $use
--
--- This module won't provide any operators to match a regular expression
--- against text as it merely provides the toolkit for working with the
--- output of the match operators. You probably won't import it directly
--- but import one of the modules that provides the match operators,
--- which will in tuen re-export this module.
---
--- The module that you choose to import will depend upon two factors:
+-- This module just provides an overview of the key types on which
+-- the regex package is built. You will need to import one of the API
+-- modules; which one you choose will depend upon two factors:
--
-- * Which flavour of regular expression do you want to use? If you want
-- Posix flavour REs then you want the TDFA modules, otherwise its
@@ -121,15 +77,16 @@ import Text.RE.Types.Replace
-- $operators
--
--- The traditional @=~@ and @=~~@ operators are exported by the @regex@,
--- but we recommend that you use the two new operators, especially if
--- you are not familiar with the old operators. We have:
+-- The traditional @=~@ and @=~~@ operators are exported by the above
+-- API module, but we recommend that you use the two new operators,
+-- especially if you are not familiar with the old operators. We have:
--
-- * @txt ?=~ re@ searches for a single match yielding a value of type
-- 'Match' @a@ where @a@ is the type of the text you are searching.
--
-- * @txt *=~ re@ searches for all non-overlapping matches in @txt@,
-- returning a value of type 'Matches' @a@.
--
--- See the sections below for more information on these @Matches@ and
--- @Match@ result types.
+-- The remainder of this module outlines these @Matches@ and
+-- @Match@ result types. Only an outline is given here. For more details
+-- see the "Text.RE.Types.Matches" and "Text.RE.Types.Match" modules.
@@ -16,6 +16,7 @@ import qualified Data.Text.Lazy as TL
import Prelude.Compat
import Text.RE
import Text.RE.Types.CaptureID
+import Text.RE.Types.Match
import Unsafe.Coerce
@@ -1,11 +1,15 @@
module Text.RE.Internal.EscapeREString where
+-- | Convert a string into a regular expression that will match that
+-- string
escapeREString :: String -> String
escapeREString = foldr esc []
where
esc c t | isMetaChar c = '\\' : c : t
| otherwise = c : t
+-- | returns True iff the character is an RE meta character
+-- ('[', '*', '{', etc.)
isMetaChar :: Char -> Bool
isMetaChar c = case c of
'^' -> True
@@ -1,13 +1,18 @@
\begin{code}
-{-# LANGUAGE QuasiQuotes #-}
-{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE CPP #-}
+#if __GLASGOW_HASKELL__ >= 800
+{-# LANGUAGE TemplateHaskellQuotes #-}
+#else
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE TemplateHaskell #-}
+#endif
module Text.RE.Internal.NamedCaptures
( cp
, extractNamedCaptures
- , idFormatTokenOptions
+ , idFormatTokenREOptions
, Token(..)
, validToken
, formatTokens
@@ -22,23 +27,27 @@ import qualified Data.Text as T
import GHC.Generics
import qualified Language.Haskell.TH as TH
import Language.Haskell.TH.Quote
-import Text.Heredoc
import Text.RE
import Text.RE.Internal.PreludeMacros
import Text.RE.Internal.QQ
import Text.RE.TestBench
import Text.RE.Tools.Lex
import Text.RE.Types.CaptureID
+import Text.RE.Types.Match
import Text.Regex.TDFA
+-- | quasi quoter for CaptureID ([cp|0|],[cp|y|], etc.)
cp :: QuasiQuoter
cp =
(qq0 "cp")
{ quoteExp = parse_capture
}
-extractNamedCaptures :: String -> Either String (CaptureNames,String)
+-- | extract the CaptureNames from an RE or return an error diagnostic
+-- if the RE is not well formed; also returns the total number of captures
+-- in the RE
+extractNamedCaptures :: String -> Either String ((Int,CaptureNames),String)
extractNamedCaptures s = Right (analyseTokens tks,formatTokens tks)
where
tks = scan s
@@ -49,6 +58,7 @@ Token
-----
\begin{code}
+-- | our RE scanner returns a list of these tokens
data Token
= ECap (Maybe String)
| PGrp
@@ -58,6 +68,7 @@ data Token
| Other Char
deriving (Show,Generic,Eq)
+-- | check that a token is well formed
validToken :: Token -> Bool
validToken tkn = case tkn of
ECap mb -> maybe True check_ecap mb
@@ -78,11 +89,15 @@ Analysing [Token] -> CaptureNames
---------------------------------
\begin{code}
-analyseTokens :: [Token] -> CaptureNames
-analyseTokens = HM.fromList . count_em 1
+-- | analyse a token stream, returning the number of captures and the
+-- 'CaptureNames'
+analyseTokens :: [Token] -> (Int,CaptureNames)
+analyseTokens tks0 = case count_em 1 tks0 of
+ (n,as) -> (n-1, HM.fromList as)
where
- count_em _ [] = []
- count_em n (tk:tks) = bd ++ count_em (n `seq` n+d) tks
+ count_em n [] = (n,[])
+ count_em n (tk:tks) = case count_em (n `seq` n+d) tks of
+ (n',as) -> (n',bd++as)
where
(d,bd) = case tk of
ECap (Just nm) -> (,) 1 [(CaptureName $ T.pack nm,CaptureOrdinal n)]
@@ -99,18 +114,19 @@ Scanning Regex Strings
----------------------
\begin{code}
+-- | scan an RE string into a list of RE Tokens
scan :: String -> [Token]
scan = alex' match al oops
where
al :: [(Regex,Match String->Maybe Token)]
al =
- [ mk [here|\$\{([^{}]+)\}\(|] $ ECap . Just . x_1
- , mk [here|\$\(|] $ const $ ECap Nothing
- , mk [here|\(\?:|] $ const PGrp
- , mk [here|\(\?|] $ const PCap
- , mk [here|\(|] $ const Bra
- , mk [here|\\(.)|] $ BS . s2c . x_1
- , mk [here|(.)|] $ Other . s2c . x_1
+ [ mk "\\$\\{([^{}]+)\\}\\(" $ ECap . Just . x_1
+ , mk "\\$\\(" $ const $ ECap Nothing
+ , mk "\\(\\?:" $ const PGrp
+ , mk "\\(\\?" $ const PCap
+ , mk "\\(" $ const Bra
+ , mk "\\\\(.)" $ BS . s2c . x_1
+ , mk "(.)" $ Other . s2c . x_1
]
x_1 = captureText $ IsCaptureOrdinal $ CaptureOrdinal 1
@@ -139,35 +155,41 @@ Formatting [Token]
------------------
\begin{code}
+-- | format [Token] into an RE string
formatTokens :: [Token] -> String
-formatTokens = formatTokens' defFormatTokenOptions
-
-data FormatTokenOptions =
- FormatTokenOptions
- { _fto_regex_type :: Maybe RegexType
- , _fto_min_caps :: Bool
- , _fto_incl_caps :: Bool
+formatTokens = formatTokens' defFormatTokenREOptions
+
+-- | options for the general Token formatter below
+data FormatTokenREOptions =
+ FormatTokenREOptions
+ { _fto_regex_type :: Maybe RegexType -- ^ Posix, PCRE or indeterminate REs?
+ , _fto_min_caps :: Bool -- ^ remove captures where possible
+ , _fto_incl_caps :: Bool -- ^ include the captures in the output
}
deriving (Show)
-defFormatTokenOptions :: FormatTokenOptions
-defFormatTokenOptions =
- FormatTokenOptions
+-- | the default configuration for the Token formatter
+defFormatTokenREOptions :: FormatTokenREOptions
+defFormatTokenREOptions =
+ FormatTokenREOptions
{ _fto_regex_type = Nothing
, _fto_min_caps = False
, _fto_incl_caps = False
}
-idFormatTokenOptions :: FormatTokenOptions
-idFormatTokenOptions =
- FormatTokenOptions
+-- | a configuration that will preserve the parsed regular expression
+-- in the output
+idFormatTokenREOptions :: FormatTokenREOptions
+idFormatTokenREOptions =
+ FormatTokenREOptions
{ _fto_regex_type = Nothing
, _fto_min_caps = False
, _fto_incl_caps = True
}
-formatTokens' :: FormatTokenOptions -> [Token] -> String
-formatTokens' FormatTokenOptions{..} = foldr f ""
+-- | the general Token formatter, generating REs according to the options
+formatTokens' :: FormatTokenREOptions -> [Token] -> String
+formatTokens' FormatTokenREOptions{..} = foldr f ""
where
f tk tl = t_s ++ tl
where
@@ -191,7 +213,7 @@ formatTokens' FormatTokenOptions{..} = foldr f ""
\end{code}
\begin{code}
--- this is a reference of formatTokens defFormatTokenOptions,
+-- this is a reference implementation of formatTokens defFormatTokenREOptions,
-- used for testing the latter
formatTokens0 :: [Token] -> String
formatTokens0 = foldr f ""
@@ -30,33 +30,40 @@ import Data.Maybe
import qualified Data.Text as T
import Data.Time
import Prelude.Compat
-import Text.RE.Types.Options
+import Text.RE.Types.REOptions
import Text.RE.TestBench.Parsers
import Text.RE.TestBench
+-- | generate the standard prelude Macros used to parse REs
preludeMacros :: (Monad m,Functor m)
=> (String->m r)
-> RegexType
-> WithCaptures
-> m (Macros r)
preludeMacros prs rty wc = mkMacros prs rty wc $ preludeMacroEnv rty
+-- | format the standard prelude macros in a markdown table
preludeMacroTable :: RegexType -> String
preludeMacroTable rty = formatMacroTable rty $ preludeMacroEnv rty
+-- | generate a textual summary of the prelude macros
preludeMacroSummary :: RegexType -> PreludeMacro -> String
preludeMacroSummary rty =
formatMacroSummary rty (preludeMacroEnv rty) . prelude_macro_id
+-- | generate a plain text table giving the RE for each macro with all
+-- macros expanded (to NF)
preludeMacroSources :: RegexType -> String
preludeMacroSources rty =
formatMacroSources rty ExclCaptures $ preludeMacroEnv rty
+-- | generate plain text giving the expanded RE for a single macro
preludeMacroSource :: RegexType -> PreludeMacro -> String
preludeMacroSource rty =
formatMacroSource rty ExclCaptures (preludeMacroEnv rty) . prelude_macro_id
+-- | generate the `MacroEnv` for the standard prelude macros
preludeMacroEnv :: RegexType -> MacroEnv
preludeMacroEnv rty = fix $ prelude_macro_env rty
@@ -66,6 +73,7 @@ prelude_macro_env rty env = HML.fromList $ catMaybes
| pm<-[minBound..maxBound]
]
+-- | generate the `MacroDescriptor` for a given `PreludeMacro`
preludeMacroDescriptor :: RegexType
-> MacroEnv
-> PreludeMacro
Oops, something went wrong.

0 comments on commit 9f24a3f

Please sign in to comment.