Permalink
Browse files

pre-release candidate

  • Loading branch information...
cdornan committed Jan 26, 2017
1 parent dc1059f commit 7da04d19c3b160ca6549fa7d134612fe1ee730fa
Showing with 22,263 additions and 448 deletions.
  1. +3 −3 .ghci
  2. +2 −0 .gitattributes
  3. +41 −1 .gitignore
  4. +40 −2 README.md
  5. +21 −0 Text/RE.hs
  6. +252 −0 Text/RE/Capture.lhs
  7. +39 −0 Text/RE/CaptureID.hs
  8. +94 −0 Text/RE/Edit.lhs
  9. +11 −0 Text/RE/Internal/AddCaptureNames.hs
  10. +271 −0 Text/RE/Internal/NamedCaptures.lhs
  11. +852 −0 Text/RE/Internal/PreludeMacros.hs
  12. +21 −0 Text/RE/Internal/QQ.hs
  13. +16 −0 Text/RE/IsRegex.lhs
  14. +18 −0 Text/RE/LineNo.hs
  15. +77 −0 Text/RE/Options.lhs
  16. +57 −0 Text/RE/PCRE.hs
  17. +53 −0 Text/RE/PCRE/ByteString.hs
  18. +53 −0 Text/RE/PCRE/ByteString/Lazy.hs
  19. +246 −0 Text/RE/PCRE/RE.hs
  20. +53 −0 Text/RE/PCRE/Sequence.hs
  21. +53 −0 Text/RE/PCRE/String.hs
  22. +156 −0 Text/RE/Parsers.hs
  23. +496 −0 Text/RE/Replace.lhs
  24. +60 −0 Text/RE/TDFA.hs
  25. +53 −0 Text/RE/TDFA/ByteString.hs
  26. +53 −0 Text/RE/TDFA/ByteString/Lazy.hs
  27. +242 −0 Text/RE/TDFA/RE.hs
  28. +53 −0 Text/RE/TDFA/Sequence.hs
  29. +53 −0 Text/RE/TDFA/String.hs
  30. +53 −0 Text/RE/TDFA/Text.hs
  31. +53 −0 Text/RE/TDFA/Text/Lazy.hs
  32. +557 −0 Text/RE/TestBench.lhs
  33. +58 −0 Text/RE/Tools/Grep.lhs
  34. +35 −0 Text/RE/Tools/Lex.lhs
  35. +50 −0 Text/RE/Tools/Sed.lhs
  36. +4 −0 changelog
  37. +562 −0 data/access-errors.log
  38. +538 −0 data/access.log
  39. +23 −0 data/error.log
  40. +562 −0 data/events.log
  41. +57 −0 data/include-result.lhs
  42. +58 −0 data/pp-result-doc.lhs
  43. +66 −0 data/pp-result-gen.lhs
  44. +50 −0 data/pp-test.lhs
  45. +261 −0 docs/Capture.html
  46. +144 −0 docs/Edit.html
  47. +108 −0 docs/Grep.html
  48. +62 −0 docs/IsRegex.html
  49. +85 −0 docs/Lex.html
  50. +275 −0 docs/NamedCaptures.html
  51. +100 −0 docs/Options.html
  52. +479 −0 docs/Replace.html
  53. +100 −0 docs/Sed.html
  54. +530 −0 docs/TestBench.html
  55. +135 −0 docs/TestKit.html
  56. +1 −0 docs/_config.yml
  57. +125 −0 docs/index.md
  58. +7,118 −0 docs/lib/bs.css
  59. +21 −0 docs/lib/styles.css
  60. +177 −0 docs/re-gen-modules.html
  61. +152 −0 docs/re-include.html
  62. +618 −0 docs/re-nginx-log-processor.html
  63. +407 −0 docs/re-pp.html
  64. +307 −0 docs/re-tests.html
  65. +732 −0 docs/re-tutorial.html
  66. +94 −0 examples/TestKit.lhs
  67. +127 −0 examples/re-gen-modules.lhs
  68. +138 −0 examples/re-include.lhs
  69. +592 −0 examples/re-nginx-log-processor.lhs
  70. +459 −0 examples/re-pp.lhs
  71. +257 −0 examples/re-tests.lhs
  72. +819 −0 examples/re-tutorial-master.lhs
  73. +859 −0 examples/re-tutorial.lhs
  74. +0 −49 regex-simple.cabal
  75. +316 −0 regex.cabal
  76. +0 −285 src/Text/Regex/Simple.hs
  77. +0 −108 src/regex-simple-examples.hs
  78. +24 −0 tables/README.md
  79. +146 −0 tables/nginx-log-processor-PCRE.md
  80. +24 −0 tables/nginx-log-processor-PCRE.txt
  81. +130 −0 tables/prelude-PCRE.md
  82. +19 −0 tables/prelude-PCRE.txt
  83. +137 −0 tables/prelude-TDFA.md
  84. +20 −0 tables/prelude-TDFA.txt
View
6 .ghci
@@ -1,10 +1,10 @@
:set -Wall
-:set -isrc
-:set -imain
-:set -izeus
:set -hide-package monads-tf
:set -hide-package cryptonite
:set -hide-package crypto-api
:set -hide-package regex-tdfa-rc
:seti -XOverloadedStrings
+:seti -XQuasiQuotes
+
+:set -iexamples
View
@@ -0,0 +1,2 @@
+docs/lib/bs.css linguist-vendored
+*.html linguist-vendored
View
@@ -1,3 +1,15 @@
+# regex
+
+.hub
+.ghci
+.hub
+.ghci
+tmp
+tmp
+
+
+# Haskell
+
dist
dist-*
cabal-dev
@@ -17,4 +29,32 @@ cabal.sandbox.config
*.eventlog
.stack-work/
cabal.project.local
-.hub
+
+# MacOS
+
+*.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
View
@@ -1,2 +1,40 @@
-# regex-idiot
-A regex interface to regex-base for idiots
+# regex: A Regular Expression Toolkit for regex-base
+
+regex extends regex-base with:
+
+ * a text-replacement toolkit
+ * special datatypes for many matches, first match and individual captures
+ * compile-time checking of RE syntax
+ * a unified means of controlling case-sensitivity and multi-line options
+ * high-level Awk-like tools for building text processing apps
+ * the option of using match operators with reduced polymorphism on the
+ text and/or result types
+ * regular expression macros including
+ + a number of useful RE macros
+ + a test bench for testing and documenting new macro environments
+ * built-in support for the TDFA and PCRE backends
+ * comprehensive documentation and copious examples
+
+
+Schedule
+--------
+
+ * **2017-01-27**  0.0.0.1  Pre-release
+ * **2017-01-30**  0.0.1.0  RFC
+ * **2017-02-20**  0.1.0.0  a candidate stable release
+ * **2017-02-20**  1.0.0.0  a first stable release
+
+
+The Web Page
+------------
+
+We have a [web page](https://lainepress.github.io/regex/) with a tutorial,
+a major example and more examples than you can shake a stick at (most
+of them used in the package itself).
+
+
+The Macro Tables
+----------------
+
+The macro environments are an important part of the package and they
+are documented [in these tables](tables).
View
@@ -0,0 +1,21 @@
+{-# OPTIONS_GHC -fno-warn-dodgy-exports #-}
+
+module Text.RE
+ ( module Text.RE.Capture
+ , module Text.RE.CaptureID
+ , module Text.RE.Edit
+ , module Text.RE.IsRegex
+ , module Text.RE.LineNo
+ , module Text.RE.Options
+ , module Text.RE.Parsers
+ , module Text.RE.Replace
+ ) where
+
+import Text.RE.Capture
+import Text.RE.CaptureID
+import Text.RE.Edit
+import Text.RE.IsRegex
+import Text.RE.LineNo
+import Text.RE.Options
+import Text.RE.Parsers
+import Text.RE.Replace
View
@@ -0,0 +1,252 @@
+\begin{code}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+\end{code}
+
+\begin{code}
+module Text.RE.Capture
+ ( Matches(..)
+ , Match(..)
+ , Capture(..)
+ -- Matches functions
+ , anyMatches
+ , countMatches
+ , matches
+ , mainCaptures
+ -- Match functions
+ , matched
+ , matchedText
+ , matchCapture
+ , matchCaptures
+ , captureText
+ , captureTextMaybe
+ , capture
+ , captureMaybe
+ -- Capture functions
+ , hasCaptured
+ , capturePrefix
+ , captureSuffix
+ ) where
+\end{code}
+
+\begin{code}
+import Data.Array
+import Data.Maybe
+import Text.Regex.Base
+import Text.RE.CaptureID
+\end{code}
+
+
+
+\begin{code}
+-- | the result type to use when every match is needed, not just the
+-- first match of the RE against the source text
+data Matches a =
+ Matches
+ { matchesSource :: !a -- ^ the source text being matched
+ , allMatches :: [Match a] -- ^ all matches found, left to right
+ }
+ deriving (Show,Eq)
+\end{code}
+
+\begin{code}
+-- | the result of matching a RE to a text once: it lists the source
+-- text, the capture names of the RE and all of the substrings matched,
+-- starting with the text captured by the whole RE; a complete failure
+-- to match will be represented with an empty array (with bounds (0,-1))
+data Match a =
+ Match
+ { matchSource :: !a -- ^ the whole source text
+ , captureNames :: CaptureNames -- ^ the RE's capture names
+ , matchArray :: !(Array CaptureOrdinal (Capture a))
+ -- ^ 0..n-1 captures,
+ -- starting with the
+ -- text matched by the
+ -- whole RE
+ }
+ deriving (Show,Eq)
+\end{code}
+
+\begin{code}
+-- | the matching of a single sub-expression against part of the source
+-- text
+data Capture a =
+ Capture
+ { captureSource :: !a -- ^ the whole text that was searched
+ , capturedText :: !a -- ^ the text that was matched
+ , captureOffset :: !Int -- ^ the number of characters preceding the
+ -- match with -1 used if no text was captured
+ -- by the RE (not even the empty string)
+ , captureLength :: !Int -- ^ the number of characters in the captured
+ -- sub-string
+ }
+ deriving (Show,Eq)
+\end{code}
+
+\begin{code}
+-- | applies the function to the source text and, via the 'Match'
+-- functor, to every match in the list
+instance Functor Matches where
+  fmap f (Matches src ms) = Matches (f src) (map (fmap f) ms)
+
+-- | applies the function to the source text and to every capture in
+-- the match array; the capture names are carried over unchanged
+instance Functor Match where
+  fmap f (Match src names arr) = Match (f src) names (fmap (fmap f) arr)
+
+-- | applies the function to the two text fields (the searched text and
+-- the captured text); the offset and the length are left untouched
+instance Functor Capture where
+  fmap f (Capture src txt off len) = Capture (f src) (f txt) off len
+\end{code}
+
+\begin{code}
+-- | True iff the RE matched the source text at least once
+anyMatches :: Matches a -> Bool
+anyMatches ms = not (null (allMatches ms))
+
+-- | the number of matches that were found
+countMatches :: Matches a -> Int
+countMatches ms = length (allMatches ms)
+
+-- | list the text captured by the main capture of each match
+matches :: Matches a -> [a]
+matches ms = [ capturedText c | c <- mainCaptures ms ]
+
+-- | extract the main capture (capture 0) from each match
+mainCaptures :: Matches a -> [Capture a]
+mainCaptures = map (capture wholeRE) . allMatches
+  where
+    -- capture 0 identifies the main capture of a match
+    wholeRE = CID_ordinal (CaptureOrdinal 0)
+\end{code}
+
+
+
+\begin{code}
+-- | True iff the RE matched the source text (i.e., the match has a
+-- top-level capture)
+matched :: Match a -> Bool
+matched mtch = isJust (matchCapture mtch)
+
+-- | the text matched by the whole RE if the source text matched,
+-- Nothing otherwise
+matchedText :: Match a -> Maybe a
+matchedText mtch = fmap capturedText (matchCapture mtch)
+
+-- | the top-level capture if the source text matched the RE,
+-- Nothing otherwise
+matchCapture :: Match a -> Maybe (Capture a)
+matchCapture mtch = case matchCaptures mtch of
+    Nothing -> Nothing
+    Just cs -> Just (fst cs)
+
+-- | the top-level capture paired with the list of sub-captures if the
+-- text matched the RE, Nothing otherwise (an empty match array
+-- signifies a failed match)
+matchCaptures :: Match a -> Maybe (Capture a,[Capture a])
+matchCaptures Match{..}
+  | rangeSize (bounds matchArray) == 0 = Nothing
+  | otherwise                          = Just (matchArray ! 0, drop 1 (elems matchArray))
+
+-- | look up the text of the nth capture, 0 being the match of the whole
+-- RE against the source text, 1, the first bracketed sub-expression to
+-- be matched and so on; this goes through 'capture', so it calls
+-- 'error' when that lookup fails
+captureText :: CaptureID -> Match a -> a
+captureText cid = capturedText . capture cid
+
+-- | look up the text of the nth capture (0 being the match of the
+-- whole), returning Nothing if the Match doesn't contain the capture
+-- or if the capture failed to capture anything
+captureTextMaybe :: CaptureID -> Match a -> Maybe a
+captureTextMaybe cid mtch =
+    -- 'captureMaybe' only ever yields captures satisfying 'hasCaptured',
+    -- so no further test is needed before extracting the text
+    fmap capturedText (captureMaybe cid mtch)
+
+-- | look up the nth capture, 0 being the match of the whole RE against
+-- the source text, 1, the first bracketed sub-expression to be matched
+-- and so on; calls 'error' if 'captureMaybe' yields Nothing for the
+-- given capture
+capture :: CaptureID -> Match a -> Capture a
+capture cid mtch = case captureMaybe cid mtch of
+    Just cap -> cap
+    Nothing  -> error $ "capture: out of bounds (" ++ show cid ++ ")"
+
+-- | look up the nth capture, 0 being the match of the whole RE against
+-- the source text, 1, the first bracketed sub-expression to be matched
+-- and so on, returning Nothing if there is no such capture, or if the
+-- capture failed to capture anything (being in a failed alternate)
+captureMaybe :: CaptureID -> Match a -> Maybe (Capture a)
+captureMaybe cid mtch@Match{..}
+  | not (inRange (bounds matchArray) ix) = Nothing
+  | hasCaptured cap                      = Just cap
+  | otherwise                            = Nothing
+  where
+    -- the array index that the capture ID resolves to
+    ix  = CaptureOrdinal (lookupCaptureID cid mtch)
+    -- only demanded once the bounds check above has succeeded
+    cap = matchArray ! ix
+
+-- | resolve a capture ID to an index using the capture names stored in
+-- the match
+lookupCaptureID :: CaptureID -> Match a -> Int
+lookupCaptureID cid (Match _ names _) = findCaptureID cid names
+\end{code}
+
+
+\begin{code}
+-- | test if the capture has matched any text (a negative offset marks
+-- a capture that captured nothing)
+hasCaptured :: Capture a -> Bool
+hasCaptured cap = captureOffset cap >= 0
+
+-- | returns the part of the source text that precedes the match
+capturePrefix :: Extract a => Capture a -> a
+capturePrefix (Capture src _ off _) = before off src
+
+-- | returns the part of the source text that follows the match
+captureSuffix :: Extract a => Capture a -> a
+captureSuffix (Capture src _ off len) = after (off + len) src
+\end{code}
+
+
+
+
+\begin{code}
+-- | for finding just the first match of the RE against the source text
+instance RegexContext regex source (AllTextSubmatches (Array Int) (source,(Int,Int))) =>
+  RegexContext regex source (Match source) where
+  match  r s = cvt s (getAllTextSubmatches (match r s))
+  matchM r s = matchM r s >>= \y -> return (cvt s (getAllTextSubmatches y))
+
+-- | for finding all of the matches of the RE against the source text
+instance RegexContext regex source [MatchText source] =>
+  RegexContext regex source (Matches source) where
+  match  r s = Matches s [ cvt s mt | mt <- match r s ]
+  matchM r s = matchM r s >>= \mts -> return (Matches s [ cvt s mt | mt <- mts ])
+\end{code}
+
+\begin{code}
+-- | convert a regex-base 'MatchText' into a 'Match' over the given
+-- source text: every element becomes a 'Capture', the array is
+-- re-indexed by 'CaptureOrdinal' and no capture names are recorded
+cvt :: source -> MatchText source -> Match source
+cvt hay arr = Match hay noCaptureNames captures
+  where
+    (lo,hi)  = bounds arr
+
+    -- same elements and bounds, but indexed by CaptureOrdinal
+    captures =
+      ixmap (CaptureOrdinal lo,CaptureOrdinal hi) getCaptureOrdinal
+        (fmap mkCapture arr)
+
+    mkCapture (ndl,(off,len)) = Capture hay ndl off len
+\end{code}
Oops, something went wrong.

0 comments on commit 7da04d1

Please sign in to comment.