Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

FIX #3079, dodgy parsing of LANGUAGE pragmas

I ended up rewriting this horrible bit of code, using (yikes) lazy I/O
to slurp in the source file a chunk at a time.  The old code tried to
read the file a chunk at a time, but failed with LANGUAGE pragmas
because the parser for LANGUAGE has state and the state wasn't being
saved between chunks.  We're still closing the Handle eagerly, so
there shouldn't be any problems here.
  • Loading branch information...
commit c197fe602ed4aadf09affe0cdc18e7158d262012 1 parent c5e9e31
Simon Marlow authored March 12, 2009

Showing 1 changed file with 71 additions and 57 deletions. Show diff stats Hide diff stats

  1. 128  compiler/main/HeaderInfo.hs
128  compiler/main/HeaderInfo.hs
@@ -23,8 +23,7 @@ import FastString
23 23
 import HsSyn		( ImportDecl(..), HsModule(..) )
24 24
 import Module		( ModuleName, moduleName )
25 25
 import PrelNames        ( gHC_PRIM, mAIN_NAME )
26  
-import StringBuffer	( StringBuffer(..), hGetStringBufferBlock
27  
-                        , appendStringBuffers )
  26
+import StringBuffer
28 27
 import SrcLoc
29 28
 import DynFlags
30 29
 import ErrUtils
@@ -38,6 +37,7 @@ import MonadUtils       ( MonadIO )
38 37
 import Exception
39 38
 import Control.Monad
40 39
 import System.IO
  40
+import System.IO.Unsafe
41 41
 import Data.List
42 42
 
43 43
 ------------------------------------------------------------------------------
@@ -93,21 +93,57 @@ getOptionsFromFile dflags filename
93 93
     = Exception.bracket
94 94
 	      (openBinaryFile filename ReadMode)
95 95
               (hClose)
96  
-              (\handle ->
97  
-                   do buf <- hGetStringBufferBlock handle blockSize
98  
-                      loop handle buf)
99  
-    where blockSize = 1024
100  
-          loop handle buf
101  
-              | len buf == 0 = return []
102  
-              | otherwise
103  
-              = case getOptions' dflags buf filename of
104  
-                  (Nothing, opts) -> return opts
105  
-                  (Just buf', opts) -> do nextBlock <- hGetStringBufferBlock handle blockSize
106  
-                                          newBuf <- appendStringBuffers buf' nextBlock
107  
-                                          if len newBuf == len buf
108  
-                                             then return opts
109  
-                                             else do opts' <- loop handle newBuf
110  
-                                                     return (opts++opts')
  96
+              (\handle -> do
  97
+                  opts <- fmap getOptions' $ lazyGetToks dflags filename handle
  98
+                  seqList opts $ return opts)
  99
+
  100
+blockSize :: Int
  101
+-- blockSize = 17 -- for testing :-)
  102
+blockSize = 1024
  103
+
  104
+lazyGetToks :: DynFlags -> FilePath -> Handle -> IO [Located Token]
  105
+lazyGetToks dflags filename handle = do
  106
+  buf <- hGetStringBufferBlock handle blockSize
  107
+  unsafeInterleaveIO $ lazyLexBuf handle (pragState dflags buf loc) False
  108
+ where
  109
+  loc  = mkSrcLoc (mkFastString filename) 1 0
  110
+
  111
+  lazyLexBuf :: Handle -> PState -> Bool -> IO [Located Token]
  112
+  lazyLexBuf handle state eof = do
  113
+    case unP (lexer return) state of
  114
+      POk state' t -> do
  115
+        -- pprTrace "lazyLexBuf" (text (show (buffer state'))) (return ())
  116
+        if atEnd (buffer state') && not eof
  117
+           -- if this token reached the end of the buffer, and we haven't
  118
+           -- necessarily read up to the end of the file, then the token might
  119
+           -- be truncated, so read some more of the file and lex it again.
  120
+           then getMore handle state
  121
+           else case t of
  122
+                  L _ ITeof -> return [t]
  123
+                  _other    -> do rest <- lazyLexBuf handle state' eof
  124
+                                  return (t : rest)
  125
+      _ | not eof   -> getMore handle state
  126
+        | otherwise -> return []
  127
+  
  128
+  getMore :: Handle -> PState -> IO [Located Token]
  129
+  getMore handle state = do
  130
+     -- pprTrace "getMore" (text (show (buffer state))) (return ())
  131
+     nextbuf <- hGetStringBufferBlock handle blockSize
  132
+     if (len nextbuf == 0) then lazyLexBuf handle state True else do
  133
+     newbuf <- appendStringBuffers (buffer state) nextbuf
  134
+     unsafeInterleaveIO $ lazyLexBuf handle state{buffer=newbuf} False
  135
+
  136
+
  137
+getToks :: DynFlags -> FilePath -> StringBuffer -> [Located Token]
  138
+getToks dflags filename buf = lexAll (pragState dflags buf loc)
  139
+ where
  140
+  loc  = mkSrcLoc (mkFastString filename) 1 0
  141
+
  142
+  lexAll state = case unP (lexer return) state of
  143
+                   POk _      t@(L _ ITeof) -> [t]
  144
+                   POk state' t -> t : lexAll state'
  145
+                   _ -> [L (last_loc state) ITeof]
  146
+
111 147
 
112 148
 -- | Parse OPTIONS and LANGUAGE pragmas of the source file.
113 149
 --
@@ -117,76 +153,54 @@ getOptions :: DynFlags
117 153
            -> FilePath     -- ^ Source filename.  Used for location info.
118 154
            -> [Located String] -- ^ Parsed options.
119 155
 getOptions dflags buf filename
120  
-    = case getOptions' dflags buf filename of
121  
-        (_,opts) -> opts
  156
+    = getOptions' (getToks dflags filename buf)
122 157
 
123 158
 -- The token parser is written manually because Happy can't
124 159
 -- return a partial result when it encounters a lexer error.
125 160
 -- We want to extract options before the buffer is passed through
126 161
 -- CPP, so we can't use the same trick as 'getImports'.
127  
-getOptions' :: DynFlags
128  
-            -> StringBuffer         -- Input buffer
129  
-            -> FilePath             -- Source file. Used for msgs only.
130  
-            -> ( Maybe StringBuffer -- Just => we can use more input
131  
-               , [Located String]   -- Options.
132  
-               )
133  
-getOptions' dflags buf filename
134  
-    = parseToks (lexAll (pragState dflags buf loc))
135  
-    where loc  = mkSrcLoc (mkFastString filename) 1 0
136  
-
137  
-          getToken (_buf,L _loc tok) = tok
138  
-          getLoc (_buf,L loc _tok) = loc
139  
-          getBuf (buf,_tok) = buf
140  
-          combine opts (flag, opts') = (flag, opts++opts')
141  
-          add opt (flag, opts) = (flag, opt:opts)
  162
+getOptions' :: [Located Token]      -- Input buffer
  163
+            -> [Located String]     -- Options.
  164
+getOptions' toks
  165
+    = parseToks toks
  166
+    where 
  167
+          getToken (L _loc tok) = tok
  168
+          getLoc (L loc _tok) = loc
142 169
 
143 170
           parseToks (open:close:xs)
144 171
               | IToptions_prag str <- getToken open
145 172
               , ITclose_prag       <- getToken close
146  
-              = map (L (getLoc open)) (words str) `combine`
  173
+              = map (L (getLoc open)) (words str) ++
147 174
                 parseToks xs
148 175
           parseToks (open:close:xs)
149 176
               | ITinclude_prag str <- getToken open
150 177
               , ITclose_prag       <- getToken close
151  
-              = map (L (getLoc open)) ["-#include",removeSpaces str] `combine`
  178
+              = map (L (getLoc open)) ["-#include",removeSpaces str] ++
152 179
                 parseToks xs
153 180
           parseToks (open:close:xs)
154 181
               | ITdocOptions str <- getToken open
155 182
               , ITclose_prag     <- getToken close
156 183
               = map (L (getLoc open)) ["-haddock-opts", removeSpaces str]
157  
-                `combine` parseToks xs
  184
+                ++ parseToks xs
158 185
           parseToks (open:xs)
159 186
               | ITdocOptionsOld str <- getToken open
160 187
               = map (L (getLoc open)) ["-haddock-opts", removeSpaces str]
161  
-                `combine` parseToks xs
  188
+                ++ parseToks xs
162 189
           parseToks (open:xs)
163 190
               | ITlanguage_prag <- getToken open
164 191
               = parseLanguage xs
165  
-          -- The last token before EOF could have been truncated.
166  
-          -- We ignore it to be on the safe side.
167  
-          parseToks [tok,eof]
168  
-              | ITeof <- getToken eof
169  
-              = (Just (getBuf tok),[])
170  
-          parseToks (eof:_)
171  
-              | ITeof <- getToken eof
172  
-              = (Just (getBuf eof),[])
173  
-          parseToks _ = (Nothing,[])
174  
-          parseLanguage ((_buf,L loc (ITconid fs)):rest)
175  
-              = checkExtension (L loc fs) `add`
  192
+          parseToks _ = []
  193
+          parseLanguage (L loc (ITconid fs):rest)
  194
+              = checkExtension (L loc fs) :
176 195
                 case rest of
177  
-                  (_,L _loc ITcomma):more -> parseLanguage more
178  
-                  (_,L _loc ITclose_prag):more -> parseToks more
179  
-                  (_,L loc _):_ -> languagePragParseError loc
  196
+                  (L _loc ITcomma):more -> parseLanguage more
  197
+                  (L _loc ITclose_prag):more -> parseToks more
  198
+                  (L loc _):_ -> languagePragParseError loc
180 199
                   [] -> panic "getOptions'.parseLanguage(1) went past eof token"
181 200
           parseLanguage (tok:_)
182 201
               = languagePragParseError (getLoc tok)
183 202
           parseLanguage []
184 203
               = panic "getOptions'.parseLanguage(2) went past eof token"
185  
-          lexToken t = return t
186  
-          lexAll state = case unP (lexer lexToken) state of
187  
-                           POk _      t@(L _ ITeof) -> [(buffer state,t)]
188  
-                           POk state' t -> (buffer state,t):lexAll state'
189  
-                           _ -> [(buffer state,L (last_loc state) ITeof)]
190 204
 
191 205
 -----------------------------------------------------------------------------
192 206
 

0 notes on commit c197fe6

Please sign in to comment.
Something went wrong with that request. Please try again.