Permalink
Browse files

Introduce parseFileUtf8 to explicitly use utf8 for parsing a file, and update tests to use it where needed. Closes #21
  • Loading branch information...
alanz committed Oct 21, 2012
1 parent 2ee7754 commit 98e5bfe0f30d836426db8a653731dd12c4c7d240
Showing with 27 additions and 3 deletions.
  1. +6 −0 README
  2. +1 −1 language-javascript.cabal
  3. +7 −1 runtests.hs
  4. +1 −0 src/Language/JavaScript/Parser.hs
  5. +12 −1 src/Language/JavaScript/Parser/Parser.hs
View
6 README
@@ -43,6 +43,12 @@ Note: The generation of the lexical analyser has been separated out,
Changes
+0.5.7 Remove the hs-source-dirs from test suite to prevent compilation
+ issues (@nomeata)
+
+ Introduce parseFileUtf8 to explicitly use utf8 for parsing a
+ file, and update tests to use it where needed. Closes #21
+
0.5.6 Remove constraint on Alex 3.0.1, it is only required to make
changes to the lexer. Closes #19
@@ -1,5 +1,5 @@
Name: language-javascript
-Version: 0.5.6
+Version: 0.5.7
Synopsis: Parser for JavaScript
Description: Parses Javascript into an Abstract Syntax Tree (AST). Initially intended as frontend to hjsmin.
.
View
@@ -294,7 +294,7 @@ testSuite = testGroup "Parser"
, testCase "unicode4" (testProg "x=\"àáâãäå\";y='\3012a\0068'" "Right (JSSourceElementsTop [JSExpression [JSIdentifier \"x\",JSOperator JSLiteral \"=\",JSStringLiteral '\"' \"\\224\\225\\226\\227\\228\\229\"],JSLiteral \";\",JSExpression [JSIdentifier \"y\",JSOperator JSLiteral \"=\",JSStringLiteral '\\'' \"\\3012aD\"],JSLiteral \"\"])")
- , testCase "unicode5f" (testFile "./test/Unicode.js" "JSSourceElementsTop [JSExpression [JSIdentifier \"\\224\\225\\226\\227\\228\\229\",JSOperator JSLiteral \"=\",JSDecimal \"1\"],JSLiteral \";\",JSLiteral \"\"]")
+ , testCase "unicode5f" (testFileUtf8 "./test/Unicode.js" "JSSourceElementsTop [JSExpression [JSIdentifier \"\\224\\225\\226\\227\\228\\229\",JSOperator JSLiteral \"=\",JSDecimal \"1\"],JSLiteral \";\",JSLiteral \"\"]")
, testCase "bug2.a" (testProg "function() {\nz = function /*z*/(o) {\nreturn r;\n};}" "Right (JSSourceElementsTop [JSExpression [JSFunctionExpression [] [] (JSBlock ([JSExpression [JSIdentifier \"z\",JSOperator JSLiteral \"=\",JSFunctionExpression [] [JSIdentifier \"o\"] (JSBlock ([JSReturn [JSExpression [JSIdentifier \"r\"]] JSLiteral \";\"]))],JSLiteral \";\"]))],JSLiteral \"\"])")
@@ -778,6 +778,12 @@ testFile fileName expected = do
-- expected @=? (liftM show $ parseFile fileName)
(expected @=? (showStripped res))
+-- | Parse @fileName@ via 'parseFileUtf8' and assert that the stripped
+-- rendering of the result equals @expected@.
+testFileUtf8 :: FilePath -> String -> IO ()
+testFileUtf8 fileName expected =
+  parseFileUtf8 fileName >>= \ast -> expected @=? showStripped ast
+
-- Set emacs mode
-- Local Variables:
@@ -3,6 +3,7 @@ module Language.JavaScript.Parser
PA.parse
, PA.readJs
, PA.parseFile
+ , PA.parseFileUtf8
, PA.showStripped
, PA.showStrippedMaybe
, JSNode(..)
@@ -4,6 +4,7 @@ module Language.JavaScript.Parser.Parser (
, readJs
-- , readJsKeepComments
, parseFile
+ , parseFileUtf8
-- * Parsing expressions
-- parseExpr
, parseUsing
@@ -15,7 +16,7 @@ import Language.JavaScript.Parser.ParseError
import Language.JavaScript.Parser.Grammar5
import Language.JavaScript.Parser.Lexer
import qualified Language.JavaScript.Parser.AST as AST
-
+import System.IO
-- | Parse one compound statement, or a sequence of simple statements.
-- Generally used for interactive input, such as from the command line of an interpreter.
@@ -43,6 +44,16 @@ parseFile filename =
x <- readFile (filename)
return $ readJs x
+-- | Parse the given file, decoding it as UTF-8 whatever the locale
+-- default is. The handle is closed on return, even if decoding fails.
+parseFileUtf8 :: FilePath -> IO AST.JSNode
+parseFileUtf8 filename =
+  withFile filename ReadMode $ \h -> do
+    hSetEncoding h utf8
+    x <- hGetContents h
+    -- Read everything before 'withFile' closes the handle.
+    length x `seq` return (readJs x)
+
-- | Render a 'AST.JSNode' as a 'String' by delegating to
-- 'AST.showStripped'.
showStripped :: AST.JSNode -> String
showStripped = AST.showStripped

0 comments on commit 98e5bfe

Please sign in to comment.