Permalink
Browse files

Added UTF-8 support.

+ Modified Gitit.Git, so that arguments to shell commands are converted
  to UTF-8 and the output is converted back to unicode strigs.

+ In addition, gitLsTree now returns just a list of files, rather than
  the raw output of 'git ls-tree'. Since git outputs filenames with
  UTF-8 high characters with surrounding double-quotes and octal-escaped
  characters, gitLsTree parses these and converts them to regular
  haskell unicode strings.

+ Added a module Gitit.HAppS that exports some wrappers around HAppS
  functions for better handling of UTF-8.

+ Added a module Gitit.HStringTemplate that exports a new version
  of setAttribute that handles UTF-8 properly.

+ Made a few other changes to Gitit.hs for proper handling of
  UTF-8 in paths, forms, and elsewhere.  Pages can now have UTF-8
  names, provided the file system supports UTF-8 pathnames.

+ Version bump to 0.3.4.
  • Loading branch information...
1 parent 342ab49 commit ebbec6f2e3092326e03ec6f9ec7dfd22f17b1541 @jgm committed Dec 31, 2008
Showing with 146 additions and 32 deletions.
  1. +23 −20 Gitit.hs
  2. +34 −8 Gitit/Git.hs
  3. +52 −0 Gitit/HAppS.hs
  4. +31 −0 Gitit/HStringTemplate.hs
  5. +1 −1 Makefile
  6. +2 −1 README.markdown
  7. +3 −2 gitit.cabal
View
@@ -19,7 +19,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
module Main where
-import HAppS.Server
+import HAppS.Server hiding (look, lookRead, lookCookieValue, mkCookie)
+import Gitit.HAppS (look, lookRead, lookCookieValue, mkCookie)
import HAppS.State hiding (Method)
import System.Environment
import System.IO.UTF8
@@ -40,6 +41,7 @@ import Data.List (intersect, intersperse, intercalate, sort, nub, sortBy, isSuff
import Data.Maybe (fromMaybe, fromJust, mapMaybe, isNothing)
import Data.ByteString.UTF8 (fromString, toString)
import qualified Data.ByteString.Lazy.UTF8 as L (fromString)
+import Codec.Binary.UTF8.String (decodeString, encodeString)
import qualified Data.Map as M
import Data.Ord (comparing)
import Data.Digest.Pure.SHA (sha512, showDigest)
@@ -56,6 +58,7 @@ import System.Console.GetOpt
import System.Exit
import Text.Highlighting.Kate
import qualified Text.StringTemplate as T
+import Gitit.HStringTemplate (setAttribute)
import Data.IORef
import System.IO.Unsafe (unsafePerformIO)
@@ -398,7 +401,7 @@ ifLoggedIn responder =
handle :: (String -> Bool) -> Method -> (String -> Params -> Web Response) -> Handler
handle pathtest meth responder = uriRest $ \uri ->
- let path' = uriPath uri
+ let path' = decodeString $ uriPath uri
in if pathtest path'
then withData $ \params ->
[ withRequest $ \req ->
@@ -460,7 +463,7 @@ isSourceCode :: String -> Bool
isSourceCode = not . null . languagesByExtension . takeExtension
urlForPage :: String -> String
-urlForPage page = '/' : (substitute "%2f" "/" $ urlEncode page)
+urlForPage page = '/' : (substitute "%2f" "/" $ urlEncode $ encodeString page)
-- this is needed so that browsers recognize relative URLs correctly
pathForPage :: String -> FilePath
@@ -500,7 +503,7 @@ showFileAsText file params = do
randomPage :: String -> Params -> Web Response
randomPage _ _ = do
- files <- liftM (map (unwords . drop 3 . words) . lines) (gitLsTree "HEAD")
+ files <- gitLsTree "HEAD"
let pages = map dropExtension $ filter (\f -> takeExtension f == ".page" && not (":discuss.page" `isSuffixOf` f)) files
if null pages
then error "No pages found!"
@@ -810,7 +813,7 @@ indexPage :: String -> Params -> Web Response
indexPage _ params = do
let page = "_index"
let revision = pRevision params
- files <- gitLsTree revision >>= return . map (unwords . drop 3 . words) . lines
+ files <- gitLsTree revision
let htmlIndex = fileListToHtml "/" $ map splitPath $ sort $ filter (\f -> not (":discuss.page" `isSuffixOf` f)) files
formattedPage (defaultPageLayout { pgShowPageTools = False, pgTabs = [], pgScripts = ["folding.js"], pgTitle = "All pages" }) page params htmlIndex
@@ -921,23 +924,23 @@ formattedPage layout page params htmlContents = do
else ulist ! [theclass "messages"] << map (li <<) messages
templ <- liftIO $ readIORef template
let filledTemp = T.render $
- T.setAttribute "pagetitle" pageTitle $
- T.setAttribute "javascripts" javascriptlinks $
- T.setAttribute "pagename" page $
+ setAttribute "pagetitle" pageTitle $
+ setAttribute "javascripts" javascriptlinks $
+ setAttribute "pagename" page $
(case user of
- Just u -> T.setAttribute "user" u
+ Just u -> setAttribute "user" u
Nothing -> id) $
- (if isPage page then T.setAttribute "ispage" "true" else id) $
- (if pgShowPageTools layout then T.setAttribute "pagetools" "true" else id) $
- (if pPrintable params then T.setAttribute "printable" "true" else id) $
- (if pRevision params == "HEAD" then id else T.setAttribute "nothead" "true") $
- T.setAttribute "revision" revision $
- T.setAttribute "sha1" sha1 $
- T.setAttribute "searchbox" (renderHtmlFragment searchbox) $
- T.setAttribute "exportbox" (renderHtmlFragment $ exportBox page params) $
- T.setAttribute "tabs" (renderHtmlFragment tabs) $
- T.setAttribute "messages" (renderHtmlFragment htmlMessages) $
- T.setAttribute "content" (renderHtmlFragment htmlContents) $
+ (if isPage page then setAttribute "ispage" "true" else id) $
+ (if pgShowPageTools layout then setAttribute "pagetools" "true" else id) $
+ (if pPrintable params then setAttribute "printable" "true" else id) $
+ (if pRevision params == "HEAD" then id else setAttribute "nothead" "true") $
+ setAttribute "revision" revision $
+ setAttribute "sha1" sha1 $
+ setAttribute "searchbox" (renderHtmlFragment searchbox) $
+ setAttribute "exportbox" (renderHtmlFragment $ exportBox page params) $
+ setAttribute "tabs" (renderHtmlFragment tabs) $
+ setAttribute "messages" (renderHtmlFragment htmlMessages) $
+ setAttribute "content" (renderHtmlFragment htmlContents) $
templ
ok $ setContentType "text/html" $ toResponse filledTemp
View
@@ -39,17 +39,17 @@ where
import Control.Monad (unless, liftM)
import Control.Monad.Trans
import Network.CGI (urlEncode)
-import System.FilePath
import System.Exit
import System.Process
import qualified Text.ParserCombinators.Parsec as P
-import qualified Data.ByteString.Lazy as B
import System.Directory
import System.IO (openTempFile)
-import Data.ByteString.Lazy.UTF8 (toString)
-import Codec.Binary.UTF8.String (encodeString)
+import Prelude hiding (readFile, writeFile)
+import System.IO.UTF8
+import Codec.Binary.UTF8.String (encodeString, decodeString)
import HAppS.State
import Gitit.State
+import Data.Char (chr)
-- | Run shell command and return error status, standard output, and error output.
runShellCommand :: FilePath -> Maybe [(String, String)] -> String -> [String] -> IO (ExitCode, String, String)
@@ -59,8 +59,10 @@ runShellCommand workingDir environment command optionList = do
(errorPath, hErr) <- openTempFile tempPath "err"
hProcess <- runProcess command optionList (Just workingDir) environment Nothing (Just hOut) (Just hErr)
status <- waitForProcess hProcess
- errorOutput <- liftM toString (B.readFile errorPath)
- output <- liftM toString (B.readFile outputPath)
+ errorOutput <- readFile errorPath
+ output <- readFile outputPath
+ removeFile errorPath
+ removeFile outputPath
return (status, errorOutput, output)
-- | Run git command and return error status, standard output, and error output. The repository
@@ -93,13 +95,37 @@ gitLog since author files = do
Right parsed -> return parsed
else error $ "git whatchanged returned error status.\n" ++ err
-gitLsTree :: MonadIO m => String -> m String
+gitLsTree :: MonadIO m => String -> m [String]
gitLsTree rev = do
(status, errOutput, output) <- runGitCommand "ls-tree" ["-r", rev]
if status == ExitSuccess
- then return output
+ then return $ map (convertEncoded . (unwords . drop 3 . words)) $ lines output
else error $ "git ls-tree returned error status.\n" ++ errOutput
+-- | git ls-tree returns UTF-8 filenames in quotes, with characters octal-escaped.
+-- like this: "\340\244\226.page"
+-- This function decodes these.
+convertEncoded :: String -> String
+convertEncoded s =
+ case P.parse pEncodedString s s of
+ Left _ -> s
+ Right res -> res
+
+pEncodedString :: P.GenParser Char st [Char]
+pEncodedString = do
+ P.char '"'
+ res <- P.many1 (pOctalChar P.<|> P.anyChar)
+ if last res == '"'
+ then return $ decodeString $ init res
+ else fail "No ending quotation mark."
+
+pOctalChar :: P.GenParser Char st Char
+pOctalChar = P.try $ do
+ P.char '\\'
+ ds <- P.count 3 (P.oneOf "01234567")
+ let num = read $ "0o" ++ ds
+ return $ chr num
+
gitGrep :: MonadIO m => [String] -> m String
gitGrep patterns = do
(status, errOutput, output) <- runGitCommand "grep" (["--all-match", "--ignore-case", "--word-regexp"] ++
View
@@ -0,0 +1,52 @@
+{-
+Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- Replacements for HAppS functions that don't handle UTF-8 properly.
+-}
+
+module Gitit.HAppS
+ ( look
+ , lookRead
+ , lookCookieValue
+ , mkCookie
+ )
+where
+import HAppS.Server hiding (look, lookRead, lookCookieValue, mkCookie)
+import qualified HAppS.Server (lookCookieValue, mkCookie)
+import Text.Pandoc.CharacterReferences (decodeCharacterReferences)
+import Control.Monad (liftM)
+import Data.ByteString.Lazy.UTF8 (toString)
+import Codec.Binary.UTF8.String (encodeString, decodeString)
+
+-- HAppS's look, lookRead, and lookCookieValue encode unicode characters
+-- (outside the standard latin1 range) using decimal character
+-- references. For gitit's purposes, we want them to return regular
+-- unicode characters instead.
+
+look :: String -> RqData String
+look = liftM (decodeCharacterReferences . toString) . HAppS.Server.lookBS
+
+lookRead :: Read a => String -> RqData a
+lookRead = liftM read . look
+
+lookCookieValue :: String -> RqData String
+lookCookieValue = liftM decodeString . HAppS.Server.lookCookieValue
+
+mkCookie :: String -> String -> Cookie
+mkCookie name val = HAppS.Server.mkCookie name (encodeString val)
+
View
@@ -0,0 +1,31 @@
+{-
+Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- Replacements for HStringTemplate functions that don't handle
+ UTF-8 properly.
+-}
+
+module Gitit.HStringTemplate ( setAttribute )
+where
+import Codec.Binary.UTF8.String (encodeString)
+import qualified Text.StringTemplate as T
+
+-- | A wrapper around HStringTemplate's setAttribute that encodes strings
+-- in UTF-8.
+setAttribute :: String -> String -> T.StringTemplate String -> T.StringTemplate String
+setAttribute attrName = T.setAttribute attrName . encodeString
View
@@ -1,4 +1,4 @@
-gitit: Gitit.hs Gitit/Git.hs Gitit/State.hs
+gitit: Gitit.hs Gitit/Git.hs Gitit/State.hs Gitit/HAppS.hs Gitit/HStringTemplate.hs
ghc --make -Wall -o gitit Gitit.hs -threaded -idata
.PHONY: static clean
View
@@ -250,7 +250,8 @@ Character encodings
===================
Gitit assumes that the page files (stored in the git repository) are
-encoded as UTF-8.
+encoded as UTF-8. Even page names may be UTF-8 if the file system supports
+this. You should use a UTF-8 locale when running gitit.
Reporting bugs
==============
View
@@ -1,5 +1,5 @@
name: gitit
-version: 0.3.3
+version: 0.3.4
Cabal-version: >= 1.2
build-type: Simple
synopsis: Wiki using HAppS, git, and pandoc.
@@ -42,7 +42,8 @@ data-files: css/screen.css, css/print.css, css/ie.css, css/hk-pyg.css,
Executable gitit
hs-source-dirs: .
main-is: Gitit.hs
- other-modules: Gitit.State, Gitit.Git, Paths_gitit
+ other-modules: Gitit.State, Gitit.Git, Gitit.HAppS, Gitit.HStringTemplate,
+ Paths_gitit
build-depends: base >=3, parsec < 3, pretty, xhtml, containers, pandoc
>= 1.1, process, filepath, directory, mtl, cgi,
network, old-time, highlighting-kate, bytestring,

0 comments on commit ebbec6f

Please sign in to comment.