Skip to content
Browse files

export just sanitizeXSS

  • Loading branch information...
1 parent 5dbc142 commit eb5b78d42934986bfea0432912fd9129b90fda27 @gregwebs committed
Showing with 6 additions and 13 deletions.
  1. +1 −1 README.md
  2. +5 −12 Text/HTML/SanitizeXSS.hs
View
2 README.md
@@ -21,7 +21,7 @@ TagSoup is used to parse the HTML, and it does a good job. However TagSoup does
<a href>, <a href>
<a></a>, <a/>
-img and br tags will be output as a single self-closing tags. Other self-closing tags will be output as an open and closing pair. There are future updates to TagSoup planned to fix these cases.
+img and br tags will each be output as a single self-closing tag. Other self-closing tags will be output as an open and closing pair. So <img /> or <img><img> converts to <img />, and <a></a> or <a/> converts to <a></a>. There are future updates to TagSoup planned to fix these cases.
Integration
===========
View
17 Text/HTML/SanitizeXSS.hs
@@ -1,17 +1,17 @@
-module Text.HTML.SanitizeXSS where
+module Text.HTML.SanitizeXSS (sanitizeXSS) where
import Text.HTML.TagSoup
import Data.Set (Set(), member, fromList)
-import Data.Char ( toLower, isAscii )
+import Data.Char ( toLower )
import Network.URI ( parseURIReference, URI (..),
- isAllowedInURI, escapeURIString, unEscapeString, uriScheme )
-import Codec.Binary.UTF8.String ( encodeString, decodeString )
+ isAllowedInURI, escapeURIString, uriScheme )
+import Codec.Binary.UTF8.String ( encodeString )
sanitizeXSS :: String -> String
sanitizeXSS = renderTagsOptions renderOptions {
- optMinimize = \x -> x `elem` ["br","img"]
+ optMinimize = \x -> x `elem` ["br","img"] -- <img><img> converts to <img />, <a/> converts to <a></a>
} . safeTags . parseTags
where
safeTags :: [Tag String] -> [Tag String]
@@ -44,13 +44,6 @@ sanitaryURI u =
-- | Percent-escape a URI string: the input is first passed through
-- 'encodeString' (UTF-8 encoding), then every character rejected by
-- 'isAllowedInURI' is escaped via 'escapeURIString'.
escapeURI :: String -> String
escapeURI raw = escapeURIString isAllowedInURI (encodeString raw)
--- | Unescape unicode and some special characters in a URI, but
--- without introducing spaces.
-unescapeURI :: String -> String
-unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
- decodeString . unEscapeString
-
-
safeURISchemes :: Set String
safeURISchemes = fromList [ "", "http:", "https:", "ftp:", "mailto:", "file:",

0 comments on commit eb5b78d

Please sign in to comment.
Something went wrong with that request. Please try again.