Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Filter out all the links on a downloaded page.

  • Loading branch information...
commit f7ff379fbbe17e827f18a8e634f465b0f35ded64 1 parent ef1f64f
@bos authored
Showing with 13 additions and 1 deletion.
  1. +10 −0 src/Links.hs
  2. +2 −1  src/Main.hs
  3. +1 −0  strange-loop-2011.cabal
View
10 src/Links.hs
@@ -0,0 +1,10 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Links where
+
+import Text.HTML.TagSoup
+import qualified Data.Text.Lazy as T
+
+-- | Extract the targets of all followable hyperlinks from an HTML page.
+--
+-- The page is parsed leniently with TagSoup and tag names are canonicalized,
+-- so differently-cased @\<A\>@ tags are recognised. A link is skipped when its
+-- @rel@ attribute lists the @nofollow@ token; @rel@ is a space-separated,
+-- case-insensitive token list, so values such as @"external nofollow"@ or
+-- @"NoFollow"@ are skipped too. Anchors with a missing or empty @href@ are
+-- dropped ('fromAttrib' yields an empty string for an absent attribute).
+links :: T.Text -> [T.Text]
+links = filter (not . T.null) . map (fromAttrib "href") . filter followable . anchors
+  where
+    -- Opening @\<a\>@ tags of the page, with tag and attribute names lower-cased.
+    anchors = filter (isTagOpenName "a") . canonicalizeTags . parseTags
+    -- True unless the tag's @rel@ token list contains @nofollow@.
+    followable t = "nofollow" `notElem` T.words (T.toLower (fromAttrib "rel" t))
+
View
3  src/Main.hs
@@ -1,8 +1,9 @@
import System.Environment
import Download
+import Links
main = do
args <- getArgs
putStrLn ("So! Your args are " ++ show args)
page <- download (head args)
- print page
+ print (links page)
View
1  strange-loop-2011.cabal
@@ -11,4 +11,5 @@ executable strange-loop
base,
bytestring,
http-enumerator,
+ tagsoup,
text
Please sign in to comment.
Something went wrong with that request. Please try again.