diff --git a/README.md b/README.md index a930481cc..060f014ce 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -#Gander [![Build Status](https://img.shields.io/travis/intenthq/gander.svg)](https://travis-ci.org/intenthq/gander) [![Coverage Status] (https://img.shields.io/coveralls/intenthq/gander.svg)](https://coveralls.io/github/intenthq/gander?branch=master) [![Maven Central](https://img.shields.io/maven-central/v/com.intenthq/gander_2.11.svg)](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.intenthq%22%20AND%20a%3A%22gander_2.11%22) [![Join the chat at https://gitter.im/intenthq/gander](https://img.shields.io/badge/gitter-join%20chat-green.svg)](https://gitter.im/intenthq/gander) +# Gander [![Build Status](https://img.shields.io/travis/intenthq/gander/master.svg)](https://travis-ci.org/intenthq/gander) [![Coverage Status](https://img.shields.io/coveralls/intenthq/gander.svg)](https://coveralls.io/github/intenthq/gander?branch=master) [![Maven Central](https://img.shields.io/maven-central/v/com.intenthq/gander_2.11.svg)](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.intenthq%22%20AND%20a%3A%22gander_2.11%22) [![Join the chat at https://gitter.im/intenthq/gander](https://img.shields.io/badge/gitter-join%20chat-green.svg)](https://gitter.im/intenthq/gander) **Gander is a scala library that extracts metadata and content from web pages.** diff --git a/src/test/scala/com/intenthq/gander/ContentExtractorSpec.scala b/src/test/scala/com/intenthq/gander/ContentExtractorSpec.scala index 6c44b2177..cebb01db3 100644 --- a/src/test/scala/com/intenthq/gander/ContentExtractorSpec.scala +++ b/src/test/scala/com/intenthq/gander/ContentExtractorSpec.scala @@ -17,6 +17,48 @@ class ContentExtractorSpec extends Specification { } } + "extractCanonicalLink" >> { + "should return none if no link found" >> { + val html = + """ + | + | + |""".stripMargin + extractCanonicalLink(Jsoup.parse(html)) must beNone + } + + "should extract the canonical link from the meta tag" 
>> { + val html = + """ + | + | + | + | + | + |""".stripMargin + extractCanonicalLink(Jsoup.parse(html)) must beSome("http://example.com/canonical") + } + "should extract the facebook og:url meta tag" >> { + val html = + """ + | + | + | + | + |""".stripMargin + extractCanonicalLink(Jsoup.parse(html)) must beSome("http://example.com/og") + } + "should extract the twitter:url meta tag" >> { + val html = + """ + | + | + | + |""".stripMargin + extractCanonicalLink(Jsoup.parse(html)) must beSome("http://example.com/twitter") + } + } + "extractLang" >> { "should extract lang from html tag and give priority to it" >> { val html =