Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

adding unit tests for more sites and also fixing the domain that wasn't being set correctly on the article object
  • Loading branch information...
commit 3617b848978c73de04031a32afaddefb7a6980f1 1 parent fde6572
@jiminoc jiminoc authored
View
18 src/main/java/com/jimplush/goose/Article.java
@@ -8,11 +8,12 @@
import org.apache.log4j.Logger;
import org.jsoup.nodes.Element;
+import java.net.MalformedURLException;
+import java.net.URL;
import java.util.ArrayList;
/**
* This class represents the extraction of an Article from a website
- *
*/
public class Article {
@@ -81,6 +82,7 @@
/**
* returns the title of the webpage
+ *
* @return
*/
public String getTitle() {
@@ -91,7 +93,7 @@ public void setTitle(String title) {
this.title = title;
}
- public String getMetaDescription() {
+ public String getMetaDescription() {
return metaDescription;
}
@@ -119,7 +121,16 @@ public String getDomain() {
return domain;
}
- public void setDomain(String domain) {
+ public void setDomain(String urlToParse) {
+ String domain = "";
+
+ URL url = null;
+ try {
+ url = new URL(urlToParse);
+ domain = url.getHost();
+ } catch (MalformedURLException e) {
+ logger.error(e.toString(), e);
+ }
this.domain = domain;
}
@@ -140,7 +151,6 @@ public void setMovies(ArrayList<Element> movies) {
}
-
public ArrayList<String> getImageCandidates() {
return imageCandidates;
}
View
12 src/test/java/com/jimplush/goose/ArticleTest.java
@@ -15,9 +15,19 @@ public void testArticle()
{
Article article = new Article();
article.setTitle("This is a title");
+ assertEquals("This is a title", article.getTitle());
+ }
- assertEquals("This is a title", article.getTitle());
+ public void testSettingDomainOnArticle() {
+
+ Article article = new Article();
+ article.setDomain("http://grapevinyl.com/v/84/magnetic-morning/getting-nowhere");
+ assertEquals("grapevinyl.com", article.getDomain());
+
+ article.setDomain("http://www.economist.com/v/84/magnetic-morning/getting-nowhere");
+ assertEquals("www.economist.com", article.getDomain());
}
+
}
View
10 src/test/java/com/jimplush/goose/GoldSitesTest.java
@@ -466,5 +466,15 @@ public void testTheVacationGals() {
assertTrue(article.getCleanedArticleText().startsWith("Editors&rsquo; Note: We are huge proponents"));
assertTrue(article.getTopImage().getImageSrc().equals("http://thevacationgals.com/wp-content/uploads/2010/11/Gemmel-Family-Reunion-at-a-Vacation-Rental-Home1-300x225.jpg"));
}
+
+ // test the extraction of videos from a page
+ public void testGettingVideosFromGraphVinyl() {
+ String url = "http://grapevinyl.com/v/84/magnetic-morning/getting-nowhere";
+ ContentExtractor contentExtractor = new ContentExtractor();
+ Article article = contentExtractor.extractContent(url);
+ assertTrue(article.getMovies().get(0).attr("src").equals("http://www.youtube.com/v/dsVWVtGWoa4&hl=en_US&fs=1&color1=d6d6d6&color2=ffffff&autoplay=1&iv_load_policy=3&rel=0&showinfo=0&hd=1"));
+
+ }
+
}
Please sign in to comment.
Something went wrong with that request. Please try again.