Skip to content

Commit

Permalink
adding unit tests for more sites and also fixing the domain that wasn…
Browse files Browse the repository at this point in the history
…'t being set correctly on the article object
  • Loading branch information
jiminoc committed Jan 25, 2011
1 parent fde6572 commit 3617b84
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 5 deletions.
18 changes: 14 additions & 4 deletions src/main/java/com/jimplush/goose/Article.java
Expand Up @@ -8,11 +8,12 @@
import org.apache.log4j.Logger;
import org.jsoup.nodes.Element;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;

/**
* This class represents the extraction of an Article from a website
*
*/
public class Article {

Expand Down Expand Up @@ -81,6 +82,7 @@ public class Article {

/**
* returns the title of the webpage
*
* @return the title of the webpage
*/
public String getTitle() {
Expand All @@ -91,7 +93,7 @@ public void setTitle(String title) {
this.title = title;
}

public String getMetaDescription() {
/**
 * Returns the meta description currently stored on this article.
 *
 * @return the value of the {@code metaDescription} field
 */
public String getMetaDescription() {
    return this.metaDescription;
}

Expand Down Expand Up @@ -119,7 +121,16 @@ public String getDomain() {
return domain;
}

public void setDomain(String domain) {
/**
 * Extracts the host from the given URL and stores it as this article's domain.
 * When the URL cannot be parsed, the failure is logged and the domain is set to
 * an empty string.
 *
 * @param urlToParse the URL whose host becomes the domain
 */
public void setDomain(String urlToParse) {
    String host;
    try {
        host = new URL(urlToParse).getHost();
    } catch (MalformedURLException malformed) {
        logger.error(malformed.toString(), malformed);
        // fall back to the empty domain when the URL is unparseable
        host = "";
    }
    this.domain = host;
}

Expand All @@ -140,7 +151,6 @@ public void setMovies(ArrayList<Element> movies) {
}



/**
 * Returns the image candidates stored on this article.
 *
 * @return the value of the {@code imageCandidates} field
 */
public ArrayList<String> getImageCandidates() {
    return this.imageCandidates;
}
Expand Down
12 changes: 11 additions & 1 deletion src/test/java/com/jimplush/goose/ArticleTest.java
Expand Up @@ -15,9 +15,19 @@ public void testArticle()
{
Article article = new Article();
article.setTitle("This is a title");
assertEquals("This is a title", article.getTitle());
}


assertEquals("This is a title", article.getTitle());
/**
 * Verifies that passing a full URL to setDomain leaves only the host name
 * readable through getDomain.
 */
public void testSettingDomainOnArticle() {
    Article subject = new Article();

    // host without a "www." prefix
    subject.setDomain("http://grapevinyl.com/v/84/magnetic-morning/getting-nowhere");
    String bareHost = subject.getDomain();
    assertEquals("grapevinyl.com", bareHost);

    // host with a "www." prefix: the expected value keeps the prefix
    subject.setDomain("http://www.economist.com/v/84/magnetic-morning/getting-nowhere");
    String prefixedHost = subject.getDomain();
    assertEquals("www.economist.com", prefixedHost);
}


}
10 changes: 10 additions & 0 deletions src/test/java/com/jimplush/goose/GoldSitesTest.java
Expand Up @@ -466,5 +466,15 @@ public void testTheVacationGals() {
assertTrue(article.getCleanedArticleText().startsWith("Editors&rsquo; Note: We are huge proponents"));
assertTrue(article.getTopImage().getImageSrc().equals("http://thevacationgals.com/wp-content/uploads/2010/11/Gemmel-Family-Reunion-at-a-Vacation-Rental-Home1-300x225.jpg"));
}

/**
 * Tests the extraction of videos from a page: the first movie element extracted
 * from the grapevinyl page must carry the expected "src" attribute.
 */
public void testGettingVideosFromGraphVinyl() {
    String expectedSrc = "http://www.youtube.com/v/dsVWVtGWoa4&hl=en_US&fs=1&color1=d6d6d6&color2=ffffff&autoplay=1&iv_load_policy=3&rel=0&showinfo=0&hd=1";

    ContentExtractor extractor = new ContentExtractor();
    Article article = extractor.extractContent("http://grapevinyl.com/v/84/magnetic-morning/getting-nowhere");

    // only the first extracted movie element is checked
    assertTrue(article.getMovies().get(0).attr("src").equals(expectedSrc));
}

}

0 comments on commit 3617b84

Please sign in to comment.