Permalink
Browse files

Adding unit tests for more sites and also fixing the domain that wasn't being set correctly on the article object
  • Loading branch information...
1 parent fde6572 commit 3617b848978c73de04031a32afaddefb7a6980f1 @jiminoc jiminoc committed Jan 25, 2011
@@ -8,11 +8,12 @@
import org.apache.log4j.Logger;
import org.jsoup.nodes.Element;
+import java.net.MalformedURLException;
+import java.net.URL;
import java.util.ArrayList;
/**
* This class represents the extraction of an Article from a website
- *
*/
public class Article {
@@ -81,6 +82,7 @@
/**
* returns the title of the webpage
+ *
* @return
*/
public String getTitle() {
@@ -91,7 +93,7 @@ public void setTitle(String title) {
this.title = title;
}
- public String getMetaDescription() {
+ public String getMetaDescription() {
return metaDescription; // hands back the metaDescription field unchanged
}
@@ -119,7 +121,16 @@ public String getDomain() {
return domain;
}
- public void setDomain(String domain) {
+ public void setDomain(String urlToParse) {
+ String domain = ""; // value stored when urlToParse cannot be parsed as a URL
+
+ URL url = null; // NOTE(review): initial null is overwritten inside the try before any read
+ try {
+ url = new URL(urlToParse); // throws MalformedURLException for an unparseable URL string
+ domain = url.getHost(); // keep only the host part, e.g. "grapevinyl.com"
+ } catch (MalformedURLException e) {
+ logger.error(e.toString(), e); // log and fall through; domain stays ""
+ }
this.domain = domain; // store the extracted host (or "" on failure) on the article
}
@@ -140,7 +151,6 @@ public void setMovies(ArrayList<Element> movies) {
}
-
/**
* returns the list held in the imageCandidates field
*
* @return the imageCandidates list itself (no copy is made)
*/
public ArrayList<String> getImageCandidates() {
return imageCandidates;
}
@@ -15,9 +15,19 @@ public void testArticle()
{
Article article = new Article();
article.setTitle("This is a title");
+ assertEquals("This is a title", article.getTitle());
+ }
- assertEquals("This is a title", article.getTitle());
+ public void testSettingDomainOnArticle() { // setDomain takes a full URL; getDomain should yield only its host
+
+ Article article = new Article();
+ article.setDomain("http://grapevinyl.com/v/84/magnetic-morning/getting-nowhere"); // host without a "www." prefix
+ assertEquals("grapevinyl.com", article.getDomain()); // scheme and path are dropped
+
+ article.setDomain("http://www.economist.com/v/84/magnetic-morning/getting-nowhere"); // second call replaces the earlier domain
+ assertEquals("www.economist.com", article.getDomain()); // the "www." prefix is kept as part of the host
}
+
}
@@ -466,5 +466,15 @@ public void testTheVacationGals() {
assertTrue(article.getCleanedArticleText().startsWith("Editors&rsquo; Note: We are huge proponents"));
assertTrue(article.getTopImage().getImageSrc().equals("http://thevacationgals.com/wp-content/uploads/2010/11/Gemmel-Family-Reunion-at-a-Vacation-Rental-Home1-300x225.jpg"));
}
+
+ // test the extraction of videos from a page
+ public void testGettingVideosFromGraphVinyl() { // NOTE(review): name says "GraphVinyl" but the URL below is grapevinyl.com
+ String url = "http://grapevinyl.com/v/84/magnetic-morning/getting-nowhere";
+ ContentExtractor contentExtractor = new ContentExtractor();
+ Article article = contentExtractor.extractContent(url); // presumably fetches the live page; confirm before running offline
+ assertTrue(article.getMovies().get(0).attr("src").equals("http://www.youtube.com/v/dsVWVtGWoa4&hl=en_US&fs=1&color1=d6d6d6&color2=ffffff&autoplay=1&iv_load_policy=3&rel=0&showinfo=0&hd=1")); // first movie element's src must match this exact embed URL
+
+ }
+
}

0 comments on commit 3617b84

Please sign in to comment.