diff --git a/pom.xml b/pom.xml index aeac7e1..560e755 100644 --- a/pom.xml +++ b/pom.xml @@ -1,12 +1,12 @@ - + 4.0.0 - com.github.dfabulich + com.github.TomWalbers sitemapgen4j jar 1.0.7-SNAPSHOT SitemapGen4J - https://github.com/dfabulich/sitemapgen4j/ + https://github.com/TomWalbers/sitemapgen4j/ SitemapGen4j is an XML sitemap generator written in Java. @@ -16,9 +16,9 @@ - scm:git:git://github.com:dfabulich/sitemapgen4j.git - scm:git:git@github.com:dfabulich/sitemapgen4j.git - https://github.com/dfabulich/sitemapgen4j/ + scm:git:git://github.com:TomWalbers/sitemapgen4j.git + scm:git:git@github.com:TomWalbers/sitemapgen4j.git + https://github.com/TomWalbers/sitemapgen4j/ UTF-8 diff --git a/src/main/java/com/redfin/sitemapgenerator/AbstractSitemapUrlRenderer.java b/src/main/java/com/redfin/sitemapgenerator/AbstractSitemapUrlRenderer.java index a8ec3b2..0d59a35 100644 --- a/src/main/java/com/redfin/sitemapgenerator/AbstractSitemapUrlRenderer.java +++ b/src/main/java/com/redfin/sitemapgenerator/AbstractSitemapUrlRenderer.java @@ -43,4 +43,9 @@ public void renderTag(StringBuilder sb, String namespace, String tagName, Object sb.append(">\n"); } + public void renderSubTag(StringBuilder sb, String namespace, String tagName, Object value) { + sb.append(" "); + renderTag(sb, namespace, tagName, value); + } + } diff --git a/src/main/java/com/redfin/sitemapgenerator/GoogleNewsPublication.java b/src/main/java/com/redfin/sitemapgenerator/GoogleNewsPublication.java new file mode 100644 index 0000000..7dbf2b4 --- /dev/null +++ b/src/main/java/com/redfin/sitemapgenerator/GoogleNewsPublication.java @@ -0,0 +1,31 @@ +package com.redfin.sitemapgenerator; + +/** + * @author Tom + * @since 23/05/2017 + */ +public class GoogleNewsPublication { + private String name; + private String language; + + public GoogleNewsPublication(String name, String language) { + this.name = name; + this.language = language; + } + + public String getName() { + return name; + } + + public void setName(String 
name) { + this.name = name; + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } +} diff --git a/src/main/java/com/redfin/sitemapgenerator/GoogleNewsSitemapGenerator.java b/src/main/java/com/redfin/sitemapgenerator/GoogleNewsSitemapGenerator.java index 6e88f3b..9947f41 100644 --- a/src/main/java/com/redfin/sitemapgenerator/GoogleNewsSitemapGenerator.java +++ b/src/main/java/com/redfin/sitemapgenerator/GoogleNewsSitemapGenerator.java @@ -6,106 +6,121 @@ /** * Builds a sitemap for Google News. To configure options, use {@link #builder(URL, File)} + * * @author Dan Fabulich * @see Creating a News Sitemap */ -public class GoogleNewsSitemapGenerator extends SitemapGenerator { - - /** 1000 URLs max in a Google News sitemap. */ - public static final int MAX_URLS_PER_SITEMAP = 1000; - - /** Configures a builder so you can specify sitemap generator options - * - * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL - * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on. - * @return a builder; call .build() on it to make a sitemap generator - */ - public static SitemapGeneratorBuilder builder(URL baseUrl, File baseDir) { - SitemapGeneratorBuilder builder = - new SitemapGeneratorBuilder(baseUrl, baseDir, GoogleNewsSitemapGenerator.class); - builder.maxUrls = 1000; - return builder; - } - - /** Configures a builder so you can specify sitemap generator options - * - * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL - * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on. 
- * @return a builder; call .build() on it to make a sitemap generator - */ - public static SitemapGeneratorBuilder builder(String baseUrl, File baseDir) throws MalformedURLException { - SitemapGeneratorBuilder builder = - new SitemapGeneratorBuilder(baseUrl, baseDir, GoogleNewsSitemapGenerator.class); - builder.maxUrls = GoogleNewsSitemapGenerator.MAX_URLS_PER_SITEMAP; - return builder; - } - - GoogleNewsSitemapGenerator(AbstractSitemapGeneratorOptions options) { - super(options, new Renderer()); - if (options.maxUrls > GoogleNewsSitemapGenerator.MAX_URLS_PER_SITEMAP) { - throw new RuntimeException("Google News sitemaps can have only 1000 URLs per sitemap: " + options.maxUrls); - } - } - - /** Configures the generator with a base URL and directory to write the sitemap files. - * - * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL - * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on. - * @throws MalformedURLException - */ - public GoogleNewsSitemapGenerator(String baseUrl, File baseDir) - throws MalformedURLException { - this(new SitemapGeneratorOptions(baseUrl, baseDir)); - } - - /** Configures the generator with a base URL and directory to write the sitemap files. - * - * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL - * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on. - */ - public GoogleNewsSitemapGenerator(URL baseUrl, File baseDir) { - this(new SitemapGeneratorOptions(baseUrl, baseDir)); - } - - /**Configures the generator with a base URL and a null directory. The object constructed - * is not intended to be used to write to files. Rather, it is intended to be used to obtain - * XML-formatted strings that represent sitemaps. 
- * - * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL - */ - public GoogleNewsSitemapGenerator(String baseUrl) throws MalformedURLException { - this(new SitemapGeneratorOptions(new URL(baseUrl))); - } - - /**Configures the generator with a base URL and a null directory. The object constructed - * is not intended to be used to write to files. Rather, it is intended to be used to obtain - * XML-formatted strings that represent sitemaps. - * - * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL - */ - public GoogleNewsSitemapGenerator(URL baseUrl) { - this(new SitemapGeneratorOptions(baseUrl)); - } - - private static class Renderer extends AbstractSitemapUrlRenderer implements ISitemapUrlRenderer { - - public Class getUrlClass() { - return GoogleNewsSitemapUrl.class; - } - - public String getXmlNamespaces() { - return "xmlns:news=\"http://www.google.com/schemas/sitemap-news/0.9\""; - } - - public void render(GoogleNewsSitemapUrl url, StringBuilder sb, W3CDateFormat dateFormat) { - StringBuilder tagSb = new StringBuilder(); - tagSb.append(" \n"); - renderTag(tagSb, "news", "publication_date", dateFormat.format(url.getPublicationDate())); - renderTag(tagSb, "news", "keywords", url.getKeywords()); - tagSb.append(" \n"); - super.render(url, sb, dateFormat, tagSb.toString()); - } - - } +public class GoogleNewsSitemapGenerator extends SitemapGenerator { + + /** + * 1000 URLs max in a Google News sitemap. + */ + public static final int MAX_URLS_PER_SITEMAP = 1000; + + GoogleNewsSitemapGenerator(AbstractSitemapGeneratorOptions options) { + super(options, new Renderer()); + if (options.maxUrls > GoogleNewsSitemapGenerator.MAX_URLS_PER_SITEMAP) { + throw new RuntimeException("Google News sitemaps can have only 1000 URLs per sitemap: " + options.maxUrls); + } + } + + /** + * Configures the generator with a base URL and directory to write the sitemap files. 
+ * + * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL + * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on. + * @throws MalformedURLException + */ + public GoogleNewsSitemapGenerator(String baseUrl, File baseDir) + throws MalformedURLException { + this(new SitemapGeneratorOptions(baseUrl, baseDir)); + } + + /** + * Configures the generator with a base URL and directory to write the sitemap files. + * + * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL + * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on. + */ + public GoogleNewsSitemapGenerator(URL baseUrl, File baseDir) { + this(new SitemapGeneratorOptions(baseUrl, baseDir)); + } + + /** + * Configures the generator with a base URL and a null directory. The object constructed + * is not intended to be used to write to files. Rather, it is intended to be used to obtain + * XML-formatted strings that represent sitemaps. + * + * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL + */ + public GoogleNewsSitemapGenerator(String baseUrl) throws MalformedURLException { + this(new SitemapGeneratorOptions(new URL(baseUrl))); + } + + /** + * Configures the generator with a base URL and a null directory. The object constructed + * is not intended to be used to write to files. Rather, it is intended to be used to obtain + * XML-formatted strings that represent sitemaps. 
+ * + * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL + */ + public GoogleNewsSitemapGenerator(URL baseUrl) { + this(new SitemapGeneratorOptions(baseUrl)); + } + + /** + * Configures a builder so you can specify sitemap generator options + * + * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL + * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on. + * @return a builder; call .build() on it to make a sitemap generator + */ + public static SitemapGeneratorBuilder builder(URL baseUrl, File baseDir) { + SitemapGeneratorBuilder builder = + new SitemapGeneratorBuilder(baseUrl, baseDir, GoogleNewsSitemapGenerator.class); + builder.maxUrls = 1000; + return builder; + } + + /** + * Configures a builder so you can specify sitemap generator options + * + * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL + * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on. 
+ * @return a builder; call .build() on it to make a sitemap generator + */ + public static SitemapGeneratorBuilder builder(String baseUrl, File baseDir) throws MalformedURLException { + SitemapGeneratorBuilder builder = + new SitemapGeneratorBuilder(baseUrl, baseDir, GoogleNewsSitemapGenerator.class); + builder.maxUrls = GoogleNewsSitemapGenerator.MAX_URLS_PER_SITEMAP; + return builder; + } + + private static class Renderer extends AbstractSitemapUrlRenderer implements ISitemapUrlRenderer { + + public Class getUrlClass() { + return GoogleNewsSitemapUrl.class; + } + + public String getXmlNamespaces() { + return "xmlns:news=\"http://www.google.com/schemas/sitemap-news/0.9\""; + } + + public void render(GoogleNewsSitemapUrl url, StringBuilder sb, W3CDateFormat dateFormat) { + StringBuilder tagSb = new StringBuilder(); + tagSb.append(" \n"); + tagSb.append(" \n"); + renderSubTag(tagSb, "news", "name", url.getPublication().getName()); + renderSubTag(tagSb, "news", "language", url.getPublication().getLanguage()); + tagSb.append(" \n"); + renderTag(tagSb, "news", "genres", url.getGenres()); + renderTag(tagSb, "news", "publication_date", dateFormat.format(url.getPublicationDate())); + renderTag(tagSb, "news", "title", url.getTitle()); + renderTag(tagSb, "news", "keywords", url.getKeywords()); + tagSb.append(" \n"); + super.render(url, sb, dateFormat, tagSb.toString()); + } + + } } diff --git a/src/main/java/com/redfin/sitemapgenerator/GoogleNewsSitemapUrl.java b/src/main/java/com/redfin/sitemapgenerator/GoogleNewsSitemapUrl.java index f3f733f..ef9aa42 100644 --- a/src/main/java/com/redfin/sitemapgenerator/GoogleNewsSitemapUrl.java +++ b/src/main/java/com/redfin/sitemapgenerator/GoogleNewsSitemapUrl.java @@ -7,89 +7,179 @@ /** * One configurable Google News Search URL. 
To configure, use {@link Options} + * * @author Dan Fabulich * @see Options * @see Creating a News Sitemap */ public class GoogleNewsSitemapUrl extends WebSitemapUrl { - private final Date publicationDate; - private final String keywords; - - /** Options to configure Google News URLs */ - public static class Options extends AbstractSitemapUrlOptions { - private Date publicationDate; - private String keywords; - - /** Specifies an URL and publication date (which is mandatory for Google News) */ - public Options(String url, Date publicationDate) throws MalformedURLException { - this(new URL(url), publicationDate); - } - - /** Specifies an URL and publication date (which is mandatory for Google News) */ - public Options(URL url, Date publicationDate) { - super(url, GoogleNewsSitemapUrl.class); - if (publicationDate == null) throw new NullPointerException("publicationDate must not be null"); - this.publicationDate = publicationDate; - } - - /** Specifies a list of comma-delimited keywords */ - public Options keywords(String keywords) { - this.keywords = keywords; - return this; - } - - /** Specifies a list of comma-delimited keywords */ - public Options keywords(Iterable keywords) { - StringBuilder sb = new StringBuilder(); - boolean first = true; - for (String keyword : keywords) { - if (first) { - first = false; - } else { - sb.append(", "); - } - sb.append(keyword); - } - this.keywords = sb.toString(); - return this; - } - - /** Specifies a list of comma-delimited keywords */ - public Options keywords(String... 
keywords) { - return keywords(Arrays.asList(keywords)); - } - - } - - /** Specifies an URL and publication date (which is mandatory for Google News) */ - public GoogleNewsSitemapUrl(URL url, Date publicationDate) { - this(new Options(url, publicationDate)); - } - - /** Specifies an URL and publication date (which is mandatory for Google News) */ - public GoogleNewsSitemapUrl(String url, Date publicationDate) throws MalformedURLException { - this(new Options(url, publicationDate)); - } - - /** Configures an URL with options */ - public GoogleNewsSitemapUrl(Options options) { - super(options); - publicationDate = options.publicationDate; - keywords = options.keywords; - } - - /** Retrieves the publication date */ - public Date getPublicationDate() { - return publicationDate; - } - - /** Retrieves the list of comma-delimited keywords */ - public String getKeywords() { - return keywords; - } + private final Date publicationDate; + private final String keywords; + private final String genres; + private final String title; + private final GoogleNewsPublication publication; + /** + * Specifies an URL and publication date, title and publication (which are mandatory for Google News) + */ + public GoogleNewsSitemapUrl(URL url, Date publicationDate, String title, String name, String language) { + this(new Options(url, publicationDate, title, name, language)); + } + /** + * Specifies an URL and publication date, title and publication (which are mandatory for Google News) + */ + public GoogleNewsSitemapUrl(URL url, Date publicationDate, String title, GoogleNewsPublication publication) { + this(new Options(url, publicationDate, title, publication)); + } + /** + * Specifies an URL and publication date, title and publication (which are mandatory for Google News) + */ + public GoogleNewsSitemapUrl(String url, Date publicationDate, String title, String name, String language) throws MalformedURLException { + this(new Options(url, publicationDate, title, name, language)); + } + /** + 
* Specifies an URL and publication date, title and publication (which are mandatory for Google News) + */ + public GoogleNewsSitemapUrl(String url, Date publicationDate, String title, GoogleNewsPublication publication) throws MalformedURLException { + this(new Options(url, publicationDate, title, publication)); + } + + /** + * Configures an URL with options + */ + public GoogleNewsSitemapUrl(Options options) { + super(options); + publicationDate = options.publicationDate; + keywords = options.keywords; + genres = options.genres; + title = options.title; + publication = options.publication; + } + + /** + * Retrieves the publication date + */ + public Date getPublicationDate() { + return publicationDate; + } + + /** + * Retrieves the list of comma-delimited keywords + */ + public String getKeywords() { + return keywords; + } + + /** + * Retrieves the Genres + */ + public String getGenres() { + return genres; + } + + /** + * Retrieves the title + */ + public String getTitle() { + return title; + } + + /** + * Retrieves the publication with name and language + */ + public GoogleNewsPublication getPublication() { + return publication; + } + + /** + * Options to configure Google News URLs + */ + public static class Options extends AbstractSitemapUrlOptions { + private Date publicationDate; + private String keywords; + private String genres; + private String title; + private GoogleNewsPublication publication; + + /** + * Specifies an URL and publication date, title and publication (which are mandatory for Google News) + */ + public Options(String url, Date publicationDate, String title, GoogleNewsPublication publication) throws MalformedURLException { + this(new URL(url), publicationDate, title, publication); + } + + public Options(String url, Date publicationDate, String title, String name, String language) throws MalformedURLException { + this(new URL(url), publicationDate, title, new GoogleNewsPublication(name, language)); + } + + public Options(URL url, Date publicationDate, String title, 
String name, String language) { + this(url, publicationDate, title, new GoogleNewsPublication(name, language)); + } + + public Options(URL url, Date publicationDate, String title, GoogleNewsPublication publication) { + super(url, GoogleNewsSitemapUrl.class); + if (publicationDate == null) throw new NullPointerException("publicationDate must not be null"); + this.publicationDate = publicationDate; + if (title == null) throw new NullPointerException("title must not be null"); + this.title = title; + if (publication == null) throw new NullPointerException("publication must not be null"); + this.publication = publication; + } + + /** + * Specifies a list of comma-delimited keywords + */ + public Options keywords(String keywords) { + this.keywords = keywords; + return this; + } + + /** + * Specifies a list of comma-delimited keywords + */ + public Options keywords(Iterable keywords) { + this.keywords = getListAsCommaSeparatedString(keywords); + return this; + } + + /** + * Specifies a list of comma-delimited keywords + */ + public Options keywords(String... keywords) { + return keywords(Arrays.asList(keywords)); + } + + public Options genres(String genres) { + this.genres = genres; + return this; + } + + public Options genres(Iterable genres) { + this.genres = getListAsCommaSeparatedString(genres); + return this; + } + + private String getListAsCommaSeparatedString(Iterable genres) { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (String genre : genres) { + if (first) { + first = false; + } else { + sb.append(", "); + } + sb.append(genre); + } + return sb.toString(); + } + + public Options genres(String... 
genres) { + return genres(Arrays.asList(genres)); + } + + } } diff --git a/src/test/java/com/redfin/sitemapgenerator/GoogleNewsSitemapUrlTest.java b/src/test/java/com/redfin/sitemapgenerator/GoogleNewsSitemapUrlTest.java index e147f69..80806fc 100644 --- a/src/test/java/com/redfin/sitemapgenerator/GoogleNewsSitemapUrlTest.java +++ b/src/test/java/com/redfin/sitemapgenerator/GoogleNewsSitemapUrlTest.java @@ -1,16 +1,12 @@ package com.redfin.sitemapgenerator; +import com.redfin.sitemapgenerator.W3CDateFormat.Pattern; +import junit.framework.TestCase; + import java.io.File; import java.util.Date; import java.util.List; -import junit.framework.TestCase; - -import com.redfin.sitemapgenerator.GoogleNewsSitemapGenerator; -import com.redfin.sitemapgenerator.GoogleNewsSitemapUrl; -import com.redfin.sitemapgenerator.W3CDateFormat; -import com.redfin.sitemapgenerator.W3CDateFormat.Pattern; - public class GoogleNewsSitemapUrlTest extends TestCase { File dir; @@ -38,14 +34,19 @@ public void testSimpleUrl() throws Exception { dateFormat.setTimeZone(W3CDateFormat.ZULU); wsg = GoogleNewsSitemapGenerator.builder("http://www.example.com", dir) .dateFormat(dateFormat).build(); - GoogleNewsSitemapUrl url = new GoogleNewsSitemapUrl("http://www.example.com/index.html", new Date(0)); + GoogleNewsSitemapUrl url = new GoogleNewsSitemapUrl("http://www.example.com/index.html", new Date(0), "Example Title", "The Example Times", "en"); wsg.addUrl(url); String expected = "\n" + "\n" + " \n" + - " http://www.example.com/index.html\n" + - " \n" + - " 1970-01-01T00:00:00Z\n" + + " http://www.example.com/index.html\n" + + " \n" + + " \n" + + " The Example Times\n" + + " en\n" + + " \n" + + " 1970-01-01T00:00:00Z\n" + + " Example Title\n" + " \n" + " \n" + ""; @@ -58,16 +59,21 @@ public void testKeywords() throws Exception { dateFormat.setTimeZone(W3CDateFormat.ZULU); wsg = GoogleNewsSitemapGenerator.builder("http://www.example.com", dir) .dateFormat(dateFormat).build(); - GoogleNewsSitemapUrl url = 
new GoogleNewsSitemapUrl.Options("http://www.example.com/index.html", new Date(0)) + GoogleNewsSitemapUrl url = new GoogleNewsSitemapUrl.Options("http://www.example.com/index.html", new Date(0), "Example Title", "The Example Times", "en") .keywords("Klaatu", "Barrata", "Nicto") .build(); wsg.addUrl(url); String expected = "\n" + "\n" + " \n" + - " http://www.example.com/index.html\n" + - " \n" + - " 1970-01-01T00:00:00Z\n" + + " http://www.example.com/index.html\n" + + " \n" + + " \n" + + " The Example Times\n" + + " en\n" + + " \n" + + " 1970-01-01T00:00:00Z\n" + + " Example Title\n" + " Klaatu, Barrata, Nicto\n" + " \n" + " \n" + @@ -75,6 +81,34 @@ public void testKeywords() throws Exception { String sitemap = writeSingleSiteMap(wsg); assertEquals(expected, sitemap); } + + public void testGenres() throws Exception { + W3CDateFormat dateFormat = new W3CDateFormat(Pattern.SECOND); + dateFormat.setTimeZone(W3CDateFormat.ZULU); + wsg = GoogleNewsSitemapGenerator.builder("http://www.example.com", dir) + .dateFormat(dateFormat).build(); + GoogleNewsSitemapUrl url = new GoogleNewsSitemapUrl.Options("http://www.example.com/index.html", new Date(0), "Example Title", "The Example Times", "en") + .genres("persbericht") + .build(); + wsg.addUrl(url); + String expected = "\n" + + "\n" + + " \n" + + " http://www.example.com/index.html\n" + + " \n" + + " \n" + + " The Example Times\n" + + " en\n" + + " \n" + + " persbericht\n" + + " 1970-01-01T00:00:00Z\n" + + " Example Title\n" + + " \n" + + " \n" + + ""; + String sitemap = writeSingleSiteMap(wsg); + assertEquals(expected, sitemap); + } private String writeSingleSiteMap(GoogleNewsSitemapGenerator wsg) { List files = wsg.write();