From 18bbae908c6f670b4d33bc86896fe98a257e3540 Mon Sep 17 00:00:00 2001 From: Lewis John McGibbney Date: Thu, 29 Sep 2016 21:39:24 -0700 Subject: [PATCH] Upgrade to JDK 1.8 --- .travis.yml | 4 +- .../domains/EffectiveTldFinder.java | 11 ++++-- .../fetcher/http/UserAgent.java | 39 ++++++++++++------- .../crawlercommons/filters/URLFilter.java | 4 +- .../robots/BaseRobotsParser.java | 10 ++--- .../crawlercommons/robots/RobotUtils.java | 3 +- .../robots/SimpleRobotRules.java | 8 ++-- .../crawlercommons/sitemaps/SiteMapURL.java | 31 ++++++++++----- .../sitemaps/UnknownFormatException.java | 5 ++- 9 files changed, 75 insertions(+), 40 deletions(-) diff --git a/.travis.yml b/.travis.yml index a6fd148e..7e349507 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,9 @@ language: java +jdk: + - oraclejdk8 + script: - - jdk_switcher use oraclejdk8 - mvn install javadoc:aggregate notifications: diff --git a/src/main/java/crawlercommons/domains/EffectiveTldFinder.java b/src/main/java/crawlercommons/domains/EffectiveTldFinder.java index 6e18f576..155f6f38 100644 --- a/src/main/java/crawlercommons/domains/EffectiveTldFinder.java +++ b/src/main/java/crawlercommons/domains/EffectiveTldFinder.java @@ -108,9 +108,11 @@ public static Map getEffectiveTLDs() { } /** - * @param hostname the hostname for which to find the - * {@link crawlercommons.domains.EffectiveTldFinder.EffectiveTLD} - * @return the {@link crawlercommons.domains.EffectiveTldFinder.EffectiveTLD} + * @param hostname + * the hostname for which to find the + * {@link crawlercommons.domains.EffectiveTldFinder.EffectiveTLD} + * @return the + * {@link crawlercommons.domains.EffectiveTldFinder.EffectiveTLD} */ public static EffectiveTLD getEffectiveTLD(String hostname) { if (getInstance().domains.containsKey(hostname)) { @@ -145,7 +147,8 @@ public static EffectiveTLD getEffectiveTLD(String hostname) { * This method uses the effective TLD to determine which component of a FQDN * is the NIC-assigned domain name. 
* - * @param hostname a string for which to obtain a NIC-assigned domain name + * @param hostname + * a string for which to obtain a NIC-assigned domain name * @return the NIC-assigned domain name */ public static String getAssignedDomain(String hostname) { diff --git a/src/main/java/crawlercommons/fetcher/http/UserAgent.java b/src/main/java/crawlercommons/fetcher/http/UserAgent.java index f60de3d2..0f39e7c6 100644 --- a/src/main/java/crawlercommons/fetcher/http/UserAgent.java +++ b/src/main/java/crawlercommons/fetcher/http/UserAgent.java @@ -54,9 +54,12 @@ public class UserAgent implements Serializable { /** * Set user agent characteristics * - * @param agentName an agent name string to associate with the crawler - * @param emailAddress an agent email address string to associate with the crawler - * @param webAddress a Web address string to associate with the crawler + * @param agentName + * an agent name string to associate with the crawler + * @param emailAddress + * an agent email address string to associate with the crawler + * @param webAddress + * a Web address string to associate with the crawler */ public UserAgent(String agentName, String emailAddress, String webAddress) { this(agentName, emailAddress, webAddress, DEFAULT_BROWSER_VERSION); @@ -65,10 +68,14 @@ public UserAgent(String agentName, String emailAddress, String webAddress) { /** * Set user agent characteristics * - * @param agentName an agent name string to associate with the crawler - * @param emailAddress an agent email address string to associate with the crawler - * @param webAddress a Web address string to associate with the crawler - * @param browserVersion a browser version to mimic + * @param agentName + * an agent name string to associate with the crawler + * @param emailAddress + * an agent email address string to associate with the crawler + * @param webAddress + * a Web address string to associate with the crawler + * @param browserVersion + * a browser version to mimic */ public 
UserAgent(String agentName, String emailAddress, String webAddress, String browserVersion) { this(agentName, emailAddress, webAddress, browserVersion, DEFAULT_CRAWLER_VERSION); @@ -77,11 +84,16 @@ public UserAgent(String agentName, String emailAddress, String webAddress, Strin /** * Set user agent characteristics * - * @param agentName an agent name string to associate with the crawler - * @param emailAddress an agent email address string to associate with the crawler - * @param webAddress a Web address string to associate with the crawler - * @param browserVersion a browser version to mimic - * @param crawlerVersion the version of your crawler/crawl agent + * @param agentName + * an agent name string to associate with the crawler + * @param emailAddress + * an agent email address string to associate with the crawler + * @param webAddress + * a Web address string to associate with the crawler + * @param browserVersion + * a browser version to mimic + * @param crawlerVersion + * the version of your crawler/crawl agent */ public UserAgent(String agentName, String emailAddress, String webAddress, String browserVersion, String crawlerVersion) { this.agentName = agentName; @@ -106,7 +118,8 @@ public String getAgentName() { * @return User Agent String */ public String getUserAgentString() { - // Mozilla/5.0 (compatible; mycrawler/1.0; +http://www.mydomain.com; mycrawler@mydomain.com) + // Mozilla/5.0 (compatible; mycrawler/1.0; +http://www.mydomain.com; + // mycrawler@mydomain.com) return String.format(Locale.getDefault(), "%s (compatible; %s%s; +%s; %s)", browserVersion, getAgentName(), crawlerConfiguration, webAddress, emailAddress); } } diff --git a/src/main/java/crawlercommons/filters/URLFilter.java b/src/main/java/crawlercommons/filters/URLFilter.java index 8fc2e9cb..e9206cda 100644 --- a/src/main/java/crawlercommons/filters/URLFilter.java +++ b/src/main/java/crawlercommons/filters/URLFilter.java @@ -21,7 +21,9 @@ public abstract class URLFilter { /** * Returns a 
modified version of the input URL or null if the URL should be * removed - * @param urlString a URL string to check against filter(s) + * + * @param urlString + * a URL string to check against filter(s) * @return a filtered URL **/ public abstract String filter(String urlString); diff --git a/src/main/java/crawlercommons/robots/BaseRobotsParser.java b/src/main/java/crawlercommons/robots/BaseRobotsParser.java index cd2e667b..d5f63776 100644 --- a/src/main/java/crawlercommons/robots/BaseRobotsParser.java +++ b/src/main/java/crawlercommons/robots/BaseRobotsParser.java @@ -22,11 +22,11 @@ public abstract class BaseRobotsParser implements Serializable { /** - * Parse the robots.txt file in content, and return rules appropriate for - * processing paths by userAgent. Note that multiple agent names may be - * provided as comma-separated values; the order of these shouldn't matter, - * as the file is parsed in order, and each agent name found in the file - * will be compared to every agent name found in robotNames. + * Parse the robots.txt file in content, and return rules appropriate + * for processing paths by userAgent. Note that multiple agent names + * may be provided as comma-separated values; the order of these shouldn't + * matter, as the file is parsed in order, and each agent name found in the + * file will be compared to every agent name found in robotNames. 
* * Also note that names are lower-cased before comparison, and that any * robot name you pass shouldn't contain commas or spaces; if the name has diff --git a/src/main/java/crawlercommons/robots/RobotUtils.java b/src/main/java/crawlercommons/robots/RobotUtils.java index 292e9e0a..ed8a2118 100644 --- a/src/main/java/crawlercommons/robots/RobotUtils.java +++ b/src/main/java/crawlercommons/robots/RobotUtils.java @@ -86,7 +86,8 @@ public static long getMaxFetchTime() { * @param fetcher * Fetcher for downloading robots.txt file * @param parser - * a {@link crawlercommons.robots.BaseRobotsParser} to use for obtaining appropriate rules + * a {@link crawlercommons.robots.BaseRobotsParser} to use for + * obtaining appropriate rules * @param robotsUrl * URL to robots.txt file * @return Robot rules diff --git a/src/main/java/crawlercommons/robots/SimpleRobotRules.java b/src/main/java/crawlercommons/robots/SimpleRobotRules.java index 2cb4bba0..efe761ce 100644 --- a/src/main/java/crawlercommons/robots/SimpleRobotRules.java +++ b/src/main/java/crawlercommons/robots/SimpleRobotRules.java @@ -65,7 +65,7 @@ public int compareTo(RobotRule o) { /* * (non-Javadoc) - * + * * @see java.lang.Object#hashCode() */ @Override @@ -79,7 +79,7 @@ public int hashCode() { /* * (non-Javadoc) - * + * * @see java.lang.Object#equals(java.lang.Object) */ @Override @@ -275,7 +275,7 @@ public void sortRules() { /** * Is our ruleset set up to allow all access? - * + * * @return true if all URLs are allowed. */ @Override @@ -285,7 +285,7 @@ public boolean isAllowAll() { /** * Is our ruleset set up to disallow all access? - * + * * @return true if no URLs are allowed. 
*/ @Override diff --git a/src/main/java/crawlercommons/sitemaps/SiteMapURL.java b/src/main/java/crawlercommons/sitemaps/SiteMapURL.java index edcec8a1..fbfd7585 100644 --- a/src/main/java/crawlercommons/sitemaps/SiteMapURL.java +++ b/src/main/java/crawlercommons/sitemaps/SiteMapURL.java @@ -104,7 +104,8 @@ public URL getUrl() { /** * Set the URL. * - * @param url of the sitemap + * @param url + * of the sitemap */ public void setUrl(URL url) { this.url = url; @@ -138,7 +139,8 @@ public Date getLastModified() { /** * Set when this URL was last modified. * - * @param lastModified the last time the sitemap was modified + * @param lastModified + * the last time the sitemap was modified */ public void setLastModified(String lastModified) { this.lastModified = SiteMap.convertToDate(lastModified); @@ -147,7 +149,8 @@ public void setLastModified(String lastModified) { /** * Set when this URL was last modified. * - * @param lastModified the last time the sitemap was modified + * @param lastModified + * the last time the sitemap was modified */ public void setLastModified(Date lastModified) { this.lastModified = lastModified; @@ -166,7 +169,8 @@ public double getPriority() { * Set the URL's priority to a value between [0.0 - 1.0] (Default Priority * is used if the given priority is out of range). * - * @param priority a value between [0.0 - 1.0] + * @param priority + * a value between [0.0 - 1.0] */ public void setPriority(double priority) { @@ -183,7 +187,8 @@ public void setPriority(double priority) { * Set the URL's priority to a value between [0.0 - 1.0] (Default Priority * is used if the given priority missing or is out of range). 
* - * @param priorityStr a value between [0.0 - 1.0] + * @param priorityStr + * a value between [0.0 - 1.0] */ public void setPriority(String priorityStr) { try { @@ -211,8 +216,9 @@ public ChangeFrequency getChangeFrequency() { /** * Set the URL's change frequency * - * @param changeFreq a {@link crawlercommons.sitemaps.SiteMapURL.ChangeFrequency} - * for this sitemap + * @param changeFreq + * a {@link crawlercommons.sitemaps.SiteMapURL.ChangeFrequency} + * for this sitemap */ public void setChangeFrequency(ChangeFrequency changeFreq) { this.changeFreq = changeFreq; @@ -222,8 +228,10 @@ public void setChangeFrequency(ChangeFrequency changeFreq) { * Set the URL's change frequency In case of a bad ChangeFrequency, the * current frequency in this instance will be set to NULL * - * @param changeFreq a string representing a - * {@link crawlercommons.sitemaps.SiteMapURL.ChangeFrequency} for this sitemap + * @param changeFreq + * a string representing a + * {@link crawlercommons.sitemaps.SiteMapURL.ChangeFrequency} for + * this sitemap */ public void setChangeFrequency(String changeFreq) { @@ -253,7 +261,9 @@ public void setChangeFrequency(String changeFreq) { /** * Valid means that it follows the official guidelines that the siteMapURL * must be under the base url - * @param valid whether the Sitemap is valid syntax or not + * + * @param valid + * whether the Sitemap is valid syntax or not */ public void setValid(boolean valid) { this.valid = valid; @@ -261,6 +271,7 @@ public void setValid(boolean valid) { /** * Is the siteMapURL under the base url ? 
+ * * @return true if the syntax is valid, false otherwise */ public boolean isValid() { diff --git a/src/main/java/crawlercommons/sitemaps/UnknownFormatException.java b/src/main/java/crawlercommons/sitemaps/UnknownFormatException.java index d491614a..ecb4a676 100644 --- a/src/main/java/crawlercommons/sitemaps/UnknownFormatException.java +++ b/src/main/java/crawlercommons/sitemaps/UnknownFormatException.java @@ -30,7 +30,9 @@ public UnknownFormatException() { /** * Constructor receives some kind of message that is saved in an instance * variable. - * @param err a String object to use within the Execption + * + * @param err + * a String object to use within the Exception */ public UnknownFormatException(String err) { super(err); @@ -40,6 +42,7 @@ public UnknownFormatException(String err) { /** * public method, callable by exception catcher. It returns the error * message. + * * @return a populated Exception as a String */ public String getError() {