diff --git a/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java b/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java index 6e3e2ae79e..ac6ae7acac 100644 --- a/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java +++ b/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java @@ -47,7 +47,6 @@ import org.archive.wayback.RequestParser; import org.archive.wayback.ResourceStore; import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.accesscontrol.AuthContextExclusionFilterFactory; import org.archive.wayback.accesscontrol.CollectionContext; import org.archive.wayback.accesscontrol.ContextExclusionFilterFactory; @@ -84,7 +83,6 @@ import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.resourceindex.filters.WARCRevisitAnnotationFilter; import org.archive.wayback.util.operator.BooleanOperator; -import org.archive.wayback.util.url.UrlOperations; import org.archive.wayback.util.webapp.AbstractRequestHandler; import org.archive.wayback.util.webapp.ShutdownListener; import org.archive.wayback.webapp.LiveWebRedirector.LiveWebState; @@ -105,7 +103,7 @@ * * @author brad */ -public class AccessPoint extends AbstractRequestHandler implements +public class AccessPoint extends AccessPointBase implements ShutdownListener, CollectionContext { /** webapp relative location of Interstitial.jsp */ public final static String INTERSTITIAL_JSP = "jsp/Interstitial.jsp"; @@ -131,7 +129,6 @@ public class AccessPoint extends AbstractRequestHandler implements AccessPoint.class.getName()); private boolean exactHostMatch = false; - private boolean exactSchemeMatch = false; private boolean useAnchorWindow = false; private boolean useServerName = false; private boolean serveStatic = true; @@ -195,8 +192,6 @@ public static enum PerfStat { private long embargoMS = 0; private CustomResultFilterFactory filterFactory = null; - private 
UrlCanonicalizer selfRedirectCanonicalizer = null; - private int maxRedirectAttempts = 0; private boolean fixedEmbeds = false; @@ -556,61 +551,6 @@ private void checkInterstitialRedirect(HttpServletRequest httpRequest, } } - protected boolean isSelfRedirect(Resource resource, - CaptureSearchResult closest, WaybackRequest wbRequest, - String canonRequestURL) { - int status = resource.getStatusCode(); - - // Only applies to redirects - if ((status < 300) || (status >= 400)) { - return false; - } - - String location = resource.getHeader("Location"); - - if (location == null) { - return false; - } - -// if (!closest.getCaptureTimestamp().equals(wbRequest.getReplayTimestamp())) { -// return false; -// } - - String redirScheme = UrlOperations.urlToScheme(location); - - try { - if (redirScheme == null && isExactSchemeMatch()) { - location = UrlOperations.resolveUrl(closest.getOriginalUrl(), location); - redirScheme = UrlOperations.urlToScheme(location); - } else if (location.startsWith("/")) { - location = UrlOperations.resolveUrl(closest.getOriginalUrl(), location); - } - - if (getSelfRedirectCanonicalizer() != null) { - location = getSelfRedirectCanonicalizer().urlStringToKey(location); - } - } catch (IOException e) { - return false; - } - - if (location.equals(canonRequestURL)) { - // if not exact scheme, don't do scheme compare, must be equal - if (!isExactSchemeMatch()) { - return true; - } - - String origScheme = UrlOperations.urlToScheme(wbRequest - .getRequestUrl()); - - if ((origScheme != null) && (redirScheme != null) && - (origScheme.compareTo(redirScheme) == 0)) { - return true; - } - } - - return false; - } - public SearchResults queryIndex(WaybackRequest wbRequest) throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, @@ -770,15 +710,7 @@ protected void handleReplay(WaybackRequest wbRequest, checkInterstitialRedirect(httpRequest,wbRequest); - String requestURL = wbRequest.getRequestUrl(); - - if 
(getSelfRedirectCanonicalizer() != null) { - try { - requestURL = getSelfRedirectCanonicalizer().urlStringToKey(requestURL); - } catch (IOException io) { - - } - } + String requestURLKey = urlToKey(wbRequest.getRequestUrl()); PerformanceLogger p = new PerformanceLogger("replay"); @@ -933,7 +865,7 @@ protected void handleReplay(WaybackRequest wbRequest, // If the status is a redirect, check that the location or url date's are different from the current request // Otherwise, replay the previous matched capture. // This chain is unlikely to go past one previous capture, but is possible - if (isSelfRedirect(httpHeadersResource, closest, wbRequest, requestURL)) { + if (isSelfRedirect(httpHeadersResource, closest, wbRequest, requestURLKey)) { LOGGER.info("Self-Redirect: Skipping " + closest.getCaptureTimestamp() + "/" + closest.getOriginalUrl()); //closest = findNextClosest(closest, captureResults, requestMS); closest = captureSelector.next(); @@ -1300,20 +1232,6 @@ public void setExactHostMatch(boolean exactHostMatch) { this.exactHostMatch = exactHostMatch; } - /** - * @return the exactSchemeMatch - */ - public boolean isExactSchemeMatch() { - return exactSchemeMatch; - } - - /** - * @param exactSchemeMatch the exactSchemeMatch to set - */ - public void setExactSchemeMatch(boolean exactSchemeMatch) { - this.exactSchemeMatch = exactSchemeMatch; - } - /** * @return true if this AccessPoint is configured to useAnchorWindow, that * is, to replay documents only if they are within a certain proximity to @@ -1842,23 +1760,6 @@ public CustomResultFilterFactory getFilterFactory() { return filterFactory; } - /** - * Optional - * @param selfRedirectCanonicalizer - */ - public void setSelfRedirectCanonicalizer( - UrlCanonicalizer selfRedirectCanonicalizer) { - this.selfRedirectCanonicalizer = selfRedirectCanonicalizer; - } - - /** - * URL canonicalizer for testing self-redirect. 
-	 * @return UrlCanonicalizer
-	 */
-	public UrlCanonicalizer getSelfRedirectCanonicalizer() {
-		return this.selfRedirectCanonicalizer;
-	}
-
 	public int getMaxRedirectAttempts() {
 		return maxRedirectAttempts;
 	}
diff --git a/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPointBase.java b/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPointBase.java
new file mode 100644
index 0000000000..fa8901d7b2
--- /dev/null
+++ b/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPointBase.java
@@ -0,0 +1,202 @@
+/*
+ * This file is part of the Wayback archival access software
+ * (http://archive-access.sourceforge.net/projects/wayback/).
+ *
+ * Licensed to the Internet Archive (IA) by one or more individual
+ * contributors.
+ *
+ * The IA licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.archive.wayback.webapp;
+
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.archive.wayback.UrlCanonicalizer;
+import org.archive.wayback.core.CaptureSearchResult;
+import org.archive.wayback.core.Resource;
+import org.archive.wayback.core.WaybackRequest;
+import org.archive.wayback.util.url.UrlOperations;
+import org.archive.wayback.util.webapp.AbstractRequestHandler;
+
+/**
+ * AccessPointBase provides fields and methods common to AbstractRequestHandler
+ * implementations for core wayback machine functionalities (playback, search,
+ * live web archiving, etc.)
+ */
+public abstract class AccessPointBase extends AbstractRequestHandler {
+
+	private static final Logger LOGGER = Logger.getLogger(
+			AccessPointBase.class.getName());
+
+	private boolean exactSchemeMatch = false;
+	private UrlCanonicalizer selfRedirectCanonicalizer = null;
+
+	/**
+	 * Tests whether {@code resource} is a redirect that points back at the
+	 * requested URL.
+	 * <p>
+	 * Only responses whose status code is in the range 300-399 and that carry a
+	 * {@code Location} header are considered. The location is resolved against
+	 * the capture's original URL when it starts with {@code "/"}, or when it
+	 * has no scheme and {@link #isExactSchemeMatch()} is set, and is then
+	 * passed through the self-redirect canonicalizer, if one is configured,
+	 * before being compared with {@code canonRequestURL}. With exact scheme
+	 * matching enabled, the scheme of the location must additionally equal the
+	 * scheme of the request URL.
+	 * </p>
+	 *
+	 * @param resource resource supplying the status code and {@code Location}
+	 *        header
+	 * @param closest capture whose original URL is the base for resolving a
+	 *        relative location
+	 * @param wbRequest request whose URL supplies the scheme to compare
+	 *        against when exact scheme matching is enabled
+	 * @param canonRequestURL requested URL, expected to be canonicalized the
+	 *        same way as the location (see {@link #urlToKey(String)})
+	 * @return {@code true} if the resource redirects to the requested URL;
+	 *         {@code false} otherwise, including when the location cannot be
+	 *         resolved or canonicalized
+	 */
+	protected boolean isSelfRedirect(Resource resource,
+			CaptureSearchResult closest, WaybackRequest wbRequest,
+			String canonRequestURL) {
+		int status = resource.getStatusCode();
+
+		// Only applies to redirects
+		if ((status < 300) || (status >= 400)) {
+			return false;
+		}
+
+		String location = resource.getHeader("Location");
+
+		if (location == null) {
+			return false;
+		}
+
+		// if (!closest.getCaptureTimestamp().equals(wbRequest.getReplayTimestamp())) {
+		// return false;
+		// }
+
+		String redirScheme = UrlOperations.urlToScheme(location);
+
+		try {
+			if (redirScheme == null && isExactSchemeMatch()) {
+				location = UrlOperations.resolveUrl(closest.getOriginalUrl(),
+						location);
+				redirScheme = UrlOperations.urlToScheme(location);
+			} else if (location.startsWith("/")) {
+				location = UrlOperations.resolveUrl(closest.getOriginalUrl(),
+						location);
+			}
+
+			// Read the canonicalizer once so the null check and the call
+			// use the same instance.
+			UrlCanonicalizer canonicalizer = getSelfRedirectCanonicalizer();
+			if (canonicalizer != null) {
+				location = canonicalizer.urlStringToKey(location);
+			}
+		} catch (IOException e) {
+			// The location cannot be compared reliably; report "not a
+			// self-redirect", but leave a trace of the failure.
+			LOGGER.log(Level.FINE,
+					"Failed to resolve or canonicalize redirect location: "
+							+ location, e);
+			return false;
+		}
+
+		if (!location.equals(canonRequestURL)) {
+			return false;
+		}
+
+		// if not exact scheme, don't do scheme compare, must be equal
+		if (!isExactSchemeMatch()) {
+			return true;
+		}
+
+		String origScheme = UrlOperations.urlToScheme(wbRequest
+				.getRequestUrl());
+
+		// equals() is false for a null redirScheme, so both schemes must be
+		// present and identical.
+		return (origScheme != null) && origScheme.equals(redirScheme);
+	}
+
+	/**
+	 * @return {@code true} if a redirect only counts as a self-redirect when
+	 *         its scheme equals the scheme of the request URL
+	 */
+	public boolean isExactSchemeMatch() {
+		return exactSchemeMatch;
+	}
+
+	/**
+	 * @param exactSchemeMatch {@code true} to additionally require matching
+	 *        schemes when testing for a self-redirect
+	 */
+	public void setExactSchemeMatch(boolean exactSchemeMatch) {
+		this.exactSchemeMatch = exactSchemeMatch;
+	}
+
+	/**
+	 * Sets the canonicalizer applied to both the request URL and the redirect
+	 * location when testing for a self-redirect. Optional; when {@code null},
+	 * the URLs are compared without canonicalization.
+	 *
+	 * @param selfRedirectCanonicalizer canonicalizer to use, or {@code null}
+	 */
+	public void setSelfRedirectCanonicalizer(UrlCanonicalizer selfRedirectCanonicalizer) {
+		this.selfRedirectCanonicalizer = selfRedirectCanonicalizer;
+	}
+
+	/**
+	 * URL canonicalizer for testing self-redirect.
+	 *
+	 * @return the configured canonicalizer, or {@code null} if none is set
+	 */
+	public UrlCanonicalizer getSelfRedirectCanonicalizer() {
+		return selfRedirectCanonicalizer;
+	}
+
+	/**
+	 * Canonicalizes {@code url} with the self-redirect canonicalizer so that it
+	 * can be compared with the canonicalized redirect location in
+	 * {@link #isSelfRedirect}.
+	 * <p>
+	 * The canonicalizer is obtained through
+	 * {@link #getSelfRedirectCanonicalizer()}, the same accessor
+	 * {@code isSelfRedirect} uses, so both sides of the comparison are
+	 * canonicalized consistently even if a subclass overrides the accessor.
+	 * </p>
+	 *
+	 * @param url URL to canonicalize
+	 * @return the canonicalized key, or {@code url} unchanged if no
+	 *         canonicalizer is configured or canonicalization fails
+	 */
+	protected String urlToKey(String url) {
+		UrlCanonicalizer canonicalizer = getSelfRedirectCanonicalizer();
+		if (canonicalizer == null) {
+			return url;
+		}
+		try {
+			return canonicalizer.urlStringToKey(url);
+		} catch (IOException ex) {
+			// Best effort: fall back to the raw URL, but leave a trace
+			// instead of failing silently.
+			LOGGER.log(Level.FINE, "Failed to canonicalize URL: " + url, ex);
+			return url;
+		}
+	}
+}