Skip to content

Commit

Permalink
Pull up fields and methods related to urlkey calculation from AccessP…
Browse files Browse the repository at this point in the history
…oint to new AccessPointBase class, so that it can be reused in other RequestHandler implementations.
  • Loading branch information
kngenie committed Jan 22, 2017
1 parent cd50eb9 commit a430a24
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 102 deletions.
105 changes: 3 additions & 102 deletions wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java
Expand Up @@ -47,7 +47,6 @@
import org.archive.wayback.RequestParser;
import org.archive.wayback.ResourceStore;
import org.archive.wayback.ResultURIConverter;
import org.archive.wayback.UrlCanonicalizer;
import org.archive.wayback.accesscontrol.AuthContextExclusionFilterFactory;
import org.archive.wayback.accesscontrol.CollectionContext;
import org.archive.wayback.accesscontrol.ContextExclusionFilterFactory;
Expand Down Expand Up @@ -84,7 +83,6 @@
import org.archive.wayback.resourceindex.filters.ExclusionFilter;
import org.archive.wayback.resourceindex.filters.WARCRevisitAnnotationFilter;
import org.archive.wayback.util.operator.BooleanOperator;
import org.archive.wayback.util.url.UrlOperations;
import org.archive.wayback.util.webapp.AbstractRequestHandler;
import org.archive.wayback.util.webapp.ShutdownListener;
import org.archive.wayback.webapp.LiveWebRedirector.LiveWebState;
Expand All @@ -105,7 +103,7 @@
*
* @author brad
*/
public class AccessPoint extends AbstractRequestHandler implements
public class AccessPoint extends AccessPointBase implements
ShutdownListener, CollectionContext {
/** webapp relative location of Interstitial.jsp */
public final static String INTERSTITIAL_JSP = "jsp/Interstitial.jsp";
Expand All @@ -131,7 +129,6 @@ public class AccessPoint extends AbstractRequestHandler implements
AccessPoint.class.getName());

private boolean exactHostMatch = false;
private boolean exactSchemeMatch = false;
private boolean useAnchorWindow = false;
private boolean useServerName = false;
private boolean serveStatic = true;
Expand Down Expand Up @@ -195,8 +192,6 @@ public static enum PerfStat {
private long embargoMS = 0;
private CustomResultFilterFactory filterFactory = null;

private UrlCanonicalizer selfRedirectCanonicalizer = null;

private int maxRedirectAttempts = 0;

private boolean fixedEmbeds = false;
Expand Down Expand Up @@ -556,61 +551,6 @@ private void checkInterstitialRedirect(HttpServletRequest httpRequest,
}
}

protected boolean isSelfRedirect(Resource resource,
CaptureSearchResult closest, WaybackRequest wbRequest,
String canonRequestURL) {
int status = resource.getStatusCode();

// Only applies to redirects
if ((status < 300) || (status >= 400)) {
return false;
}

String location = resource.getHeader("Location");

if (location == null) {
return false;
}

// if (!closest.getCaptureTimestamp().equals(wbRequest.getReplayTimestamp())) {
// return false;
// }

String redirScheme = UrlOperations.urlToScheme(location);

try {
if (redirScheme == null && isExactSchemeMatch()) {
location = UrlOperations.resolveUrl(closest.getOriginalUrl(), location);
redirScheme = UrlOperations.urlToScheme(location);
} else if (location.startsWith("/")) {
location = UrlOperations.resolveUrl(closest.getOriginalUrl(), location);
}

if (getSelfRedirectCanonicalizer() != null) {
location = getSelfRedirectCanonicalizer().urlStringToKey(location);
}
} catch (IOException e) {
return false;
}

if (location.equals(canonRequestURL)) {
// if not exact scheme, don't do scheme compare, must be equal
if (!isExactSchemeMatch()) {
return true;
}

String origScheme = UrlOperations.urlToScheme(wbRequest
.getRequestUrl());

if ((origScheme != null) && (redirScheme != null) &&
(origScheme.compareTo(redirScheme) == 0)) {
return true;
}
}

return false;
}

public SearchResults queryIndex(WaybackRequest wbRequest)
throws ResourceIndexNotAvailableException,
ResourceNotInArchiveException, BadQueryException,
Expand Down Expand Up @@ -770,15 +710,7 @@ protected void handleReplay(WaybackRequest wbRequest,

checkInterstitialRedirect(httpRequest,wbRequest);

String requestURL = wbRequest.getRequestUrl();

if (getSelfRedirectCanonicalizer() != null) {
try {
requestURL = getSelfRedirectCanonicalizer().urlStringToKey(requestURL);
} catch (IOException io) {

}
}
String requestURLKey = urlToKey(wbRequest.getRequestUrl());

PerformanceLogger p = new PerformanceLogger("replay");

Expand Down Expand Up @@ -933,7 +865,7 @@ protected void handleReplay(WaybackRequest wbRequest,
// If the status is a redirect, check that the location or url date's are different from the current request
// Otherwise, replay the previous matched capture.
// This chain is unlikely to go past one previous capture, but is possible
if (isSelfRedirect(httpHeadersResource, closest, wbRequest, requestURL)) {
if (isSelfRedirect(httpHeadersResource, closest, wbRequest, requestURLKey)) {
LOGGER.info("Self-Redirect: Skipping " + closest.getCaptureTimestamp() + "/" + closest.getOriginalUrl());
//closest = findNextClosest(closest, captureResults, requestMS);
closest = captureSelector.next();
Expand Down Expand Up @@ -1300,20 +1232,6 @@ public void setExactHostMatch(boolean exactHostMatch) {
this.exactHostMatch = exactHostMatch;
}

/**
* @return the exactSchemeMatch
*/
public boolean isExactSchemeMatch() {
return exactSchemeMatch;
}

/**
* @param exactSchemeMatch the exactSchemeMatch to set
*/
public void setExactSchemeMatch(boolean exactSchemeMatch) {
this.exactSchemeMatch = exactSchemeMatch;
}

/**
* @return true if this AccessPoint is configured to useAnchorWindow, that
* is, to replay documents only if they are within a certain proximity to
Expand Down Expand Up @@ -1842,23 +1760,6 @@ public CustomResultFilterFactory getFilterFactory() {
return filterFactory;
}

/**
* Optional
* @param selfRedirectCanonicalizer
*/
public void setSelfRedirectCanonicalizer(
UrlCanonicalizer selfRedirectCanonicalizer) {
this.selfRedirectCanonicalizer = selfRedirectCanonicalizer;
}

/**
* URL canonicalizer for testing self-redirect.
* @return UrlCanonicalizer
*/
public UrlCanonicalizer getSelfRedirectCanonicalizer() {
return this.selfRedirectCanonicalizer;
}

public int getMaxRedirectAttempts() {
return maxRedirectAttempts;
}
Expand Down
@@ -0,0 +1,139 @@
/*
* This file is part of the Wayback archival access software
* (http://archive-access.sourceforge.net/projects/wayback/).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.archive.wayback.webapp;

import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.archive.wayback.UrlCanonicalizer;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.Resource;
import org.archive.wayback.core.WaybackRequest;
import org.archive.wayback.util.url.UrlOperations;
import org.archive.wayback.util.webapp.AbstractRequestHandler;

/**
 * AccessPointBase provides fields and methods common to AbstractRequestHandler
 * implementations for core wayback machine functionalities (playback, search,
 * live web archiving, etc.)
 * <p>
 * It holds the URL-key related configuration ({@code exactSchemeMatch} and the
 * optional self-redirect {@link UrlCanonicalizer}) together with the
 * self-redirect test and the URL-to-key conversion built on top of them.
 * </p>
 */
public abstract class AccessPointBase extends AbstractRequestHandler {

	private static final Logger LOGGER = Logger.getLogger(
		AccessPointBase.class.getName());

	private boolean exactSchemeMatch = false;
	private UrlCanonicalizer selfRedirectCanonicalizer = null;

	/**
	 * Tests whether {@code resource} is a redirect whose {@code Location}
	 * points back at the URL being requested.
	 * <p>
	 * The {@code Location} value is resolved against
	 * {@code closest.getOriginalUrl()} when it starts with {@code "/"}, or
	 * when it has no scheme and {@link #isExactSchemeMatch()} is on. It is
	 * then passed through the self-redirect canonicalizer (if one is
	 * configured) and compared with {@code canonRequestURL}. When
	 * {@link #isExactSchemeMatch()} is on, the schemes of the request URL
	 * and of the redirect target must also be equal.
	 * </p>
	 *
	 * @param resource resource whose status code and {@code Location}
	 *        header are inspected
	 * @param closest capture the resource was loaded for; its original URL
	 *        is the base for resolving a relative {@code Location}
	 * @param wbRequest current request; its request URL supplies the scheme
	 *        for the exact-scheme comparison
	 * @param canonRequestURL request URL in the same form the
	 *        {@code Location} is converted to (see {@link #urlToKey(String)})
	 * @return {@code true} if the resource is a 3xx response redirecting to
	 *         the requested URL; {@code false} otherwise, including when
	 *         the {@code Location} cannot be resolved or canonicalized
	 */
	protected boolean isSelfRedirect(Resource resource,
			CaptureSearchResult closest, WaybackRequest wbRequest,
			String canonRequestURL) {
		final int status = resource.getStatusCode();

		// Only applies to redirects
		if ((status < 300) || (status >= 400)) {
			return false;
		}

		String location = resource.getHeader("Location");
		if (location == null) {
			return false;
		}

		String redirScheme = UrlOperations.urlToScheme(location);

		try {
			if (redirScheme == null && isExactSchemeMatch()) {
				// Scheme-less Location: resolve it so that a scheme is
				// available for the exact-scheme comparison below.
				location = UrlOperations.resolveUrl(closest.getOriginalUrl(),
					location);
				redirScheme = UrlOperations.urlToScheme(location);
			} else if (location.startsWith("/")) {
				location = UrlOperations.resolveUrl(closest.getOriginalUrl(),
					location);
			}

			final UrlCanonicalizer canonicalizer = getSelfRedirectCanonicalizer();
			if (canonicalizer != null) {
				location = canonicalizer.urlStringToKey(location);
			}
		} catch (IOException e) {
			// A Location that cannot be processed is never treated as a
			// self-redirect.
			if (LOGGER.isLoggable(Level.FINE)) {
				LOGGER.log(Level.FINE,
					"Failed to canonicalize redirect location " + location, e);
			}
			return false;
		}

		if (!location.equals(canonRequestURL)) {
			return false;
		}

		// if not exact scheme, don't do scheme compare, must be equal
		if (!isExactSchemeMatch()) {
			return true;
		}

		final String origScheme = UrlOperations.urlToScheme(wbRequest
			.getRequestUrl());

		// equals() is false for a null redirScheme, so one null check is
		// enough.
		return (origScheme != null) && origScheme.equals(redirScheme);
	}

	/**
	 * @return the exactSchemeMatch
	 */
	public boolean isExactSchemeMatch() {
		return exactSchemeMatch;
	}

	/**
	 * @param exactSchemeMatch the exactSchemeMatch to set
	 */
	public void setExactSchemeMatch(boolean exactSchemeMatch) {
		this.exactSchemeMatch = exactSchemeMatch;
	}

	/**
	 * Optional
	 * @param selfRedirectCanonicalizer canonicalizer applied to URLs before
	 *        they are compared in the self-redirect test; {@code null}
	 *        (the default) disables canonicalization
	 */
	public void setSelfRedirectCanonicalizer(UrlCanonicalizer selfRedirectCanonicalizer) {
		this.selfRedirectCanonicalizer = selfRedirectCanonicalizer;
	}

	/**
	 * URL canonicalizer for testing self-redirect.
	 * @return UrlCanonicalizer, or {@code null} if none is configured
	 */
	public UrlCanonicalizer getSelfRedirectCanonicalizer() {
		return selfRedirectCanonicalizer;
	}

	/**
	 * Converts {@code url} to the key form used by
	 * {@link #isSelfRedirect(Resource, CaptureSearchResult, WaybackRequest, String)}.
	 * <p>
	 * This is a best-effort conversion: if no canonicalizer is configured,
	 * or the canonicalizer fails with an {@link IOException}, {@code url} is
	 * returned unchanged.
	 * </p>
	 *
	 * @param url URL to convert
	 * @return canonicalized key for {@code url}, or {@code url} itself
	 */
	protected String urlToKey(String url) {
		// Go through the getter, as isSelfRedirect does, so both sides of
		// the comparison always use the same canonicalizer.
		final UrlCanonicalizer canonicalizer = getSelfRedirectCanonicalizer();
		if (canonicalizer != null) {
			try {
				return canonicalizer.urlStringToKey(url);
			} catch (IOException ex) {
				// Deliberate fallback to the raw URL; leave a trace instead
				// of dropping the failure silently.
				if (LOGGER.isLoggable(Level.FINE)) {
					LOGGER.log(Level.FINE, "Failed to canonicalize URL " + url
						+ "; using it as is", ex);
				}
			}
		}
		return url;
	}
}

0 comments on commit a430a24

Please sign in to comment.