Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import com.dotcms.analytics.metrics.*;
import com.dotcms.experiments.model.Experiment;
import com.dotcms.vanityurl.business.VanityUrlAPI;
import com.dotcms.vanityurl.model.CachedVanityUrl;
import com.dotmarketing.beans.Host;
import com.dotmarketing.business.APILocator;
Expand All @@ -21,6 +22,7 @@
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Util class to calculate the regex pattern for a given {@link HTMLPageAsset}
Expand Down Expand Up @@ -51,6 +53,25 @@ public enum ExperimentUrlPatternCalculator {
* If the page used inside the Experiment is also used as a Detail Page on any Content Type then it is even
* more complicated.
*
* <p><b>Security note:</b> The returned pattern is serialized to the
* Experiments Analytics SDK and evaluated client-side via
* {@code new RegExp(...).test(...)}. Because Vanity URL URIs can contain
* admin-authored regex, the assembled pattern is NOT protected by
* {@link com.dotcms.regex.MatcherTimeoutFactory} (which only guards the
* server-side Vanity URL resolver). Client-side ReDoS protection is tracked
* as a follow-up in <a href="https://github.com/dotCMS/core/issues/35379">#35379</a>.
*
* <p><b>Case folding:</b> The returned pattern is emitted entirely in
* lowercase — both the experiment-page alternative and every Vanity URL
* alternative — to match the SDK tracker, which lowercases the incoming
* URL path before calling {@code test}. As a side effect, any admin-authored
* vanity URI that relies on uppercase characters or uppercase-only
* character classes (e.g. {@code [A-Z]+}) is folded to lowercase in this
* path; such patterns are unsupported here. This is consistent with the
* server-side resolver, which already compiles vanity patterns with
* {@link java.util.regex.Pattern#CASE_INSENSITIVE} so case-sensitive regex
* constructs do not influence vanity matching in any consumer.
*
* @param experiment
* @return
*/
Expand Down Expand Up @@ -80,12 +101,46 @@ public String calculatePageUrlRegexPattern(final Experiment experiment) {
private static String getVanityUrlsRegex(final Host host, final Language language,
        final HTMLPageAsset htmlPageAsset) throws DotDataException {

    // Builds the vanity-URL part of the experiment URL pattern: one alternative
    // per action-200 vanity that forwards to the page, joined with "|" and
    // anchored with ^...$. Returns StringPool.BLANK when there is nothing to add.

    // includeSystemHost=true: a /cmsHomePage vanity forwarding to the
    // experiment page may be published on SYSTEM_HOST (site-wide), so we
    // need those matches as well. Mirrors resolveVanityUrl's host fallback.
    final List<CachedVanityUrl> vanityUrls = APILocator.getVanityUrlAPI()
            .findByForward(host, language, htmlPageAsset.getURI(), 200, true);

    // Exact match is intentional: regex-based cmsHomePage URIs (e.g. "/cmsHome.*")
    // are unsupported here. The legacy fallback in resolveVanityUrl looks up the
    // literal LEGACY_CMS_HOME_PAGE string, so only vanities whose URI equals it
    // (case-insensitive) participate in the "/" fallback.
    final boolean hasCmsHomePageVanity = vanityUrls.stream()
            .anyMatch(vanity -> VanityUrlAPI.LEGACY_CMS_HOME_PAGE.equalsIgnoreCase(vanity.url));

    // When a /cmsHomePage vanity forwards to the experiment page, visitors
    // reach it at "/", so "/" is added as an extra alternative.
    final String vanityUrlRegex = Stream.concat(
            vanityUrls.stream()
                    // Skip vanities whose compiled Pattern source is "": an empty
                    // source would expand the URL template into a catch-all.
                    .filter(vanity -> !vanity.pattern.pattern().isEmpty())
                    .map(vanity -> String.format(DEFAULT_URL_REGEX_TEMPLATE, vanity.pattern.pattern())),
            hasCmsHomePageVanity
                    ? Stream.of(String.format(DEFAULT_URL_REGEX_TEMPLATE, "\\/?"))
                    : Stream.empty()
    ).collect(Collectors.joining(StringPool.PIPE));

    if (vanityUrlRegex.isEmpty()) {
        return StringPool.BLANK;
    }

    // The SDK lowercases the incoming URL path before calling RegExp.test, so
    // the literal parts of the assembled regex are lowercased too; otherwise a
    // mixed-case vanity URI would never match. Character classes that depend on
    // uppercase literals (e.g. "[A-Z]+") are still folded and remain unsupported.
    return lowerCasePreservingEscapes(String.format("^%s$", vanityUrlRegex));
}

/**
 * Lowercases a regex source without touching backslash escapes.
 *
 * <p>A plain {@code toLowerCase()} would turn {@code \D}, {@code \W}, {@code \S}
 * and {@code \B} into {@code \d}, {@code \w}, {@code \s} and {@code \b}, which
 * inverts their meaning. Each backslash and the character right after it are
 * therefore copied verbatim. Everything else is lowercased with
 * {@link java.util.Locale#ROOT} so the result does not depend on the JVM
 * default locale.
 *
 * @param regex regex source to fold
 * @return the regex with every non-escaped character lowercased
 */
private static String lowerCasePreservingEscapes(final String regex) {
    final int length = regex.length();
    final StringBuilder folded = new StringBuilder(length);
    int index = 0;
    while (index < length) {
        final int escape = regex.indexOf('\\', index);
        final int literalEnd = escape < 0 ? length : escape;
        folded.append(regex.substring(index, literalEnd).toLowerCase(java.util.Locale.ROOT));
        if (escape < 0) {
            break;
        }
        // Copy the backslash and the escaped character (if any) unchanged.
        final int escapeEnd = Math.min(escape + 2, length);
        folded.append(regex, escape, escapeEnd);
        index = escapeEnd;
    }
    return folded.toString();
}

private HTMLPageAsset getHtmlPageAsset(final Experiment experiment) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ public interface VanityUrlAPI {

String VANITY_URL_RESPONSE_HEADER = "X-DOT-VanityUrl";

/**
* Legacy Vanity URL URI used as the fallback home page. When the incoming
* request path is "/" and no other Vanity URL matches, implementations
* resolve this URI instead to support the historical cmsHomePage behavior.
*/
String LEGACY_CMS_HOME_PAGE = "/cmsHomePage";

/**
* Verifies that the Vanity URL as Contentlet has all the required fields. The list of mandatory fields can be
* verified in the Content Type's definition.
Expand Down Expand Up @@ -109,16 +116,56 @@ boolean handleVanityURLRedirects(VanityUrlRequestWrapper request, HttpServletRes


/**
 * Look up all published {@link VanityUrl}s on the given host whose
 * {@code forwardTo} equals {@code forward} and whose action equals
 * {@code action}.
 *
 * <p>This overload only searches the specified host. Callers that also
 * want vanities from {@code SYSTEM_HOST} should use
 * {@link #findByForward(Host, Language, String, int, boolean)} with
 * {@code includeSystemHost = true}.
 *
 * @param host {@link VanityUrl}'s Host
 * @param language {@link VanityUrl}'s Language
 * @param forward forward target to look for
 * @param action HTTP action code to look for (e.g. 200, 301, 302)
 * @return the matching {@link CachedVanityUrl}s from the given host only
 */
List<CachedVanityUrl> findByForward(Host host, Language language, String forward, int action);

/**
 * Variant of {@link #findByForward(Host, Language, String, int)} that lets
 * the caller opt in to vanities published on {@code SYSTEM_HOST}, in line
 * with the host fallback of {@link #resolveVanityUrl}. The scope is chosen
 * with an explicit flag so that a widened lookup is visible where it is
 * requested.
 *
 * <p>This default implementation ignores {@code includeSystemHost} and
 * simply returns the host-only result of the 4-arg overload, so
 * implementors that do not override it keep working. Implementations such
 * as {@link VanityUrlAPIImpl} override it to honor the flag.
 *
 * <p><b>Authorization:</b> no permission check is performed. The method is
 * meant for internal, system-level callers (e.g. the Experiments URL
 * pattern engine). Do not call it on behalf of a user without {@code READ}
 * permission on the host, and do not expose its results directly to end
 * users, in particular when {@code includeSystemHost} is {@code true}.
 *
 * @param host {@link VanityUrl}'s Host
 * @param language {@link VanityUrl}'s Language
 * @param forward forward target to look for
 * @param action HTTP action code to look for (e.g. 200, 301, 302)
 * @param includeSystemHost if {@code true}, also return vanities published on {@code SYSTEM_HOST}
 * @return the matching {@link CachedVanityUrl}s from the given host, and optionally from {@code SYSTEM_HOST}
 */
default List<CachedVanityUrl> findByForward(final Host host, final Language language,
        final String forward, final int action,
        final boolean includeSystemHost) {
    // Host-only fallback: the flag is intentionally not consulted here.
    final List<CachedVanityUrl> hostOnlyMatches = findByForward(host, language, forward, action);
    return hostOnlyMatches;
}

/**
*
* @param vanityUrl
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Implementation class for the {@link VanityUrlAPI}.
Expand All @@ -69,7 +70,13 @@ public class VanityUrlAPIImpl implements VanityUrlAPI {
+ " (select velocity_var_name from structure where structuretype=7)";


/**
 * Retained for source compatibility with callers that referenced
 * {@code VanityUrlAPIImpl.LEGACY_CMS_HOME_PAGE} before the constant was
 * promoted to the {@link VanityUrlAPI} interface. It is an alias of the
 * canonical interface constant, so both always hold the same value; new
 * code should use {@link VanityUrlAPI#LEGACY_CMS_HOME_PAGE}.
 */
public static final String LEGACY_CMS_HOME_PAGE = VanityUrlAPI.LEGACY_CMS_HOME_PAGE;
private final ContentletAPI contentletAPI;
private final VanityUrlCache cache;
private final LanguageAPI languageAPI;
Expand Down Expand Up @@ -257,7 +264,7 @@ public Optional<CachedVanityUrl> resolveVanityUrl(final String url, final Host s
// if this is the /cmsHomePage vanity
if (matched.isEmpty() && StringPool.FORWARD_SLASH.equals(url)) {

matched = resolveVanityUrl(LEGACY_CMS_HOME_PAGE, site, language);
matched = resolveVanityUrl(VanityUrlAPI.LEGACY_CMS_HOME_PAGE, site, language);
}


Expand Down Expand Up @@ -452,12 +459,29 @@ private String encodeRedirectURL(final String uri) {
}
}

@Override
public List<CachedVanityUrl> findByForward(final Host host, final Language language, final String forward,
        final int action) {
    // Host-only lookup: reuse the 5-arg overload with the SYSTEM_HOST scope
    // switched off. That overload is the one annotated with @CloseDBIfOpened,
    // so the annotation is not repeated here; the original author's note
    // states that the advice also applies to this self-invocation.
    final boolean includeSystemHost = false;
    return findByForward(host, language, forward, action, includeSystemHost);
}

@Override
@CloseDBIfOpened
public List<CachedVanityUrl> findByForward(final Host host, final Language language, final String forward,
int action) {
return load(host, language)
.stream()
final int action, final boolean includeSystemHost) {
// When includeSystemHost is true, also pull vanities published on
// SYSTEM_HOST — they apply site-wide, mirroring resolveVanityUrl's
// SYSTEM_HOST fallback.
final Host systemHost = APILocator.systemHost();
final Stream<CachedVanityUrl> systemHostVanities = includeSystemHost && !systemHost.equals(host)
? load(systemHost, language).stream()
: Stream.empty();

return Stream.concat(load(host, language).stream(), systemHostVanities)
.filter(cachedVanityUrl -> cachedVanityUrl.response == action)
.filter(cachedVanityUrl -> cachedVanityUrl.forwardTo.equals(forward))
.collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import com.dotmarketing.exception.DotSecurityException;
import com.dotmarketing.filters.CMSUrlUtil;

import com.dotmarketing.logConsole.model.LogMapper;
import com.dotmarketing.portlets.languagesmanager.model.Language;
import com.dotmarketing.util.Logger;
import java.io.IOException;
Expand All @@ -45,7 +44,6 @@ public class VisitorFilter implements Filter {

private final LanguageWebAPI languageWebAPI;
private final UserWebAPI userWebAPI;
private final static String CMS_HOME_PAGE = "/cmsHomePage";
public final static String VANITY_URL_ATTRIBUTE="VANITY_URL_ATTRIBUTE";
public final static String DOTPAGE_PROCESSING_TIME="DOTPAGE_PROCESSING_TIME";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.dotcms.util.IntegrationTestInitService;
import com.dotcms.vanityurl.model.VanityUrl;
import com.dotmarketing.beans.Host;
import com.dotmarketing.business.APILocator;
import com.dotmarketing.exception.DotDataException;
import com.dotmarketing.portlets.contentlet.model.Contentlet;
import com.dotmarketing.portlets.htmlpageasset.model.HTMLPageAsset;
Expand Down Expand Up @@ -303,6 +304,118 @@ public void experimentWithVanityUrl() throws DotDataException {

}

/**
 * Method to test: {@link ExperimentUrlPatternCalculator#calculatePageUrlRegexPattern(Experiment)}
 * When: A Published Vanity Url with URI "/cmsHomePage" and action 200 forwards to
 * the Experiment's Page. Per the legacy fallback in
 * {@link com.dotcms.vanityurl.business.VanityUrlAPIImpl#resolveVanityUrl}, a visitor
 * requesting "/" is transparently forwarded to the target page, so the browser URL
 * stays "/".
 * Should: The regex returned by the method should match the Experiment Page URL,
 * the "/cmsHomePage" URL (lowercased), and the root URL with and without the
 * trailing "/".
 *
 * See issue https://github.com/dotCMS/core/issues/34747
 *
 * @throws DotDataException
 */
@Test
public void experimentWithCmsHomePageVanity() throws DotDataException {

    final Host host = new SiteDataGen().nextPersisted();
    final Template template = new TemplateDataGen().host(host).nextPersisted();

    final HTMLPageAsset experimentPage = new HTMLPageDataGen(host, template).nextPersisted();

    final Condition<Object> condition = Condition.builder()
            .parameter("url")
            .value("testing")
            .operator(AbstractCondition.Operator.CONTAINS)
            .build();

    final Metric metric = Metric.builder()
            .name("Testing Metric")
            .type(MetricType.REACH_PAGE)
            .addConditions(condition).build();

    final Goals goal = Goals.builder().primary(GoalFactory.create(metric)).build();
    final Experiment experiment = new ExperimentDataGen()
            .page(experimentPage)
            .addGoal(goal)
            .nextPersisted();

    // Published only for its side effect; the returned Contentlet is not needed.
    new VanityUrlDataGen()
            .uri("/cmsHomePage")
            .forwardTo(experimentPage.getURI())
            .action(200)
            .host(host)
            .languageId(experimentPage.getLanguageId())
            .nextPersistedAndPublish();

    final String regex = ExperimentUrlPatternCalculator.INSTANCE.calculatePageUrlRegexPattern(experiment);

    // The SDK lowercases incoming URL paths before matching (see
    // verifyRegex in parser.ts), and the server lowercases the assembled
    // regex, so every URL checked here is lowercased to mirror runtime
    // behavior, including the generated page URL.
    final String experimentPageUrl = ("http://localhost:8080/" + experimentPage.getPageUrl())
            .toLowerCase(java.util.Locale.ROOT);

    assertTrue(experimentPageUrl.matches(regex));
    assertTrue(("http://localhost:8080/cmshomepage").matches(regex));
    assertTrue(("http://localhost:8080/").matches(regex));
    assertTrue(("http://localhost:8080").matches(regex));
}

/**
 * Method to test: {@link ExperimentUrlPatternCalculator#calculatePageUrlRegexPattern(Experiment)}
 * When: A Published Vanity Url with URI "/cmsHomePage" and action 200 is published
 * on SYSTEM_HOST (rather than on the experiment page's host) and forwards to the
 * Experiment's Page. Vanities on SYSTEM_HOST apply site-wide, so the "/" fallback
 * still applies.
 * Should: The regex returned by the method should match the Experiment Page URL,
 * the "/cmsHomePage" URL (lowercased), and the root URL with and without the
 * trailing "/".
 *
 * See issue https://github.com/dotCMS/core/issues/34747
 *
 * @throws DotDataException
 */
@Test
public void experimentWithSystemHostCmsHomePageVanity() throws DotDataException {

    final Host host = new SiteDataGen().nextPersisted();
    final Template template = new TemplateDataGen().host(host).nextPersisted();

    final HTMLPageAsset experimentPage = new HTMLPageDataGen(host, template).nextPersisted();

    final Condition<Object> condition = Condition.builder()
            .parameter("url")
            .value("testing")
            .operator(AbstractCondition.Operator.CONTAINS)
            .build();

    final Metric metric = Metric.builder()
            .name("Testing Metric")
            .type(MetricType.REACH_PAGE)
            .addConditions(condition).build();

    final Goals goal = Goals.builder().primary(GoalFactory.create(metric)).build();
    final Experiment experiment = new ExperimentDataGen()
            .page(experimentPage)
            .addGoal(goal)
            .nextPersisted();

    // Published only for its side effect; the returned Contentlet is not needed.
    // NOTE(review): this vanity lives on SYSTEM_HOST and is never removed, so it
    // may leak into other tests that resolve "/" - confirm whether cleanup is needed.
    new VanityUrlDataGen()
            .uri("/cmsHomePage")
            .forwardTo(experimentPage.getURI())
            .action(200)
            .host(APILocator.systemHost())
            .languageId(experimentPage.getLanguageId())
            .nextPersistedAndPublish();

    final String regex = ExperimentUrlPatternCalculator.INSTANCE.calculatePageUrlRegexPattern(experiment);

    // The assembled regex is lowercased on the server, so the URLs checked
    // here are lowercased as well, including the generated page URL.
    final String experimentPageUrl = ("http://localhost:8080/" + experimentPage.getPageUrl())
            .toLowerCase(java.util.Locale.ROOT);

    assertTrue(experimentPageUrl.matches(regex));
    assertTrue(("http://localhost:8080/cmshomepage").matches(regex));
    assertTrue(("http://localhost:8080/").matches(regex));
    assertTrue(("http://localhost:8080").matches(regex));
}

/**
* Method to test: {@link ExperimentUrlPatternCalculator#calculatePageUrlRegexPattern(Experiment)}
* When: There is a Published Vanity Url whose forwardTo equals the URI of the Experiment's Page but whose action is not 200
Expand Down
Loading
Loading