/
SitemapScheduler.java
241 lines (205 loc) · 10.4 KB
/
SitemapScheduler.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sling.sitemap.impl;
import org.apache.sling.api.resource.LoginException;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.apache.sling.api.resource.path.PathSet;
import org.apache.sling.commons.scheduler.Scheduler;
import org.apache.sling.event.jobs.Job;
import org.apache.sling.event.jobs.JobManager;
import org.apache.sling.serviceusermapping.ServiceUserMapped;
import org.apache.sling.sitemap.SitemapGeneratorManager;
import org.jetbrains.annotations.Nullable;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.ConfigurationPolicy;
import org.osgi.service.component.annotations.Reference;
import org.osgi.service.metatype.annotations.AttributeDefinition;
import org.osgi.service.metatype.annotations.Designate;
import org.osgi.service.metatype.annotations.ObjectClassDefinition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.stream.Collectors;
import static org.apache.sling.sitemap.SitemapUtil.findSitemapRoots;
@Component(
service = {SitemapScheduler.class, Runnable.class},
configurationPolicy = ConfigurationPolicy.REQUIRE,
property = {
Scheduler.PROPERTY_SCHEDULER_CONCURRENT + ":Boolean=false",
Scheduler.PROPERTY_SCHEDULER_RUN_ON + "=" + Scheduler.VALUE_RUN_ON_SINGLE,
Scheduler.PROPERTY_SCHEDULER_THREAD_POOL + "=" + SitemapScheduler.THREADPOOL_NAME
}
)
@Designate(ocd = SitemapScheduler.Configuration.class, factory = true)
public class SitemapScheduler implements Runnable {
/**
 * Metatype definition of a single scheduler configuration. The enclosing component is designated with
 * {@code factory = true} and {@code ConfigurationPolicy.REQUIRE}, so every configuration of this type is
 * bound to its own {@link SitemapScheduler} instance.
 */
@ObjectClassDefinition(name = "Apache Sling Sitemap - Scheduler")
@interface Configuration {
/** Name of this scheduler configuration; used as the suffix of the logger name created on activation. */
@AttributeDefinition(name = "Name", description = "The name of the scheduler configuration")
String scheduler_name();
/**
 * Cron expression of the schedule. It is not read by this class directly.
 * NOTE(review): presumably picked up by the Sling Commons Scheduler as a service property - confirm.
 */
@AttributeDefinition(name = "Schedule", description = "A cron expression defining the schedule at which the " +
"sitemap generation jobs will be scheduled.")
String scheduler_expression();
/** Class names of the generators to call; compared against the generator's runtime class name. */
@AttributeDefinition(name = "Include Generators", description = "A list of full qualified class names of " +
"SitemapGenerator implementations. If set only the listed SitemapGenerators will be called. If left " +
"empty all will be called.")
String[] includeGenerators() default {};
/** Class names of the generators to skip; compared against the generator's runtime class name. */
@AttributeDefinition(name = "Exclude Generators", description = "A list of full qualified class names of " +
"SitemapGenerator implementations. If set the listed SitemapGenerators will not be called. If left " +
"empty all will be called.")
String[] excludeGenerators() default {};
/** Sitemap names this scheduler is limited to; an empty list means no limitation. */
@AttributeDefinition(name = "Names", description = "A list of names. If set only sitemaps for the given " +
"names will be generated by. If left empty all will be generated.")
String[] names() default {};
/** Path below which sitemap roots are searched; roots outside of it are treated as excluded. */
@AttributeDefinition(name = "Search Path", description = "The path under which sitemap roots should be " +
"searched for")
String searchPath() default "/content";
/** Paths or glob patterns a sitemap root has to match to be scheduled; empty means every root is included. */
@AttributeDefinition(name = "Include Paths", description = "A list of paths that should be included by the scheduler. "
+ "If left empty, all sitemap roots in the configured search path will be included. Absolute paths and glob patterns "
+ "are supported.")
String[] includePaths() default {};
/** Paths or glob patterns of sitemap roots that must not be scheduled; empty means nothing is excluded. */
@AttributeDefinition(name = "Exclude Paths", description = "A list of paths that should be excluded by the scheduler. "
+ "If left empty, no sitemap roots in the configured search path will be excluded. Absolute paths and glob patterns "
+ "are supported.")
String[] excludePaths() default {};
}
/** Name of the thread pool announced through the component's scheduler thread pool property. */
public static final String THREADPOOL_NAME = "org-apache-sling-sitemap";
// Not static/final on purpose: the logger is created in activate() and carries the configuration name.
private Logger log;
/** Authentication info selecting the "sitemap-reader" sub service when opening the service resource resolver. */
private static final Map<String, Object> AUTH = Collections.singletonMap(ResourceResolverFactory.SUBSERVICE,
"sitemap-reader");
@Reference
private JobManager jobManager;
@Reference
private ResourceResolverFactory resourceResolverFactory;
@Reference
private SitemapGeneratorManager generatorManager;
// Never read in this class.
// NOTE(review): presumably only delays activation until the "sitemap-reader" service user mapping exists - confirm.
@Reference(target = "(subServiceName=sitemap-reader)")
private ServiceUserMapped serviceUserMapped;
// The three sets below are null (not empty) when the corresponding configuration has no non-blank entries.
private Set<String> names;
private Set<String> includeGenerators;
private Set<String> excludeGenerators;
private String searchPath;
// Both path sets stay null when the corresponding configuration array is empty.
private PathSet includePaths;
private PathSet excludePaths;
/**
 * Initialises this scheduler instance from its configuration.
 * <p>
 * Creates a logger named after the class and the configured scheduler name, converts the generator and name
 * lists to sets (or {@code null} when they hold no usable entry) and builds the include/exclude path sets
 * only when at least one path was configured.
 *
 * @param configuration the configuration this component instance was activated with
 */
@Activate
protected void activate(Configuration configuration) {
    final String loggerName = SitemapScheduler.class.getName() + '~' + configuration.scheduler_name();
    log = LoggerFactory.getLogger(loggerName);

    searchPath = configuration.searchPath();
    names = asSet(configuration.names());
    includeGenerators = asSet(configuration.includeGenerators());
    excludeGenerators = asSet(configuration.excludeGenerators());

    // the path sets remain unset (null) when nothing was configured
    final String[] configuredIncludes = configuration.includePaths();
    if (configuredIncludes.length != 0) {
        includePaths = PathSet.fromStringCollection(Arrays.asList(configuredIncludes));
    }
    final String[] configuredExcludes = configuration.excludePaths();
    if (configuredExcludes.length != 0) {
        excludePaths = PathSet.fromStringCollection(Arrays.asList(configuredExcludes));
    }
}
/**
 * Runs the scheduler without any restriction on the sitemap names, i.e. delegates to
 * {@link #schedule(Collection)} with {@code null}.
 */
@Override
public void run() {
    final Collection<String> noNameRestriction = null;
    schedule(noNameRestriction);
}
/**
 * Looks up all sitemap roots below the configured search path with a service resource resolver and schedules
 * each of them through {@link #schedule(Resource, Collection)}.
 * <p>
 * A failed service login is logged as a warning and nothing is scheduled.
 *
 * @param includeNames the names to restrict the scheduling to, or {@code null} for no restriction
 */
public void schedule(@Nullable Collection<String> includeNames) {
    try (ResourceResolver resolver = resourceResolverFactory.getServiceResourceResolver(AUTH)) {
        findSitemapRoots(resolver, searchPath)
                .forEachRemaining(sitemapRoot -> schedule(sitemapRoot, includeNames));
    } catch (LoginException ex) {
        log.warn("Failed start sitemap jobs: {}", ex.getMessage(), ex);
    }
}
/**
 * Adds a sitemap generation job for every name that is applicable to the given sitemap root.
 * <p>
 * Nothing is scheduled when the sitemap root is excluded by this scheduler's configuration. When
 * {@code includeNames} is given, only applicable names also contained in it are scheduled.
 *
 * @param sitemapRoot  the sitemap root to schedule jobs for
 * @param includeNames the names to restrict the scheduling to, or {@code null} for no restriction
 */
public void schedule(Resource sitemapRoot, @Nullable Collection<String> includeNames) {
    if (isExcluded(sitemapRoot)) {
        return;
    }

    // Filter while iterating instead of calling retainAll() on the returned set: getApplicableNames() is public
    // and overridable, and its result is not guaranteed to be modifiable.
    for (String applicableName : getApplicableNames(sitemapRoot)) {
        if (includeNames == null || includeNames.contains(applicableName)) {
            addJob(sitemapRoot.getPath(), applicableName);
        }
    }
}
/**
 * Returns the names for the given sitemap root this {@link SitemapScheduler} is applicable to.
 * <p>
 * A name is applicable when all of the following hold:
 * <ul>
 *     <li>the sitemap root is not excluded by the search path or the include/exclude paths,</li>
 *     <li>the class name of the generator providing the name is listed in the configured include generators
 *     (if any are configured) and not listed in the configured exclude generators (if any are configured),</li>
 *     <li>the name is not reported as on-demand by the generator manager,</li>
 *     <li>the name is contained in the configured names (if any are configured).</li>
 * </ul>
 *
 * @param sitemapRoot the sitemap root to get the applicable names for
 * @return a new, modifiable set of applicable names; empty when the sitemap root is excluded or no name applies
 */
public Set<String> getApplicableNames(Resource sitemapRoot) {
    if (isExcluded(sitemapRoot)) {
        // a fresh set keeps the result modifiable on every path, callers are free to narrow it down further
        return new HashSet<>();
    }

    Set<String> onDemandNames = generatorManager.getOnDemandNames(sitemapRoot);

    return generatorManager.getGenerators(sitemapRoot).entrySet().stream()
            .filter(entry -> includeGenerators == null
                    || includeGenerators.contains(entry.getValue().getClass().getName()))
            .filter(entry -> excludeGenerators == null
                    || !excludeGenerators.contains(entry.getValue().getClass().getName()))
            .filter(entry -> !onDemandNames.contains(entry.getKey()))
            // limit to the configured names
            .filter(entry -> names == null || names.contains(entry.getKey()))
            .map(Map.Entry::getKey)
            // Collectors.toSet() gives no guarantee on mutability, so collect into an explicit HashSet
            .collect(Collectors.toCollection(HashSet::new));
}
/**
 * Checks whether the given sitemap root must be skipped by this scheduler.
 * <p>
 * A sitemap root is excluded when it is neither the configured search path nor a descendant of it, when include
 * paths are configured and none of them matches, or when one of the configured exclude paths matches.
 *
 * @param sitemapRoot the sitemap root to check
 * @return {@code true} if the sitemap root must not be scheduled, {@code false} otherwise
 */
protected boolean isExcluded(Resource sitemapRoot) {
    String path = sitemapRoot.getPath();

    // Verify that the sitemapRoot is in the schedulers search path. Comparing with a trailing slash on both sides
    // matches the search path itself as well as its descendants, and also works when the search path is "/" or
    // was configured with a trailing slash (appending another "/" would never match in those cases).
    String searchPathPrefix = searchPath.endsWith("/") ? searchPath : searchPath + "/";
    if (!(path + "/").startsWith(searchPathPrefix)) {
        log.debug("Exclude sitemap root outside of the configured search path '{}': {}", searchPath, path);
        return true;
    }
    // verify if the sitemapRoot is included
    if (includePaths != null && includePaths.matches(path) == null) {
        log.debug("Sitemap root is not included: {}", path);
        return true;
    }
    // verify if the sitemapRoot is not excluded
    if (excludePaths != null && excludePaths.matches(path) != null) {
        log.debug("Sitemap root is explicitly excluded: {}", path);
        return true;
    }
    return false;
}
/**
 * Adds a sitemap generation job for the given sitemap root and sitemap name.
 * <p>
 * When the job manager does not create a job, a warning is logged instead of failing.
 *
 * @param sitemapRoot    the path of the sitemap root
 * @param applicableName the name of the sitemap to generate
 */
protected void addJob(String sitemapRoot, String applicableName) {
    Map<String, Object> jobProperties = new HashMap<>();
    jobProperties.put(SitemapGeneratorExecutor.JOB_PROPERTY_SITEMAP_NAME, applicableName);
    jobProperties.put(SitemapGeneratorExecutor.JOB_PROPERTY_SITEMAP_ROOT, sitemapRoot);

    Job job = jobManager.addJob(SitemapGeneratorExecutor.JOB_TOPIC, jobProperties);
    if (job == null) {
        // JobManager#addJob returns null when the job could not be created
        log.warn("Failed to add job for sitemap root '{}' and name '{}'", sitemapRoot, applicableName);
        return;
    }
    log.debug("Added job {}", job.getId());
}
/**
 * Converts a configured string array into a set, dropping {@code null} and blank entries.
 * <p>
 * Entries are only tested for blankness; the values kept in the set are not trimmed.
 *
 * @param configuration the configured values, may be {@code null}
 * @return the set of non-blank values, or {@code null} if there are none
 */
@Nullable
private static Set<String> asSet(@Nullable String[] configuration) {
    if (configuration == null) {
        return null;
    }

    Set<String> values = new HashSet<>();
    for (String candidate : configuration) {
        if (candidate != null && !candidate.trim().isEmpty()) {
            values.add(candidate);
        }
    }
    // "nothing configured" is represented by null, never by an empty set
    return values.isEmpty() ? null : values;
}
}