-
-
Notifications
You must be signed in to change notification settings - Fork 156
/
DefaultIndexer.php
154 lines (124 loc) · 5.12 KB
/
DefaultIndexer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
<?php
declare(strict_types=1);
/*
* This file is part of Contao.
*
* (c) Leo Feyer
*
* @license LGPL-3.0-or-later
*/
namespace Contao\CoreBundle\Search\Indexer;
use Contao\CoreBundle\Framework\ContaoFramework;
use Contao\CoreBundle\Search\Document;
use Contao\Search;
use Doctrine\DBAL\Connection;
/**
 * Indexer that validates a crawled document and hands it over to the
 * Contao search adapter.
 *
 * Documents are rejected with an IndexerException, either flagged as a
 * warning (the document is simply not eligible for indexing) or as a hard
 * error (writing the search index entry itself failed).
 */
class DefaultIndexer implements IndexerInterface
{
    private ContaoFramework $framework;
    private Connection $connection;
    private bool $indexProtected;

    /**
     * @internal Do not inherit from this class; decorate the "contao.search.default_indexer" service instead
     */
    public function __construct(ContaoFramework $framework, Connection $connection, bool $indexProtected = false)
    {
        $this->framework = $framework;
        $this->connection = $connection;
        $this->indexProtected = $indexProtected;
    }

    /**
     * Validates the document and adds it to the search index.
     *
     * The document is only indexed if it has status code 200, a non-empty
     * body, a canonical URI that (if present) equals the document URI and
     * JSON-LD metadata that provides a page ID and does not forbid indexing.
     *
     * @throws IndexerException as a warning if the document is not eligible for indexing,
     *                          as an error if the search adapter fails to add the entry
     */
    public function index(Document $document): void
    {
        if (200 !== $document->getStatusCode()) {
            $this->throwBecause('HTTP Statuscode is not equal to 200.');
        }

        if ('' === $document->getBody()) {
            $this->throwBecause('Cannot index empty response.');
        }

        if (($canonical = $document->extractCanonicalUri()) && ((string) $canonical !== (string) $document->getUri())) {
            $this->throwBecause(sprintf('Ignored because canonical URI "%s" does not match document URI.', $canonical));
        }

        // Fall back to a placeholder title if the <title> element cannot be read
        try {
            $title = $document->getContentCrawler()->filterXPath('//head/title')->first()->text(null, true);
        } catch (\Exception $e) {
            $title = 'undefined';
        }

        // Fall back to English if the <html> element has no "lang" attribute
        try {
            $language = $document->getContentCrawler()->filterXPath('//html[@lang]')->first()->attr('lang');
        } catch (\Exception $e) {
            $language = 'en';
        }

        // Defaults; the JSON-LD data merged in below may override any of these keys
        $meta = [
            'title' => $title,
            'language' => $language,
            'protected' => false,
            'groups' => [],
        ];

        $this->extendMetaFromJsonLdScripts($document, $meta);

        if (!isset($meta['pageId']) || 0 === $meta['pageId']) {
            $this->throwBecause('No page ID could be determined.');
        }

        // If search was disabled in the page settings, we do not index
        if (isset($meta['noSearch']) && true === $meta['noSearch']) {
            $this->throwBecause('Was explicitly marked "noSearch" in page settings.');
        }

        // If the front end preview is activated, we do not index
        if (isset($meta['fePreview']) && true === $meta['fePreview']) {
            $this->throwBecause('Indexing when the front end preview is enabled is not possible.');
        }

        // If the page is protected and indexing protected pages is disabled, we do not index
        if (isset($meta['protected']) && true === $meta['protected'] && !$this->indexProtected) {
            $this->throwBecause('Indexing protected pages is disabled.');
        }

        $this->framework->initialize();

        $search = $this->framework->getAdapter(Search::class);

        try {
            $search->indexPage([
                'url' => (string) $document->getUri(),
                'content' => $document->getBody(),
                'protected' => $meta['protected'] ? '1' : '',
                'groups' => $meta['groups'],
                'pid' => $meta['pageId'],
                'title' => $meta['title'],
                'language' => $meta['language'],
                'meta' => $document->extractJsonLdScripts(),
            ]);
        } catch (\Throwable $t) {
            // Hard error: chain the original throwable so its type and stack trace are not lost
            $this->throwBecause('Could not add a search index entry: '.$t->getMessage(), false, $t);
        }
    }

    /**
     * Removes the search index entry for the URI of the given document.
     */
    public function delete(Document $document): void
    {
        $this->framework->initialize();

        $search = $this->framework->getAdapter(Search::class);
        $search->removeEntry((string) $document->getUri());
    }

    /**
     * Empties the tl_search, tl_search_index and tl_search_term tables.
     */
    public function clear(): void
    {
        $this->connection->executeStatement('TRUNCATE TABLE tl_search');
        $this->connection->executeStatement('TRUNCATE TABLE tl_search_index');
        $this->connection->executeStatement('TRUNCATE TABLE tl_search_term');
    }

    /**
     * Always throws an IndexerException with the given message.
     *
     * @param string          $message     The reason why the document was not indexed
     * @param bool            $onlyWarning Whether to create the exception as a warning
     * @param \Throwable|null $previous    The underlying cause; only attached to non-warning exceptions
     *
     * @throws IndexerException
     *
     * @return never
     */
    private function throwBecause(string $message, bool $onlyWarning = true, ?\Throwable $previous = null): void
    {
        if ($onlyWarning) {
            throw IndexerException::createAsWarning($message);
        }

        throw new IndexerException($message, 0, $previous);
    }

    /**
     * Merges the Contao JSON-LD data found in the document into $meta.
     *
     * Looks for the "Page" type first and falls back to the deprecated
     * "RegularPage" type.
     *
     * @param array $meta The meta data array, extended in place
     *
     * @throws IndexerException as a warning if the document contains neither JSON-LD type
     */
    private function extendMetaFromJsonLdScripts(Document $document, array &$meta): void
    {
        $jsonLds = $document->extractJsonLdScripts('https://schema.contao.org/', 'Page');

        if (0 === \count($jsonLds)) {
            $jsonLds = $document->extractJsonLdScripts('https://schema.contao.org/', 'RegularPage');

            if (0 === \count($jsonLds)) {
                $this->throwBecause('No JSON-LD found.');
            }

            trigger_deprecation('contao/core-bundle', '4.9', 'Using the JSON-LD type "RegularPage" has been deprecated and will no longer work in Contao 5.0. Use "Page" instead.');
        }

        // Merge all entries to one meta array (the latter overrides the former)
        $meta = array_merge($meta, array_merge(...$jsonLds));
    }
}