Skip to content

Commit cd62fbf

Browse files
chore: Move calculation of "retry after" into Response
1 parent 5fc6f75 commit cd62fbf

File tree

3 files changed

+113
-106
lines changed

3 files changed

+113
-106
lines changed

lib/SpiderBits/src/Response.php

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,4 +230,112 @@ public static function extractCharsetFromContentType(string $content_type): ?str
230230
$charset = trim($charset, '"');
231231
return $charset;
232232
}
233+
234+
/**
 * Returns the date after which the response is considered as stale.
 *
 * The duration (in seconds) is taken from the first matching source:
 *
 * 1. the "max-age" Cache-Control directive, minus the "Age" header;
 * 2. the "Expires" header (a date that cannot be parsed is handled as
 *    already expired);
 * 3. the "Retry-After" header (either a number of seconds or an HTTP
 *    date), only when the status is 429;
 * 4. the default duration otherwise.
 *
 * The duration is then clamped between the min and max durations before
 * being added to the current time.
 *
 * @see https://httpwg.org/specs/rfc9111.html
 *
 * @param int $default_duration
 *     Duration in seconds used when no header gives an expiration
 *     (1 hour by default).
 * @param int $min_duration
 *     Lower bound in seconds of the returned delay (15 minutes by default).
 * @param int $max_duration
 *     Upper bound in seconds of the returned delay (7 days by default).
 */
public function getRetryAfter(
    int $default_duration = 1 * 60 * 60,
    int $min_duration = 1 * 60 * 15,
    int $max_duration = 1 * 60 * 60 * 24 * 7,
): \DateTimeImmutable {
    $age = $this->header('Age', '0');
    $expires = $this->header('Expires', '');
    $retry_after = $this->header('Retry-After', '0');

    $cache_control_directives = $this->getCacheControlDirectives();

    $duration = $default_duration;

    if (isset($cache_control_directives['max-age'])) {
        $max_age = (int) $cache_control_directives['max-age'];
        $age = (int) $age;
        $duration = $max_age - $age;
    } elseif ($expires !== '') {
        // Compare against the empty string instead of relying on
        // truthiness: the string "0" is falsy in PHP, so "Expires: 0" (a
        // common way to say "already expired") would otherwise be ignored
        // and the default duration would be applied.
        $expired_at = self::parseHttpDate($expires);

        if ($expired_at === null) {
            // An invalid date is handled as already expired.
            $expired_at = \Minz\Time::now();
        }

        $expires_timestamp = $expired_at->getTimestamp();
        $now_timestamp = \Minz\Time::now()->getTimestamp();

        $duration = $expires_timestamp - $now_timestamp;
    } elseif ($this->status === 429) {
        $retry_at = self::parseHttpDate($retry_after);

        if ($retry_at === null) {
            // Not an HTTP date: read the header as a number of seconds.
            $duration = (int) $retry_after;
        } else {
            $retry_at_timestamp = $retry_at->getTimestamp();
            $now_timestamp = \Minz\Time::now()->getTimestamp();

            $duration = $retry_at_timestamp - $now_timestamp;
        }
    }

    // Negative or too short durations are raised to the minimum, too long
    // ones are lowered to the maximum.
    $duration = max($min_duration, $duration);
    $duration = min($max_duration, $duration);

    return \Minz\Time::fromNow($duration, 'seconds');
}
287+
288+
/**
 * Parses the "Cache-Control" HTTP header and returns an array with the
 * different cache directives.
 *
 * Directive names are lowercased. A directive with an argument (e.g.
 * "max-age=60") is returned with its argument as a string, without
 * surrounding whitespace or double quotes. A directive without argument
 * (e.g. "no-store") is returned with the value true.
 *
 * Empty parts are skipped, so a missing or empty header returns an empty
 * array.
 *
 * Note: the header is split on commas without considering quoted strings,
 * so an argument containing a comma is not supported.
 *
 * @return array<string, string|true>
 */
public function getCacheControlDirectives(): array
{
    $directives = [];

    $cache_control = $this->header('Cache-Control', '');

    foreach (explode(',', $cache_control) as $part) {
        $part = trim($part);

        if ($part === '') {
            // explode() returns [''] for an empty header, and empty parts
            // for trailing or doubled commas: don't register them as a
            // directive named "".
            continue;
        }

        if (str_contains($part, '=')) {
            [$directive, $value] = explode('=', $part, 2);
            // The argument can be a quoted string (e.g. max-age="60").
            $value = trim(trim($value), '"');
        } else {
            $directive = $part;
            $value = true;
        }

        $directive = strtolower(trim($directive));

        if ($directive === '') {
            continue;
        }

        $directives[$directive] = $value;
    }

    return $directives;
}
318+
319+
/**
 * Parses an HTTP date header.
 *
 * The supported formats are the ones of DateTimeInterface::RFC7231 (e.g.
 * "Wed, 21 Oct 2015 07:28:00 GMT") and DateTimeInterface::RFC850 (e.g.
 * "Wednesday, 21-Oct-15 07:28:00 GMT").
 *
 * @param string $expires
 *     The value of the header to parse.
 *
 * @return ?\DateTimeImmutable
 *     The parsed date, or null if the value matches none of the formats.
 */
public static function parseHttpDate(string $expires): ?\DateTimeImmutable
{
    $formats = [
        \DateTimeInterface::RFC7231,
        \DateTimeInterface::RFC850,
        // Ignore the ANSI C's asctime() format as obsolete and more
        // difficult to parse.
    ];

    // In the RFC7231 format, "GMT" is an escaped literal and not a
    // timezone specifier. Without an explicit timezone, the date would be
    // read in the PHP default timezone and the timestamp would be shifted
    // on servers not configured in UTC. The timezone given here is ignored
    // when the format itself parses a timezone (as with RFC850).
    $utc = new \DateTimeZone('UTC');

    foreach ($formats as $format) {
        $date = \DateTimeImmutable::createFromFormat($format, $expires, $utc);

        if ($date !== false) {
            return $date;
        }
    }

    return null;
}
233341
}

src/http/Cache.php

Lines changed: 5 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -57,70 +57,27 @@ public function getResponse(string $url): ?Response
5757
/**
5858
* Saves an HTTP response in the cache.
5959
*
60-
* The expiration duration is calculated based on HTTP headers of the
61-
* response.
62-
*
6360
* @see https://httpwg.org/specs/rfc9111.html
6461
*/
6562
public function saveResponse(string $url, Response $response): void
6663
{
6764
$code = $response->status;
6865

69-
if ($code < 200 || $code === 206 || $code === 304 || $code >= 500) {
66+
if ($code < 200 || $code === 206 || $code === 304) {
7067
// Don't cache the response if the response is not final, or if
71-
// status is 206 or 304 as we don't handle these special codes, or
72-
// if the server is in error.
68+
// status is 206 or 304 as we don't handle these special codes.
7369
return;
7470
}
7571

76-
$cache_control = $response->header('Cache-Control', '');
77-
$age = $response->header('Age', '0');
78-
$expires = $response->header('Expires', '');
79-
$retry_after = $response->header('Retry-After', '0');
80-
81-
$directives = $this->parseCacheControl($cache_control);
72+
$cache_control_directives = $response->getCacheControlDirectives();
8273

83-
if (isset($directives['no-store']) || isset($directives['no-cache'])) {
74+
if (isset($cache_control_directives['no-store']) || isset($cache_control_directives['no-cache'])) {
8475
// These directives tell us to not cache the response.
8576
// We may consider to use the min_duration though, but let's try
8677
// for a bit with this configuration.
8778
return;
8879
}
8980

90-
$duration = $this->default_duration;
91-
92-
if (isset($directives['max-age'])) {
93-
$max_age = (int) $directives['max-age'];
94-
$age = (int) $age;
95-
$duration = $max_age - $age;
96-
} elseif ($expires) {
97-
$expired_at = $this->parseHttpDate($expires);
98-
99-
if ($expired_at === null) {
100-
$expired_at = \Minz\Time::now();
101-
}
102-
103-
$expires_timestamp = $expired_at->getTimestamp();
104-
$now_timestamp = \Minz\Time::now()->getTimestamp();
105-
106-
$duration = $expires_timestamp - $now_timestamp;
107-
} elseif ($response->status === 429) {
108-
$retry_at = $this->parseHttpDate($retry_after);
109-
110-
if ($retry_at === null) {
111-
$duration = (int) $retry_after;
112-
} else {
113-
$retry_at_timestamp = $retry_at->getTimestamp();
114-
$now_timestamp = \Minz\Time::now()->getTimestamp();
115-
116-
$duration = $retry_at_timestamp - $now_timestamp;
117-
}
118-
}
119-
120-
$duration = max($this->min_duration, $duration);
121-
$duration = min($this->max_duration, $duration);
122-
$expiration = \Minz\Time::fromNow($duration, 'seconds');
123-
12481
$response_text = (string) $response;
12582
$response_compressed = @gzencode($response_text);
12683

@@ -130,64 +87,11 @@ public function saveResponse(string $url, Response $response): void
13087

13188
$cache_item = $this->getItem($url);
13289
$cache_item->set($response_compressed);
133-
$cache_item->expiresAt($expiration);
90+
$cache_item->expiresAt($response->getRetryAfter());
13491
$result = $this->save($cache_item);
13592

13693
if ($result === false) {
13794
throw new CacheError("Response to {$url} cannot be saved in cache.");
13895
}
13996
}
140-
141-
/**
142-
* Parses a "Cache-Control" HTTP header and returns an array with the
143-
* different cache directives.
144-
*
145-
* @return array<string, string|true>
146-
*/
147-
private function parseCacheControl(string $cache_control): array
148-
{
149-
$directives = [];
150-
151-
$cache_control_parts = explode(',', $cache_control);
152-
153-
foreach ($cache_control_parts as $part) {
154-
$part = trim($part);
155-
156-
if (str_contains($part, '=')) {
157-
list($directive, $value) = explode('=', $part, 2);
158-
} else {
159-
$directive = $part;
160-
$value = true;
161-
}
162-
163-
$directive = strtolower($directive);
164-
165-
$directives[$directive] = $value;
166-
}
167-
168-
return $directives;
169-
}
170-
171-
/**
172-
* Parses an HTTP date header.
173-
*/
174-
private function parseHttpDate(string $expires): ?\DateTimeImmutable
175-
{
176-
$formats = [
177-
\DateTimeInterface::RFC7231,
178-
\DateTimeInterface::RFC850,
179-
// Ignore the ANSI C's asctime() format as obsolete and more
180-
// difficult to parse.
181-
];
182-
183-
foreach ($formats as $format) {
184-
$expired_at = \DateTimeImmutable::createFromFormat($format, $expires);
185-
186-
if ($expired_at) {
187-
return $expired_at;
188-
}
189-
}
190-
191-
return null;
192-
}
19397
}

tests/http/CacheTest.php

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -327,11 +327,6 @@ public static function unsupportedStatusCodesProvider(): array
327327
[103],
328328
[206],
329329
[304],
330-
[500],
331-
[501],
332-
[502],
333-
[503],
334-
[504],
335330
];
336331
}
337332
}

0 commit comments

Comments
 (0)