From 0e32503e986173cdfe5feb89134aed5fa79d9ef8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?benjamin=20melan=C3=A7on?= Date: Tue, 10 Jun 2025 00:35:03 -0400 Subject: [PATCH 1/7] Add support for YYYY-MM style date strings with only year and month, no day --- src/DateInStringFinder.php | 9 +++++++++ tests/DateInStringFinderTest.php | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/src/DateInStringFinder.php b/src/DateInStringFinder.php index dda1733..1705272 100644 --- a/src/DateInStringFinder.php +++ b/src/DateInStringFinder.php @@ -108,6 +108,15 @@ public static function find(string $string): array } } + // Match YYYY-MM to Year and Month if not already set: + if ($year === null && $month === null) { + preg_match('/\d{4}\-\d{2}/', $string, $matches_year_month); + if ($matches_year_month && $matches_year_month[0]) { + $year_month = $matches_year_month[0]; + [$year, $month] = explode('-', $year_month); + } + } + // Match Year if not already set: if ($year === null) { preg_match('/\d{4}/', $string, $matches_year); diff --git a/tests/DateInStringFinderTest.php b/tests/DateInStringFinderTest.php index 8e1e426..11e5487 100644 --- a/tests/DateInStringFinderTest.php +++ b/tests/DateInStringFinderTest.php @@ -80,6 +80,14 @@ public function dataStringDataProvider(): array 'year' => 11, ], ], + [ + 'uploads/2025-06/example.pdf', + [ + 'day' => null, + 'month' => 06, + 'year' => 2025, + ], + ], [ '1 2 1985', [ From defc07c1e47cb9589fbd60a5ecd5660871464e99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?benjamin=20melan=C3=A7on?= Date: Tue, 10 Jun 2025 16:36:23 -0400 Subject: [PATCH 2/7] Try to get four-digit years before risking false positives on day/month getting two-digit years MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For example, this string: '2025-06/10/19' — 10 is the day and 19 is the hour --- src/DateInStringFinder.php | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/DateInStringFinder.php b/src/DateInStringFinder.php index 1705272..e18fb4f 100644 --- a/src/DateInStringFinder.php +++ b/src/DateInStringFinder.php @@ -124,6 +124,13 @@ public static function find(string $string): array $year = $matches_year[0]; } } + + // Only if we did not succeed in getting a year do we try to find + // two-digit years. And maybe only if no day or month either? + if ($year === null) { + [$day, $month, $year] = self::getSimpleDate($string, TRUE) ?? self::getComplexDate($string, TRUE); + } + if ($year === null) { preg_match('/\'(\d{2})/', $string, $matches_year); if ($matches_year && $matches_year[1]) { @@ -143,11 +150,16 @@ public static function find(string $string): array /** * @return int[]|null */ - private static function getSimpleDate(string $string): ?array + private static function getSimpleDate(string $string, bool $two_digit_year = FALSE): ?array { + $year_pattern = '(\d{4})'; + if ($two_digit_year) { + $year_pattern = '(\d{2})'; + } + // Match dates: 01/01/2012 or 30-12-11 or 1 2 1985 preg_match( - '/(\d?\d)([.\-\/ ])+([0-1]?\d)\2+(\d{2,4})/', + '/(\d?\d)([.\-\/ ])+([0-1]?\d)\2+' . $year_pattern . '/', $string, $matches ); @@ -162,11 +174,15 @@ private static function getSimpleDate(string $string): ?array return null; } - private static function getComplexDate(string $string): ?array + private static function getComplexDate(string $string, bool $two_digit_year = FALSE): ?array { + $year_pattern = '(\d{4})'; + if ($two_digit_year) { + $year_pattern = '\'(\d{2})'; + } // Match dates: Sunday 1st March 2015; Sunday, 1 March 2015; Sun 1 Mar 2015; Sun-1-March-2015 preg_match( - '/(?:(?:'.implode('|', self::DAYS).'|'.implode('|', self::SHORT_DAYS).')[ ,\-_\/]*)?(\d?\d)[ ,\-_\/]*(?:'.implode('|', self::ORDINALS).')?[ ,\-_\/(?:of)]*('.implode('|', self::MONTHS).'|'.implode('|', self::SHORT_MONTHS).')\b(?:[ ,\-_\/]+(?:(\d{4})|\'(\d{2})))?/i', + '/(?:(?:'.implode('|', self::DAYS).'|'.implode('|', self::SHORT_DAYS).')[ ,\-_\/]*)?(\d?\d)[ ,\-_\/]*(?:'.implode('|', self::ORDINALS).')?[ ,\-_\/(?:of)]*('.implode('|', self::MONTHS).'|'.implode('|', self::SHORT_MONTHS).')\b(?:[ ,\-_\/]+(?:' .$year_pattern . '))?/i', $string, $matches ); @@ -180,7 +196,7 @@ private static function getComplexDate(string $string): ?array // Match dates: March 1st 2015; March 1 2015; March-1st-2015 preg_match( - '/('.implode('|', self::MONTHS).'|'.implode('|', self::SHORT_MONTHS).')\b[ ,\-_\/]*(\d?\d)[ ,\-_\/]*(?:'.implode('|', self::ORDINALS).')?[ ,\-_\/]+(?:(\d{4})|\'(\d{2}))/i', + '/('.implode('|', self::MONTHS).'|'.implode('|', self::SHORT_MONTHS).')\b[ ,\-_\/]*(\d?\d)[ ,\-_\/]*(?:'.implode('|', self::ORDINALS).')?[ ,\-_\/]+(?:' . $year_pattern . ')/i', $string, $matches ); From 02b78060fa0ce03c7a8265a37f81a92d8fd1f876 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?benjamin=20melan=C3=A7on?= Date: Mon, 16 Jun 2025 21:55:54 -0400 Subject: [PATCH 3/7] Add tests and try add capabilities to make them work --- src/DateInStringFinder.php | 14 +++++++++++++- tests/DateInStringFinderTest.php | 24 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/DateInStringFinder.php b/src/DateInStringFinder.php index e18fb4f..3eea036 100644 --- a/src/DateInStringFinder.php +++ b/src/DateInStringFinder.php @@ -85,6 +85,7 @@ final class DateInStringFinder */ public static function find(string $string): array { + $day = null; $month = null; $year = null; @@ -108,11 +109,22 @@ public static function find(string $string): array } } + // Match YYYY-MM-DD to year, month, and day if not already set. + if ($year === null && $month === null) { + preg_match('/\d{4}[-./]d{2}[-./]d{2}/', $string, $matches_year_month_day); + if ($matches_year_month_day && $matches_year_month_day[0]) { + $year_month_day = $matches_year_month_day[0]; + $year_month_day = str_replace(['.', '/'], '-', $year_month_day); + [$year, $month, $day] = explode('-', $year_month); + } + } + // Match YYYY-MM to Year and Month if not already set: if ($year === null && $month === null) { - preg_match('/\d{4}\-\d{2}/', $string, $matches_year_month); + preg_match('/\d{4}[-./]d{2}/', $string, $matches_year_month); if ($matches_year_month && $matches_year_month[0]) { $year_month = $matches_year_month[0]; + $year_month = str_replace(['.', '/'], '-', $year_month); [$year, $month] = explode('-', $year_month); } } diff --git a/tests/DateInStringFinderTest.php b/tests/DateInStringFinderTest.php index 11e5487..c44376b 100644 --- a/tests/DateInStringFinderTest.php +++ b/tests/DateInStringFinderTest.php @@ -88,6 +88,30 @@ public function dataStringDataProvider(): array 'year' => 2025, ], ], + [ + 'uploads/2025-06/17/example.pdf', + [ + 'day' => 17, + 'month' => 06, + 'year' => 2025, + ], + ], + [ + 'uploads/2025-06/example.pdf', + [ + 'day' => null, + 'month' => 06, + 'year' => 2025, + ], + ], + [ + '2025-06-17', + [ + 'day' => 17, + 'month' => 06, + 'year' => 2025, + ], + ], [ '1 2 1985', [ From 05129c24cc39ec48d67d5999ed81260ab5dae1f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?benjamin=20melan=C3=A7on?= Date: Mon, 16 Jun 2025 23:21:12 -0400 Subject: [PATCH 4/7] Add YYYY-MM-DD check and make it and YYYY-MM flexible for separators --- src/DateInStringFinder.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/DateInStringFinder.php b/src/DateInStringFinder.php index 3eea036..bc95b30 100644 --- a/src/DateInStringFinder.php +++ b/src/DateInStringFinder.php @@ -110,18 +110,18 @@ public static function find(string $string): array } // Match YYYY-MM-DD to year, month, and day if not already set. - if ($year === null && $month === null) { - preg_match('/\d{4}[-./]d{2}[-./]d{2}/', $string, $matches_year_month_day); + if ($year === null && $month === null && $day === null) { + preg_match('/\d{4}[\-\.\/]\d{2}[\-\.\/]\d{2}/', $string, $matches_year_month_day); if ($matches_year_month_day && $matches_year_month_day[0]) { $year_month_day = $matches_year_month_day[0]; $year_month_day = str_replace(['.', '/'], '-', $year_month_day); - [$year, $month, $day] = explode('-', $year_month); + [$year, $month, $day] = explode('-', $year_month_day); } } // Match YYYY-MM to Year and Month if not already set: if ($year === null && $month === null) { - preg_match('/\d{4}[-./]d{2}/', $string, $matches_year_month); + preg_match('/\d{4}[\-\.\/]\d{2}/', $string, $matches_year_month); if ($matches_year_month && $matches_year_month[0]) { $year_month = $matches_year_month[0]; $year_month = str_replace(['.', '/'], '-', $year_month); From 6b5c8fc57123be927ec7f82cbd214cc7c9348440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?benjamin=20melan=C3=A7on?= Date: Tue, 17 Jun 2025 16:33:19 -0400 Subject: [PATCH 5/7] Add elegant 'ISO' date matcher that i forgot to commit and re-did ish later --- src/DateInStringFinder.php | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/DateInStringFinder.php b/src/DateInStringFinder.php index bc95b30..21e2bf0 100644 --- a/src/DateInStringFinder.php +++ b/src/DateInStringFinder.php @@ -90,7 +90,7 @@ public static function find(string $string): array $month = null; $year = null; - [$day, $month, $year] = self::getSimpleDate($string) ?? self::getComplexDate($string); + [$day, $month, $year] = self::getIsoDate($string) ?? self::getSimpleDate($string) ?? self::getComplexDate($string); // Match month name: if ($month === null) { @@ -229,6 +229,26 @@ private static function getComplexDate(string $string, bool $two_digit_year = FA ]; } + /** + * @return int[]|null + */ + private static function getIsoDate(string $string): ?array + { + + // Match dates: 2025-06-30 or 2025/06/30 or 2025 06 30 as well as + // more unusual combinations likely in directories like 2025-06/30/19 + preg_match('/(\d{4})[.\-\/ ](\d{2})[.\-\/ ](\d{2})/', $string, $matches); + if (($matches[1] ?? null) !== null && ($matches[2] ?? null) !== null && ($matches[3] ?? null) !== null) { + return [ + $matches[3] ?? null, + $matches[2] ?? null, + $matches[1] ?? null, + ]; + } + + return null; + } + private static function getMonthNumber(string $initialMonth): ?int { $month = array_search(strtolower($initialMonth), self::SHORT_MONTHS, true); From ddf0bf37f8d0a6329d6a0d6b5e0dbf9dd3f78588 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?benjamin=20melan=C3=A7on?= Date: Tue, 17 Jun 2025 17:28:53 -0400 Subject: [PATCH 6/7] Remove redundant 'ISO' date match --- src/DateInStringFinder.php | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/DateInStringFinder.php b/src/DateInStringFinder.php index 21e2bf0..35dc9bb 100644 --- a/src/DateInStringFinder.php +++ b/src/DateInStringFinder.php @@ -109,16 +109,6 @@ public static function find(string $string): array } } - // Match YYYY-MM-DD to year, month, and day if not already set. - if ($year === null && $month === null && $day === null) { - preg_match('/\d{4}[\-\.\/]\d{2}[\-\.\/]\d{2}/', $string, $matches_year_month_day); - if ($matches_year_month_day && $matches_year_month_day[0]) { - $year_month_day = $matches_year_month_day[0]; - $year_month_day = str_replace(['.', '/'], '-', $year_month_day); - [$year, $month, $day] = explode('-', $year_month_day); - } - } - // Match YYYY-MM to Year and Month if not already set: if ($year === null && $month === null) { preg_match('/\d{4}[\-\.\/]\d{2}/', $string, $matches_year_month); From 16e1de1ce85dc73d328d8239c259ab365fcb2767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?benjamin=20melan=C3=A7on?= Date: Tue, 17 Jun 2025 18:13:29 -0400 Subject: [PATCH 7/7] OK all tests pass if we do this right away But feels like we should do it last but take care to *not* override month or day if those were gotten another way, though not sure there is any sure way to get best data. --- src/DateInStringFinder.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/DateInStringFinder.php b/src/DateInStringFinder.php index 35dc9bb..e561ca7 100644 --- a/src/DateInStringFinder.php +++ b/src/DateInStringFinder.php @@ -92,6 +92,12 @@ public static function find(string $string): array [$day, $month, $year] = self::getIsoDate($string) ?? self::getSimpleDate($string) ?? self::getComplexDate($string); + // Only if we did not succeed in getting a year do we try to find + // two-digit years. And maybe only if no day or month either? + if ($year === null) { + [$day, $month, $year] = self::getSimpleDate($string, TRUE) ?? self::getComplexDate($string, TRUE); + } + // Match month name: if ($month === null) { $month = self::getMonth($string); @@ -127,12 +133,6 @@ public static function find(string $string): array } } - // Only if we did not succeed in getting a year do we try to find - // two-digit years. And maybe only if no day or month either? - if ($year === null) { - [$day, $month, $year] = self::getSimpleDate($string, TRUE) ?? self::getComplexDate($string, TRUE); - } - if ($year === null) { preg_match('/\'(\d{2})/', $string, $matches_year); if ($matches_year && $matches_year[1]) {