From fbc3584f6b2893e099341df028cbbf27c57b0ca6 Mon Sep 17 00:00:00 2001 From: cyberpower678 Date: Thu, 8 Feb 2024 20:47:54 -0500 Subject: [PATCH] Raise flag on malformed URLs. Prevent the submission of malformed URLs to the Wayback Machine. --- app/src/Core/parse.php | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/app/src/Core/parse.php b/app/src/Core/parse.php index 57a0b5ab..43f90cf4 100644 --- a/app/src/Core/parse.php +++ b/app/src/Core/parse.php @@ -316,7 +316,7 @@ public function analyzePage( &$modifiedLinks = [], $webRequest = false, &$editEr ) { //Populate URLs to submit for archiving. if( $i == 1 ) { - $toArchive["$tid:$id"] = $link['url']; + if( !isset( $link['malformed_url'] ) ) $toArchive["$tid:$id"] = $link['url']; } else { //If it archived, then tally the success, otherwise, note it. if( $archiveResponse["$tid:$id"] === true ) { @@ -331,7 +331,7 @@ public function analyzePage( &$modifiedLinks = [], $webRequest = false, &$editEr ) { //Populate URLs to submit for archiving. if( $i == 1 ) { - $toArchive[$tid] = $link['url']; + if( !isset( $link['malformed_url'] ) ) $toArchive[$tid] = $link['url']; } else { //If it archived, then tally the success, otherwise, note it. if( $archiveResponse[$tid] === true ) { @@ -2390,6 +2390,10 @@ public function getLinkDetails( $linkString, $remainder ) { unset( $returnArray['original_url'] ); } + if( empty( strtolower( parse_url( $this->deadCheck->sanitizeURL( $returnArray['url'] ), PHP_URL_SCHEME ) ) ) ) { + $returnArray['malformed_url'] = true; + } + if( isset( $returnArray['archive_template'] ) ) { if( isset( $returnArray['archive_template']['parameters']['__FORMAT__'] ) ) { $returnArray['archive_template']['format'] =