Skip to content

Commit

Permalink
Merge pull request #128 from internetarchive/schema-rewrtie
Browse files (browse the repository at this point in the history)
Raise flag on malformed URLs
  • Loading branch information
cyberpower678 committed Feb 9, 2024
2 parents d61f73d + fbc3584 commit 857e4e0
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions app/src/Core/parse.php
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ public function analyzePage( &$modifiedLinks = [], $webRequest = false, &$editEr
) {
//Populate URLs to submit for archiving.
if( $i == 1 ) {
- $toArchive["$tid:$id"] = $link['url'];
+ if( !isset( $link['malformed_url'] ) ) $toArchive["$tid:$id"] = $link['url'];
} else {
//If it archived, then tally the success, otherwise, note it.
if( $archiveResponse["$tid:$id"] === true ) {
Expand All @@ -331,7 +331,7 @@ public function analyzePage( &$modifiedLinks = [], $webRequest = false, &$editEr
) {
//Populate URLs to submit for archiving.
if( $i == 1 ) {
- $toArchive[$tid] = $link['url'];
+ if( !isset( $link['malformed_url'] ) ) $toArchive[$tid] = $link['url'];
} else {
//If it archived, then tally the success, otherwise, note it.
if( $archiveResponse[$tid] === true ) {
Expand Down Expand Up @@ -2390,6 +2390,10 @@ public function getLinkDetails( $linkString, $remainder ) {
unset( $returnArray['original_url'] );
}

+ if( empty( strtolower( parse_url( $this->deadCheck->sanitizeURL( $returnArray['url'] ), PHP_URL_SCHEME ) ) ) ) {
+ $returnArray['malformed_url'] = true;
+ }

if( isset( $returnArray['archive_template'] ) ) {
if( isset( $returnArray['archive_template']['parameters']['__FORMAT__'] ) ) {
$returnArray['archive_template']['format'] =
Expand Down

0 comments on commit 857e4e0

Please sign in to comment.