From 1f31d46218de5a1acf112bdf5f7d1bd884ce98a9 Mon Sep 17 00:00:00 2001 From: Cristopher Matos Date: Fri, 1 Oct 2021 16:33:26 +0000 Subject: [PATCH 01/23] Docker for testing --- Dockerfile | 44 ++++++++++++++++++++++++++++++++++++++++++++ deploy/udoit-ng.sh | 31 +++++++++++++++++++++++++++++++ docker-compose.yml | 14 ++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 Dockerfile create mode 100644 deploy/udoit-ng.sh create mode 100644 docker-compose.yml diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ccd2914 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,44 @@ +FROM php:7.4-fpm +ARG ENVIORNMENT_TYPE + +#Install dependencies and php extensions +RUN apt-get update && apt-get install -y \ + git \ + libfreetype6-dev \ + libjpeg62-turbo-dev \ + libpng-dev \ + libpq-dev \ + unzip \ + wget \ + supervisor \ + apache2 \ + && docker-php-ext-configure gd \ + && docker-php-ext-install -j$(nproc) gd + +#Install AWS CLI v2 +RUN if [ "$ENVIORNMENT_TYPE" != "local" ] ;then \ + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ + && unzip awscliv2.zip \ + && ./aws/install\ + ;fi + +RUN echo "ServerName localhost" >> /etc/apache2/apache2.conf +RUN apachectl start + +#Create user ssm-user +RUN useradd -ms /bin/bash ssm-user +RUN mkdir -p /var/www/html \ + && chown ssm-user:www-data /var/www/html + +#install composer +COPY --from=composer:latest /usr/bin/composer /usr/local/bin/composer + +#Copy over files +COPY --chown=ssm-user:www-data . 
/var/www/html/ + +WORKDIR /var/www/html +#run setup script +RUN chmod +x deploy/udoit-ng.sh +RUN deploy/udoit-ng.sh + +CMD php-fpm diff --git a/deploy/udoit-ng.sh b/deploy/udoit-ng.sh new file mode 100644 index 0000000..cbbb884 --- /dev/null +++ b/deploy/udoit-ng.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# change to the new file location +cd /var/www/html + +# copy localConfig from S3 if you are not on local +if [ "$ENVIORNMENT_TYPE" != "local" ] +then + aws s3 cp s3://cidilabs-devops/udoit3/.env.local.$ENVIORNMENT_TYPE /var/www/html/.env.local +fi + +# run composer install +composer install --no-dev --no-interaction --no-progress --optimize-autoloader + +# change all file and directory permissions to give apache sufficient access +sudo find /var/www/html -type f -exec chmod 664 {} + -o -type d -exec chmod 775 {} + + +# only setup newrelic if not on local. +if [ "$ENVIORNMENT_TYPE" != "local" ] +then + # create .user.ini file for New Relic (PHP-FPM only) + touch /var/www/html/public/.user.ini + # add New Relic appname + echo -e "\nnewrelic.appname = \"$NEW_RELIC_APP_NAME\"" >> /var/www/html/public/.user.ini +fi + +# start queue monitor +/usr/bin/supervisord + +# restart apaches +# apachectl restart diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..833db8f --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,14 @@ +version: '3.3' + +services: + php: + build: + context: . 
+ dockerfile: Dockerfile + volumes: + - .:/var/www/html + ports: + - "8000:8000" +volumes: + web: + dbdata: From 34d7ecefb208ccd524377b7220b5f44aa8d5fc94 Mon Sep 17 00:00:00 2001 From: Cristopher Matos Date: Mon, 4 Oct 2021 09:32:04 -0400 Subject: [PATCH 02/23] Add broken and redirected link rule --- src/Rule/BrokenRedirectedLink.php | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 src/Rule/BrokenRedirectedLink.php diff --git a/src/Rule/BrokenRedirectedLink.php b/src/Rule/BrokenRedirectedLink.php new file mode 100644 index 0000000..a863e00 --- /dev/null +++ b/src/Rule/BrokenRedirectedLink.php @@ -0,0 +1,31 @@ +getAllElements('a') as $a) { + $link = $a->getAttribute('href'); + if ($link) { + $this->setIssue($a); + } + } + + return count($this->issues); + } + +} From a52ee2b924307812dd4acc9954c0e901f2942439 Mon Sep 17 00:00:00 2001 From: Cristopher Matos Date: Mon, 4 Oct 2021 09:32:12 -0400 Subject: [PATCH 03/23] Add broken and redirected link test --- tests/BrokenRedirectedLinkTest.php | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 tests/BrokenRedirectedLinkTest.php diff --git a/tests/BrokenRedirectedLinkTest.php b/tests/BrokenRedirectedLinkTest.php new file mode 100644 index 0000000..9d0819f --- /dev/null +++ b/tests/BrokenRedirectedLinkTest.php @@ -0,0 +1,15 @@ +I am a link.
'; + $dom = new \DOMDocument('1.0', 'utf-8'); + $dom->loadHTML($html); + $rule = new BrokenRedirectedLink($dom); + + $this->assertEquals(1, $rule->check(), 'Broken or Redirected Link should have one issue.'); + } +} From 4986b9aba39ddc12f078683e3ac4cba74b4e5701 Mon Sep 17 00:00:00 2001 From: Cristopher Matos Date: Wed, 6 Oct 2021 10:04:16 -0400 Subject: [PATCH 04/23] Link checker baseline --- src/Rule/BrokenRedirectedLink.php | 49 +++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/src/Rule/BrokenRedirectedLink.php b/src/Rule/BrokenRedirectedLink.php index a863e00..cfd1820 100644 --- a/src/Rule/BrokenRedirectedLink.php +++ b/src/Rule/BrokenRedirectedLink.php @@ -5,27 +5,64 @@ use DOMElement; /** -* +* Links that are broken need to be removed or manually updated. +* Links that are redirected should be updated with the new link. +* Based on UDOIT 2.8.X https://github.com/ucfopen/UDOIT/blob/classic/lib/Udoit.php +* contributions by Emily Sachs */ class BrokenRedirectedLink extends BaseRule { - public function id() { return self::class; } + private function linkCheck($links) { + $curls = array(); + $mcurl = curl_multi_init(); + foreach (array_keys($links) as $i => $link) { + $curls[$i] = curl_init(); + curl_setopt($curls[$i], CURLOPT_URL, $link); + curl_setopt($curls[$i], CURLOPT_HEADER, true); + curl_setopt($curls[$i], CURLOPT_NOBODY, true); + curl_setopt($curls[$i], CURLOPT_REFERER, true); + curl_setopt($curls[$i], CURLOPT_TIMEOUT, 2); + curl_setopt($curls[$i], CURLOPT_AUTOREFERER, true); + curl_setopt($curls[$i], CURLOPT_RETURNTRANSFER, true); + curl_setopt($curls[$i], CURLOPT_FOLLOWLOCATION, true); + curl_multi_add_handle($mcurl, $curls[$i]); + } + $running = null; + do { + curl_multi_exec($mcurl, $running); + } while ($running > 0); + foreach (array_keys($links) as $i => $link) { + $redirect = curl_getinfo($curls[$i], CURLINFO_EFFECTIVE_URL); + $status = curl_getinfo($curls[$i], CURLINFO_HTTP_CODE); + if ($link != 
$redirect) { + // Redirected link (May be a Canvas link that is not actually redirected) + $this->setIssue($links[$link], $metadata = $redirect); + } + if (404 == $status) { + $this->setIssue($links[$link]); + } + curl_multi_remove_handle($mcurl, $curls[$i]); + } + curl_multi_close($mcurl); + } + public function check() { + $links = array(); foreach ($this->getAllElements('a') as $a) { - $link = $a->getAttribute('href'); - if ($link) { - $this->setIssue($a); + $href = $a->getAttribute('href'); + if ($href) { + $links[$href] = $a; // href should exclude start with '#' } } + $this->linkCheck($links); return count($this->issues); } - } From 63875e1b3f50491925c81ba25eeb8f35a8d5c0e9 Mon Sep 17 00:00:00 2001 From: Cristopher Matos Date: Fri, 8 Oct 2021 14:38:22 -0400 Subject: [PATCH 05/23] Include metadata of redirected url for BrokenRedirectedLink rule --- src/Rule/BrokenRedirectedLink.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Rule/BrokenRedirectedLink.php b/src/Rule/BrokenRedirectedLink.php index cfd1820..cc92cc3 100644 --- a/src/Rule/BrokenRedirectedLink.php +++ b/src/Rule/BrokenRedirectedLink.php @@ -42,7 +42,7 @@ private function linkCheck($links) { $status = curl_getinfo($curls[$i], CURLINFO_HTTP_CODE); if ($link != $redirect) { // Redirected link (May be a Canvas link that is not actually redirected) - $this->setIssue($links[$link], $metadata = $redirect); + $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); } if (404 == $status) { $this->setIssue($links[$link]); From bfe3198ed6da1a2d9de36e50f54454c217df7d69 Mon Sep 17 00:00:00 2001 From: Cristopher Matos Date: Fri, 8 Oct 2021 14:43:40 -0400 Subject: [PATCH 06/23] Add redirected tests to BrokenRedirectedLinkTest --- tests/BrokenRedirectedLinkTest.php | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/BrokenRedirectedLinkTest.php b/tests/BrokenRedirectedLinkTest.php index 9d0819f..fcf5c20 100644 --- 
a/tests/BrokenRedirectedLinkTest.php +++ b/tests/BrokenRedirectedLinkTest.php @@ -3,9 +3,29 @@ use CidiLabs\PhpAlly\Rule\BrokenRedirectedLink; class BrokenRedirectedLinkTest extends PhpAllyTestCase { - public function testCheckAlive() + public function testCheckBroken() { - $html = '
I am a link.
'; + $html = '
I am a link.
'; + $dom = new \DOMDocument('1.0', 'utf-8'); + $dom->loadHTML($html); + $rule = new BrokenRedirectedLink($dom); + + $this->assertEquals(1, $rule->check(), 'Broken or Redirected Link should have one issue.'); + } + + public function testCheckNotRedirected() + { + $html = '
I am a link.
'; + $dom = new \DOMDocument('1.0', 'utf-8'); + $dom->loadHTML($html); + $rule = new BrokenRedirectedLink($dom); + + $this->assertEquals(0, $rule->check(), 'Broken or Redirected Link should have no issue.'); + } + + public function testCheckRedirected() + { + $html = '
I am a link.
'; $dom = new \DOMDocument('1.0', 'utf-8'); $dom->loadHTML($html); $rule = new BrokenRedirectedLink($dom); From c72b1ffe3693a00b0aa82b058919a4a0be341214 Mon Sep 17 00:00:00 2001 From: Cristopher Matos Date: Fri, 8 Oct 2021 15:06:10 -0400 Subject: [PATCH 07/23] Add regex to BrokenRedirectedLink rule --- src/Rule/BrokenRedirectedLink.php | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Rule/BrokenRedirectedLink.php b/src/Rule/BrokenRedirectedLink.php index cc92cc3..4937501 100644 --- a/src/Rule/BrokenRedirectedLink.php +++ b/src/Rule/BrokenRedirectedLink.php @@ -42,7 +42,15 @@ private function linkCheck($links) { $status = curl_getinfo($curls[$i], CURLINFO_HTTP_CODE); if ($link != $redirect) { // Redirected link (May be a Canvas link that is not actually redirected) - $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); + $ref = $redirect; + preg_match('/^[^#\s]+/', $ref, $matches); + $base = $matches[0]; + $base = preg_replace('/\/$/', '', $base); + $base = preg_replace('/www\./', '', $base); + $base = preg_replace('/http[s]{0,1}:\/\//', '', $base); + if (strpos($link, $base) === false) { + $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); + } } if (404 == $status) { $this->setIssue($links[$link]); From ad108908501c979cc235ceb60f261f5240cc03e3 Mon Sep 17 00:00:00 2001 From: Cristopher Matos Date: Fri, 8 Oct 2021 16:44:47 -0400 Subject: [PATCH 08/23] Add BrokenRedirectedLink to rules json --- src/rules.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rules.json b/src/rules.json index 9802b43..89eada4 100644 --- a/src/rules.json +++ b/src/rules.json @@ -35,5 +35,6 @@ "CidiLabs\\PhpAlly\\Rule\\VideoEmbedCheck", "CidiLabs\\PhpAlly\\Rule\\VideoProvidesCaptions", "CidiLabs\\PhpAlly\\Rule\\VideosEmbeddedOrLinkedNeedCaptions", - "CidiLabs\\PhpAlly\\Rule\\VideosHaveAutoGeneratedCaptions" + 
"CidiLabs\\PhpAlly\\Rule\\VideosHaveAutoGeneratedCaptions", + "CidiLabs\\PhpAlly\\Rule\\BrokenRedirectedLink" ] From ce5a34315ba567c924122f9740514b3b3715fc11 Mon Sep 17 00:00:00 2001 From: Cristopher Matos Date: Mon, 11 Oct 2021 11:43:42 -0400 Subject: [PATCH 09/23] Add test checking that a redirect link is contained in metadata --- tests/BrokenRedirectedLinkTest.php | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/BrokenRedirectedLinkTest.php b/tests/BrokenRedirectedLinkTest.php index fcf5c20..36c1da2 100644 --- a/tests/BrokenRedirectedLinkTest.php +++ b/tests/BrokenRedirectedLinkTest.php @@ -32,4 +32,21 @@ public function testCheckRedirected() $this->assertEquals(1, $rule->check(), 'Broken or Redirected Link should have one issue.'); } + + public function testCheckRedirectedAndMetadata() + { + $html = '
I am a link.
'; + $dom = new \DOMDocument('1.0', 'utf-8'); + $dom->loadHTML($html); + $rule = new BrokenRedirectedLink($dom); + + // Check if metadata is present with a new link + $result = $rule->check(); + if ($rule->getIssues() && count($rule->getIssues()) == 1) { + $meta = $rule->getIssues()[0]->getMetadata(); + $result = 1 + $result; + } + + $this->assertEquals(2, $result, 'Broken or Redirected Link should have one issue.'); + } } From 9053e5339df3e3150a8b6ebf7fbe2ccfe0d02384 Mon Sep 17 00:00:00 2001 From: Cristopher Matos Date: Wed, 13 Oct 2021 09:52:37 -0400 Subject: [PATCH 10/23] Clean up docker testing components and comments --- Dockerfile | 44 ------------------------------- deploy/udoit-ng.sh | 31 ---------------------- docker-compose.yml | 14 ---------- src/Rule/BrokenRedirectedLink.php | 2 +- 4 files changed, 1 insertion(+), 90 deletions(-) delete mode 100644 Dockerfile delete mode 100644 deploy/udoit-ng.sh delete mode 100644 docker-compose.yml diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index ccd2914..0000000 --- a/Dockerfile +++ /dev/null @@ -1,44 +0,0 @@ -FROM php:7.4-fpm -ARG ENVIORNMENT_TYPE - -#Install dependencies and php extensions -RUN apt-get update && apt-get install -y \ - git \ - libfreetype6-dev \ - libjpeg62-turbo-dev \ - libpng-dev \ - libpq-dev \ - unzip \ - wget \ - supervisor \ - apache2 \ - && docker-php-ext-configure gd \ - && docker-php-ext-install -j$(nproc) gd - -#Install AWS CLI v2 -RUN if [ "$ENVIORNMENT_TYPE" != "local" ] ;then \ - curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ - && unzip awscliv2.zip \ - && ./aws/install\ - ;fi - -RUN echo "ServerName localhost" >> /etc/apache2/apache2.conf -RUN apachectl start - -#Create user ssm-user -RUN useradd -ms /bin/bash ssm-user -RUN mkdir -p /var/www/html \ - && chown ssm-user:www-data /var/www/html - -#install composer -COPY --from=composer:latest /usr/bin/composer /usr/local/bin/composer - -#Copy over files -COPY 
--chown=ssm-user:www-data . /var/www/html/ - -WORKDIR /var/www/html -#run setup script -RUN chmod +x deploy/udoit-ng.sh -RUN deploy/udoit-ng.sh - -CMD php-fpm diff --git a/deploy/udoit-ng.sh b/deploy/udoit-ng.sh deleted file mode 100644 index cbbb884..0000000 --- a/deploy/udoit-ng.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -# change to the new file location -cd /var/www/html - -# copy localConfig from S3 if you are not on local -if [ "$ENVIORNMENT_TYPE" != "local" ] -then - aws s3 cp s3://cidilabs-devops/udoit3/.env.local.$ENVIORNMENT_TYPE /var/www/html/.env.local -fi - -# run composer install -composer install --no-dev --no-interaction --no-progress --optimize-autoloader - -# change all file and directory permissions to give apache sufficient access -sudo find /var/www/html -type f -exec chmod 664 {} + -o -type d -exec chmod 775 {} + - -# only setup newrelic if not on local. -if [ "$ENVIORNMENT_TYPE" != "local" ] -then - # create .user.ini file for New Relic (PHP-FPM only) - touch /var/www/html/public/.user.ini - # add New Relic appname - echo -e "\nnewrelic.appname = \"$NEW_RELIC_APP_NAME\"" >> /var/www/html/public/.user.ini -fi - -# start queue monitor -/usr/bin/supervisord - -# restart apaches -# apachectl restart diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index 833db8f..0000000 --- a/docker-compose.yml +++ /dev/null @@ -1,14 +0,0 @@ -version: '3.3' - -services: - php: - build: - context: . 
- dockerfile: Dockerfile - volumes: - - .:/var/www/html - ports: - - "8000:8000" -volumes: - web: - dbdata: diff --git a/src/Rule/BrokenRedirectedLink.php b/src/Rule/BrokenRedirectedLink.php index 4937501..aa83a37 100644 --- a/src/Rule/BrokenRedirectedLink.php +++ b/src/Rule/BrokenRedirectedLink.php @@ -66,7 +66,7 @@ public function check() foreach ($this->getAllElements('a') as $a) { $href = $a->getAttribute('href'); if ($href) { - $links[$href] = $a; // href should exclude start with '#' + $links[$href] = $a; } } $this->linkCheck($links); From cd87784e0668f821fc0c06b680fdf9e01636b095 Mon Sep 17 00:00:00 2001 From: AlanFCMV Date: Tue, 19 Oct 2021 15:17:34 -0400 Subject: [PATCH 11/23] Split broken and redirected link tests --- src/Rule/BrokenRedirectedLink.php | 76 ------------------------------ src/rules.json | 5 +- tests/BrokenRedirectedLinkTest.php | 52 -------------------- 3 files changed, 3 insertions(+), 130 deletions(-) delete mode 100644 src/Rule/BrokenRedirectedLink.php delete mode 100644 tests/BrokenRedirectedLinkTest.php diff --git a/src/Rule/BrokenRedirectedLink.php b/src/Rule/BrokenRedirectedLink.php deleted file mode 100644 index aa83a37..0000000 --- a/src/Rule/BrokenRedirectedLink.php +++ /dev/null @@ -1,76 +0,0 @@ - $link) { - $curls[$i] = curl_init(); - curl_setopt($curls[$i], CURLOPT_URL, $link); - curl_setopt($curls[$i], CURLOPT_HEADER, true); - curl_setopt($curls[$i], CURLOPT_NOBODY, true); - curl_setopt($curls[$i], CURLOPT_REFERER, true); - curl_setopt($curls[$i], CURLOPT_TIMEOUT, 2); - curl_setopt($curls[$i], CURLOPT_AUTOREFERER, true); - curl_setopt($curls[$i], CURLOPT_RETURNTRANSFER, true); - curl_setopt($curls[$i], CURLOPT_FOLLOWLOCATION, true); - curl_multi_add_handle($mcurl, $curls[$i]); - } - $running = null; - do { - curl_multi_exec($mcurl, $running); - } while ($running > 0); - foreach (array_keys($links) as $i => $link) { - $redirect = curl_getinfo($curls[$i], CURLINFO_EFFECTIVE_URL); - $status = curl_getinfo($curls[$i], 
CURLINFO_HTTP_CODE); - if ($link != $redirect) { - // Redirected link (May be a Canvas link that is not actually redirected) - $ref = $redirect; - preg_match('/^[^#\s]+/', $ref, $matches); - $base = $matches[0]; - $base = preg_replace('/\/$/', '', $base); - $base = preg_replace('/www\./', '', $base); - $base = preg_replace('/http[s]{0,1}:\/\//', '', $base); - if (strpos($link, $base) === false) { - $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); - } - } - if (404 == $status) { - $this->setIssue($links[$link]); - } - curl_multi_remove_handle($mcurl, $curls[$i]); - } - curl_multi_close($mcurl); - } - - public function check() - { - $links = array(); - foreach ($this->getAllElements('a') as $a) { - $href = $a->getAttribute('href'); - if ($href) { - $links[$href] = $a; - } - } - $this->linkCheck($links); - - return count($this->issues); - } -} diff --git a/src/rules.json b/src/rules.json index 89eada4..514369c 100644 --- a/src/rules.json +++ b/src/rules.json @@ -5,6 +5,7 @@ "CidiLabs\\PhpAlly\\Rule\\AnchorSuspiciousLinkText", "CidiLabs\\PhpAlly\\Rule\\BaseFontIsNotUsed", "CidiLabs\\PhpAlly\\Rule\\BlinkIsNotUsed", + "CidiLabs\\PhpAlly\\Rule\\BrokenLink", "CidiLabs\\PhpAlly\\Rule\\ContentTooLong", "CidiLabs\\PhpAlly\\Rule\\CssTextHasContrast", "CidiLabs\\PhpAlly\\Rule\\CssTextStyleEmphasize", @@ -29,12 +30,12 @@ "CidiLabs\\PhpAlly\\Rule\\ObjectTagDetected", "CidiLabs\\PhpAlly\\Rule\\ParagraphNotUsedAsHeader", "CidiLabs\\PhpAlly\\Rule\\PreShouldNotBeUsedForTabularValues", + "CidiLabs\\PhpAlly\\Rule\\RedirectedLink", "CidiLabs\\PhpAlly\\Rule\\TableDataShouldHaveTableHeader", "CidiLabs\\PhpAlly\\Rule\\TableHeaderShouldHaveScope", "CidiLabs\\PhpAlly\\Rule\\VideoCaptionsMatchCourseLanguage", "CidiLabs\\PhpAlly\\Rule\\VideoEmbedCheck", "CidiLabs\\PhpAlly\\Rule\\VideoProvidesCaptions", "CidiLabs\\PhpAlly\\Rule\\VideosEmbeddedOrLinkedNeedCaptions", - "CidiLabs\\PhpAlly\\Rule\\VideosHaveAutoGeneratedCaptions", - 
"CidiLabs\\PhpAlly\\Rule\\BrokenRedirectedLink" + "CidiLabs\\PhpAlly\\Rule\\VideosHaveAutoGeneratedCaptions" ] diff --git a/tests/BrokenRedirectedLinkTest.php b/tests/BrokenRedirectedLinkTest.php deleted file mode 100644 index 36c1da2..0000000 --- a/tests/BrokenRedirectedLinkTest.php +++ /dev/null @@ -1,52 +0,0 @@ -I am a link.
'; - $dom = new \DOMDocument('1.0', 'utf-8'); - $dom->loadHTML($html); - $rule = new BrokenRedirectedLink($dom); - - $this->assertEquals(1, $rule->check(), 'Broken or Redirected Link should have one issue.'); - } - - public function testCheckNotRedirected() - { - $html = '
I am a link.
'; - $dom = new \DOMDocument('1.0', 'utf-8'); - $dom->loadHTML($html); - $rule = new BrokenRedirectedLink($dom); - - $this->assertEquals(0, $rule->check(), 'Broken or Redirected Link should have no issue.'); - } - - public function testCheckRedirected() - { - $html = '
I am a link.
'; - $dom = new \DOMDocument('1.0', 'utf-8'); - $dom->loadHTML($html); - $rule = new BrokenRedirectedLink($dom); - - $this->assertEquals(1, $rule->check(), 'Broken or Redirected Link should have one issue.'); - } - - public function testCheckRedirectedAndMetadata() - { - $html = '
I am a link.
'; - $dom = new \DOMDocument('1.0', 'utf-8'); - $dom->loadHTML($html); - $rule = new BrokenRedirectedLink($dom); - - // Check if metadata is present with a new link - $result = $rule->check(); - if ($rule->getIssues() && count($rule->getIssues()) == 1) { - $meta = $rule->getIssues()[0]->getMetadata(); - $result = 1 + $result; - } - - $this->assertEquals(2, $result, 'Broken or Redirected Link should have one issue.'); - } -} From 21a018c44e5f8faa5adc8bd57aaf009256d25641 Mon Sep 17 00:00:00 2001 From: AlanFCMV Date: Wed, 20 Oct 2021 16:18:44 -0400 Subject: [PATCH 12/23] Add new split rules --- src/Rule/BrokenLink.php | 63 +++++++++++++++++++++++++++++++ src/Rule/RedirectedLink.php | 73 ++++++++++++++++++++++++++++++++++++ tests/BrokenLinkTest.php | 36 ++++++++++++++++++ tests/RedirectedLinkTest.php | 42 +++++++++++++++++++++ 4 files changed, 214 insertions(+) create mode 100644 src/Rule/BrokenLink.php create mode 100644 src/Rule/RedirectedLink.php create mode 100644 tests/BrokenLinkTest.php create mode 100644 tests/RedirectedLinkTest.php diff --git a/src/Rule/BrokenLink.php b/src/Rule/BrokenLink.php new file mode 100644 index 0000000..7b4d620 --- /dev/null +++ b/src/Rule/BrokenLink.php @@ -0,0 +1,63 @@ + $link) { + $curls[$i] = curl_init(); + curl_setopt($curls[$i], CURLOPT_URL, $link); + curl_setopt($curls[$i], CURLOPT_HEADER, true); + curl_setopt($curls[$i], CURLOPT_NOBODY, true); + curl_setopt($curls[$i], CURLOPT_REFERER, true); + curl_setopt($curls[$i], CURLOPT_TIMEOUT, 2); + curl_setopt($curls[$i], CURLOPT_AUTOREFERER, true); + curl_setopt($curls[$i], CURLOPT_RETURNTRANSFER, true); + curl_setopt($curls[$i], CURLOPT_FOLLOWLOCATION, true); + curl_multi_add_handle($mcurl, $curls[$i]); + } + $running = null; + do { + curl_multi_exec($mcurl, $running); + } while ($running > 0); + foreach (array_keys($links) as $i => $link) { + $status = curl_getinfo($curls[$i], CURLINFO_HTTP_CODE); + // If the status is greater than or equal to 400 the link is broken. 
+ if (400 >= $status) { + $this->setIssue($links[$link]); + } + curl_multi_remove_handle($mcurl, $curls[$i]); + } + curl_multi_close($mcurl); + } + + public function check() + { + $links = array(); + foreach ($this->getAllElements('a') as $a) { + $href = $a->getAttribute('href'); + if ($href) { + $links[$href] = $a; + } + } + $this->linkCheck($links); + + return count($this->issues); + } +} diff --git a/src/Rule/RedirectedLink.php b/src/Rule/RedirectedLink.php new file mode 100644 index 0000000..e43451c --- /dev/null +++ b/src/Rule/RedirectedLink.php @@ -0,0 +1,73 @@ + $link) { + $curls[$i] = curl_init(); + curl_setopt($curls[$i], CURLOPT_URL, $link); + curl_setopt($curls[$i], CURLOPT_HEADER, true); + curl_setopt($curls[$i], CURLOPT_NOBODY, true); + curl_setopt($curls[$i], CURLOPT_REFERER, true); + curl_setopt($curls[$i], CURLOPT_TIMEOUT, 2); + curl_setopt($curls[$i], CURLOPT_AUTOREFERER, true); + curl_setopt($curls[$i], CURLOPT_RETURNTRANSFER, true); + curl_setopt($curls[$i], CURLOPT_FOLLOWLOCATION, true); + curl_multi_add_handle($mcurl, $curls[$i]); + } + $running = null; + do { + curl_multi_exec($mcurl, $running); + } while ($running > 0); + foreach (array_keys($links) as $i => $link) { + $redirect = curl_getinfo($curls[$i], CURLINFO_EFFECTIVE_URL); + $status = curl_getinfo($curls[$i], CURLINFO_HTTP_CODE); + if ($status < 400 && $link != $redirect) { + // Redirected link (May be a Canvas link that is not actually redirected) + $ref = $redirect; + preg_match('/^[^#\s]+/', $ref, $matches); + $base = $matches[0]; + $base = preg_replace('/\/$/', '', $base); + $base = preg_replace('/www\./', '', $base); + $base = preg_replace('/http[s]{0,1}:\/\//', '', $base); + if (strpos($link, $base) === false) { + $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); + } + } + curl_multi_remove_handle($mcurl, $curls[$i]); + } + curl_multi_close($mcurl); + } + + public function check() + { + $links = array(); + foreach ($this->getAllElements('a') 
as $a) { + $href = $a->getAttribute('href'); + if ($href) { + $links[$href] = $a; + } + } + $this->linkCheck($links); + + return count($this->issues); + } +} + diff --git a/tests/BrokenLinkTest.php b/tests/BrokenLinkTest.php new file mode 100644 index 0000000..5805e13 --- /dev/null +++ b/tests/BrokenLinkTest.php @@ -0,0 +1,36 @@ +I am a link.
'; + $dom = new \DOMDocument('1.0', 'utf-8'); + $dom->loadHTML($html); + $rule = new BrokenLink($dom); + + $this->assertEquals(0, $rule->check(), 'BrokenLink should have no issue.'); + } + + public function testCheckBroken400() + { + $html = '
I am a link.
'; + $dom = new \DOMDocument('1.0', 'utf-8'); + $dom->loadHTML($html); + $rule = new BrokenLink($dom); + + $this->assertEquals(1, $rule->check(), 'BrokenLink should have one issue.'); + } + + public function testCheckBroken404() + { + $html = '
I am a link.
'; + $dom = new \DOMDocument('1.0', 'utf-8'); + $dom->loadHTML($html); + $rule = new BrokenLink($dom); + + $this->assertEquals(1, $rule->check(), 'BrokenLink should have one issue.'); + } +} diff --git a/tests/RedirectedLinkTest.php b/tests/RedirectedLinkTest.php new file mode 100644 index 0000000..fa9df98 --- /dev/null +++ b/tests/RedirectedLinkTest.php @@ -0,0 +1,42 @@ +I am a link.
'; + $dom = new \DOMDocument('1.0', 'utf-8'); + $dom->loadHTML($html); + $rule = new RedirectedLink($dom); + + $this->assertEquals(0, $rule->check(), 'RedirectedLink should have no issue.'); + } + + public function testCheckRedirected() + { + $html = '
I am a link.
'; + $dom = new \DOMDocument('1.0', 'utf-8'); + $dom->loadHTML($html); + $rule = new RedirectedLink($dom); + + $this->assertEquals(1, $rule->check(), 'RedirectedLink should have one issue.'); + } + + public function testCheckRedirectedAndMetadata() + { + $html = '
I am a link.
'; + $dom = new \DOMDocument('1.0', 'utf-8'); + $dom->loadHTML($html); + $rule = new RedirectedLink($dom); + + // Check if metadata is present with a new link + $result = $rule->check(); + if ($rule->getIssues() && count($rule->getIssues()) == 1) { + $meta = $rule->getIssues()[0]->getMetadata(); + $result = 1 + $result; + } + + $this->assertEquals(2, $result, 'RedirectedLink should have one issue.'); + } +} From 2a4f1c4b5589120b6d54ad384659eb6fceed3a48 Mon Sep 17 00:00:00 2001 From: AlanFCMV Date: Thu, 21 Oct 2021 09:07:50 -0400 Subject: [PATCH 13/23] update curl option --- src/Rule/BrokenLink.php | 2 +- src/Rule/RedirectedLink.php | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Rule/BrokenLink.php b/src/Rule/BrokenLink.php index 7b4d620..0ed8571 100644 --- a/src/Rule/BrokenLink.php +++ b/src/Rule/BrokenLink.php @@ -37,7 +37,7 @@ private function linkCheck($links) { curl_multi_exec($mcurl, $running); } while ($running > 0); foreach (array_keys($links) as $i => $link) { - $status = curl_getinfo($curls[$i], CURLINFO_HTTP_CODE); + $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); // If the status is greater than or equal to 400 the link is broken. 
if (400 >= $status) { $this->setIssue($links[$link]); diff --git a/src/Rule/RedirectedLink.php b/src/Rule/RedirectedLink.php index e43451c..464c799 100644 --- a/src/Rule/RedirectedLink.php +++ b/src/Rule/RedirectedLink.php @@ -38,8 +38,8 @@ private function linkCheck($links) { } while ($running > 0); foreach (array_keys($links) as $i => $link) { $redirect = curl_getinfo($curls[$i], CURLINFO_EFFECTIVE_URL); - $status = curl_getinfo($curls[$i], CURLINFO_HTTP_CODE); - if ($status < 400 && $link != $redirect) { + $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); + if (($status < 400) && ($link != $redirect)) { // Redirected link (May be a Canvas link that is not actually redirected) $ref = $redirect; preg_match('/^[^#\s]+/', $ref, $matches); From 1e85f34a0f2c6b4c1d7209f4dc6bdbdf4184507f Mon Sep 17 00:00:00 2001 From: AlanFCMV Date: Thu, 21 Oct 2021 09:20:11 -0400 Subject: [PATCH 14/23] Fix comparisons --- src/Rule/BrokenLink.php | 2 +- src/Rule/RedirectedLink.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Rule/BrokenLink.php b/src/Rule/BrokenLink.php index 0ed8571..da99ff0 100644 --- a/src/Rule/BrokenLink.php +++ b/src/Rule/BrokenLink.php @@ -39,7 +39,7 @@ private function linkCheck($links) { foreach (array_keys($links) as $i => $link) { $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); // If the status is greater than or equal to 400 the link is broken. 
- if (400 >= $status) { + if (400 <= $status) { $this->setIssue($links[$link]); } curl_multi_remove_handle($mcurl, $curls[$i]); diff --git a/src/Rule/RedirectedLink.php b/src/Rule/RedirectedLink.php index 464c799..a4c410f 100644 --- a/src/Rule/RedirectedLink.php +++ b/src/Rule/RedirectedLink.php @@ -39,7 +39,7 @@ private function linkCheck($links) { foreach (array_keys($links) as $i => $link) { $redirect = curl_getinfo($curls[$i], CURLINFO_EFFECTIVE_URL); $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); - if (($status < 400) && ($link != $redirect)) { + if ((400 > $status) && ($link != $redirect)) { // Redirected link (May be a Canvas link that is not actually redirected) $ref = $redirect; preg_match('/^[^#\s]+/', $ref, $matches); From 59b142e4b6a8eb1af87558ac693916f206487ced Mon Sep 17 00:00:00 2001 From: AlanFCMV Date: Thu, 21 Oct 2021 10:37:17 -0400 Subject: [PATCH 15/23] Use parse_url --- src/Rule/RedirectedLink.php | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/Rule/RedirectedLink.php b/src/Rule/RedirectedLink.php index a4c410f..aff9464 100644 --- a/src/Rule/RedirectedLink.php +++ b/src/Rule/RedirectedLink.php @@ -41,15 +41,11 @@ private function linkCheck($links) { $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); if ((400 > $status) && ($link != $redirect)) { // Redirected link (May be a Canvas link that is not actually redirected) - $ref = $redirect; - preg_match('/^[^#\s]+/', $ref, $matches); - $base = $matches[0]; - $base = preg_replace('/\/$/', '', $base); - $base = preg_replace('/www\./', '', $base); - $base = preg_replace('/http[s]{0,1}:\/\//', '', $base); - if (strpos($link, $base) === false) { + $parsed_link = parse_url($link); + $parsed_redirect = parse_url($redirect); + if ($parsed_link[host] !== $parsed_redirect[host] || + $parsed_link[path] !== $parsed_redirect[path]) $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); - } } 
curl_multi_remove_handle($mcurl, $curls[$i]); } From 390712b796c742a7d895781fac7a225d6e7ee8c3 Mon Sep 17 00:00:00 2001 From: AlanFCMV Date: Thu, 21 Oct 2021 10:57:19 -0400 Subject: [PATCH 16/23] Refactor to check for permanent redirects only --- src/Rule/RedirectedLink.php | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/Rule/RedirectedLink.php b/src/Rule/RedirectedLink.php index aff9464..3180a25 100644 --- a/src/Rule/RedirectedLink.php +++ b/src/Rule/RedirectedLink.php @@ -27,9 +27,7 @@ private function linkCheck($links) { curl_setopt($curls[$i], CURLOPT_NOBODY, true); curl_setopt($curls[$i], CURLOPT_REFERER, true); curl_setopt($curls[$i], CURLOPT_TIMEOUT, 2); - curl_setopt($curls[$i], CURLOPT_AUTOREFERER, true); curl_setopt($curls[$i], CURLOPT_RETURNTRANSFER, true); - curl_setopt($curls[$i], CURLOPT_FOLLOWLOCATION, true); curl_multi_add_handle($mcurl, $curls[$i]); } $running = null; @@ -37,16 +35,14 @@ private function linkCheck($links) { curl_multi_exec($mcurl, $running); } while ($running > 0); foreach (array_keys($links) as $i => $link) { - $redirect = curl_getinfo($curls[$i], CURLINFO_EFFECTIVE_URL); + $redirect = curl_getinfo($curls[$i], CURLINFO_REDIRECT_URL); $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); - if ((400 > $status) && ($link != $redirect)) { - // Redirected link (May be a Canvas link that is not actually redirected) - $parsed_link = parse_url($link); - $parsed_redirect = parse_url($redirect); - if ($parsed_link[host] !== $parsed_redirect[host] || - $parsed_link[path] !== $parsed_redirect[path]) - $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); + + // Only permanent redirections are a problem + if ($status === 301 || $status === 308) { + $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); } + curl_multi_remove_handle($mcurl, $curls[$i]); } curl_multi_close($mcurl); From 65c312e5779707cd339da8ddfa0a1cff62c6775b Mon Sep 17 
00:00:00 2001 From: AlanFCMV Date: Thu, 21 Oct 2021 11:15:38 -0400 Subject: [PATCH 17/23] Add back parse_url --- src/Rule/RedirectedLink.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Rule/RedirectedLink.php b/src/Rule/RedirectedLink.php index 3180a25..e932e01 100644 --- a/src/Rule/RedirectedLink.php +++ b/src/Rule/RedirectedLink.php @@ -40,7 +40,13 @@ private function linkCheck($links) { // Only permanent redirections are a problem if ($status === 301 || $status === 308) { - $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); + $parsed_link = parse_url($link); + $parsed_redirect = parse_url($redirect); + if ($parsed_link[host] !== $parsed_redirect[host] || + $parsed_link[path] !== $parsed_redirect[path]) { + $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); + } + } curl_multi_remove_handle($mcurl, $curls[$i]); From efc9cf4065564e382586d804eed96eac90aba87b Mon Sep 17 00:00:00 2001 From: AlanFCMV Date: Thu, 21 Oct 2021 11:30:55 -0400 Subject: [PATCH 18/23] Fix parse_url --- src/Rule/RedirectedLink.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Rule/RedirectedLink.php b/src/Rule/RedirectedLink.php index e932e01..8e70a4d 100644 --- a/src/Rule/RedirectedLink.php +++ b/src/Rule/RedirectedLink.php @@ -42,11 +42,10 @@ private function linkCheck($links) { if ($status === 301 || $status === 308) { $parsed_link = parse_url($link); $parsed_redirect = parse_url($redirect); - if ($parsed_link[host] !== $parsed_redirect[host] || - $parsed_link[path] !== $parsed_redirect[path]) { + if ($parsed_link['host'] !== $parsed_redirect['host'] || + $parsed_link['path'] !== $parsed_redirect['path']) { $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); } - } curl_multi_remove_handle($mcurl, $curls[$i]); From 379173de46097bbcddb5c269213e8a400e2c08b0 Mon Sep 17 00:00:00 2001 From: AlanFCMV Date: Fri, 22 Oct 2021 08:27:18 
-0400 Subject: [PATCH 19/23] Single step link checking --- src/Rule/BrokenLink.php | 2 -- src/Rule/RedirectedLink.php | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Rule/BrokenLink.php b/src/Rule/BrokenLink.php index da99ff0..5106cae 100644 --- a/src/Rule/BrokenLink.php +++ b/src/Rule/BrokenLink.php @@ -27,9 +27,7 @@ private function linkCheck($links) { curl_setopt($curls[$i], CURLOPT_NOBODY, true); curl_setopt($curls[$i], CURLOPT_REFERER, true); curl_setopt($curls[$i], CURLOPT_TIMEOUT, 2); - curl_setopt($curls[$i], CURLOPT_AUTOREFERER, true); curl_setopt($curls[$i], CURLOPT_RETURNTRANSFER, true); - curl_setopt($curls[$i], CURLOPT_FOLLOWLOCATION, true); curl_multi_add_handle($mcurl, $curls[$i]); } $running = null; diff --git a/src/Rule/RedirectedLink.php b/src/Rule/RedirectedLink.php index 8e70a4d..af686af 100644 --- a/src/Rule/RedirectedLink.php +++ b/src/Rule/RedirectedLink.php @@ -5,7 +5,7 @@ use DOMElement; /** -* Links that are redirected should be updated with the new link. +* Links that are permanently redirected should be updated with the new link. 
* Based on UDOIT 2.8.X https://github.com/ucfopen/UDOIT/blob/classic/lib/Udoit.php * contributions by Emily Sachs */ From 4c2445432a9b8cc1cccdb15c9de228ba5dc54e6e Mon Sep 17 00:00:00 2001 From: AlanFCMV Date: Fri, 22 Oct 2021 11:13:23 -0400 Subject: [PATCH 20/23] Fix broken links also showing as redirected --- src/Rule/BrokenLink.php | 33 +++++++------- src/Rule/RedirectedLink.php | 86 +++++++++++++++++++++++++++---------- 2 files changed, 82 insertions(+), 37 deletions(-) diff --git a/src/Rule/BrokenLink.php b/src/Rule/BrokenLink.php index 5106cae..fec4a8e 100644 --- a/src/Rule/BrokenLink.php +++ b/src/Rule/BrokenLink.php @@ -17,7 +17,21 @@ public function id() return self::class; } - private function linkCheck($links) { + public function check() + { + $links = array(); + foreach ($this->getAllElements('a') as $a) { + $href = $a->getAttribute('href'); + if ($href) { + $links[$href] = $a; + } + } + $this->checkLink($links); + + return count($this->issues); + } + + private function checkLink($links) { $curls = array(); $mcurl = curl_multi_init(); foreach (array_keys($links) as $i => $link) { @@ -27,7 +41,10 @@ private function linkCheck($links) { curl_setopt($curls[$i], CURLOPT_NOBODY, true); curl_setopt($curls[$i], CURLOPT_REFERER, true); curl_setopt($curls[$i], CURLOPT_TIMEOUT, 2); + curl_setopt($curls[$i], CURLOPT_TIMEOUT, 2); + curl_setopt($curls[$i], CURLOPT_AUTOREFERER, true); curl_setopt($curls[$i], CURLOPT_RETURNTRANSFER, true); + curl_setopt($curls[$i], CURLOPT_FOLLOWLOCATION, true); curl_multi_add_handle($mcurl, $curls[$i]); } $running = null; @@ -44,18 +61,4 @@ private function linkCheck($links) { } curl_multi_close($mcurl); } - - public function check() - { - $links = array(); - foreach ($this->getAllElements('a') as $a) { - $href = $a->getAttribute('href'); - if ($href) { - $links[$href] = $a; - } - } - $this->linkCheck($links); - - return count($this->issues); - } } diff --git a/src/Rule/RedirectedLink.php b/src/Rule/RedirectedLink.php index 
af686af..a35df31 100644 --- a/src/Rule/RedirectedLink.php +++ b/src/Rule/RedirectedLink.php @@ -17,7 +17,21 @@ public function id() return self::class; } - private function linkCheck($links) { + public function check() + { + $links = array(); + foreach ($this->getAllElements('a') as $a) { + $href = $a->getAttribute('href'); + if ($href) { + $links[$href] = $a; + } + } + $this->checkLink($links); + + return count($this->issues); + } + + private function checkLink($links) { $curls = array(); $mcurl = curl_multi_init(); foreach (array_keys($links) as $i => $link) { @@ -27,7 +41,10 @@ private function linkCheck($links) { curl_setopt($curls[$i], CURLOPT_NOBODY, true); curl_setopt($curls[$i], CURLOPT_REFERER, true); curl_setopt($curls[$i], CURLOPT_TIMEOUT, 2); + curl_setopt($curls[$i], CURLOPT_TIMEOUT, 2); + curl_setopt($curls[$i], CURLOPT_AUTOREFERER, true); curl_setopt($curls[$i], CURLOPT_RETURNTRANSFER, true); + curl_setopt($curls[$i], CURLOPT_FOLLOWLOCATION, true); curl_multi_add_handle($mcurl, $curls[$i]); } $running = null; @@ -35,36 +52,61 @@ private function linkCheck($links) { curl_multi_exec($mcurl, $running); } while ($running > 0); foreach (array_keys($links) as $i => $link) { - $redirect = curl_getinfo($curls[$i], CURLINFO_REDIRECT_URL); $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); - - // Only permanent redirections are a problem - if ($status === 301 || $status === 308) { - $parsed_link = parse_url($link); - $parsed_redirect = parse_url($redirect); - if ($parsed_link['host'] !== $parsed_redirect['host'] || - $parsed_link['path'] !== $parsed_redirect['path']) { - $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); - } + // If the status is 400 or greater the link is broken so dont bother checking. 
+ if (400 > $status) { + $this->checkRedirect($links[$link]); } - curl_multi_remove_handle($mcurl, $curls[$i]); } curl_multi_close($mcurl); } - public function check() - { - $links = array(); - foreach ($this->getAllElements('a') as $a) { - $href = $a->getAttribute('href'); - if ($href) { - $links[$href] = $a; - } + private function checkRedirect($link) { + $curl = curl_init(); + curl_setopt($curl, CURLOPT_URL, $link); + curl_setopt($curl, CURLOPT_HEADER, true); + curl_setopt($curl, CURLOPT_NOBODY, true); + curl_setopt($curl, CURLOPT_REFERER, true); + curl_setopt($curl, CURLOPT_TIMEOUT, 2); + curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); + + $result = curl_exec($curl); + curl_close($curl); + $redirect = curl_getinfo($curls[$i], CURLINFO_REDIRECT_URL); + $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); + + // Only permanent redirections are a problem + if ($status === 301 || $status === 308) { + followPermanentRedirects($link, $redirect); } - $this->linkCheck($links); + } - return count($this->issues); + private function followPermanentRedirects($original, $link, $maxRedirects = 20) { + if (maxRedirects < 1) { + $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); + return; + } + + $curl = curl_init(); + curl_setopt($curl, CURLOPT_URL, $link); + curl_setopt($curl, CURLOPT_HEADER, true); + curl_setopt($curl, CURLOPT_NOBODY, true); + curl_setopt($curl, CURLOPT_REFERER, true); + curl_setopt($curl, CURLOPT_TIMEOUT, 2); + curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); + + $result = curl_exec($curl); + curl_close($curl); + $redirect = curl_getinfo($curls[$i], CURLINFO_REDIRECT_URL); + $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); + + // Only permanent redirections are a problem + if ($status === 301 || $status === 308) { + followPermanentRedirects($redirect, $maxRedirects - 1); + } else { + $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); + } } } From 
b706dcea201c73f2ade5c6c3857dae9d7e28556c Mon Sep 17 00:00:00 2001 From: AlanFCMV Date: Fri, 22 Oct 2021 11:21:13 -0400 Subject: [PATCH 21/23] Fix RedirectedLink rule curl calls --- src/Rule/RedirectedLink.php | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/Rule/RedirectedLink.php b/src/Rule/RedirectedLink.php index a35df31..e63bf66 100644 --- a/src/Rule/RedirectedLink.php +++ b/src/Rule/RedirectedLink.php @@ -71,10 +71,10 @@ private function checkRedirect($link) { curl_setopt($curl, CURLOPT_TIMEOUT, 2); curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); - $result = curl_exec($curl); + curl_exec($curl); + $redirect = curl_getinfo($curl, CURLINFO_REDIRECT_URL); + $status = curl_getinfo($curl, CURLINFO_RESPONSE_CODE); curl_close($curl); - $redirect = curl_getinfo($curls[$i], CURLINFO_REDIRECT_URL); - $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); // Only permanent redirections are a problem if ($status === 301 || $status === 308) { @@ -83,10 +83,8 @@ private function checkRedirect($link) { } private function followPermanentRedirects($original, $link, $maxRedirects = 20) { - if (maxRedirects < 1) { - $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); + if (maxRedirects < 1) return; - } $curl = curl_init(); curl_setopt($curl, CURLOPT_URL, $link); @@ -96,16 +94,16 @@ private function followPermanentRedirects($original, $link, $maxRedirects = 20) curl_setopt($curl, CURLOPT_TIMEOUT, 2); curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); - $result = curl_exec($curl); + curl_exec($curl); + $redirect = curl_getinfo($curl, CURLINFO_REDIRECT_URL); + $status = curl_getinfo($curl, CURLINFO_RESPONSE_CODE); curl_close($curl); - $redirect = curl_getinfo($curls[$i], CURLINFO_REDIRECT_URL); - $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); // Only permanent redirections are a problem if ($status === 301 || $status === 308) { followPermanentRedirects($redirect, $maxRedirects - 1); }
else { - $this->setIssue($links[$link], null, json_encode(array('redirect_url' => $redirect))); + $this->setIssue($original, null, json_encode(array('redirect_url' => $redirect))); } } } From 1bb0e3a5369ca32ad63c12ddf4c51e7331b1ceaa Mon Sep 17 00:00:00 2001 From: AlanFCMV Date: Mon, 25 Oct 2021 09:36:15 -0400 Subject: [PATCH 22/23] Fix wrong link getting passed --- src/Rule/BrokenLink.php | 2 +- src/Rule/RedirectedLink.php | 16 ++++++++++------ tests/BrokenLinkTest.php | 2 +- tests/PhpAllyTest.php | 12 ++++++------ 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/Rule/BrokenLink.php b/src/Rule/BrokenLink.php index fec4a8e..c0c0475 100644 --- a/src/Rule/BrokenLink.php +++ b/src/Rule/BrokenLink.php @@ -54,7 +54,7 @@ private function checkLink($links) { foreach (array_keys($links) as $i => $link) { $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); // If the status is greater than or equal to 400 the link is broken. - if (400 <= $status) { + if ($status >= 400) { $this->setIssue($links[$link]); } curl_multi_remove_handle($mcurl, $curls[$i]); diff --git a/src/Rule/RedirectedLink.php b/src/Rule/RedirectedLink.php index e63bf66..6baeb51 100644 --- a/src/Rule/RedirectedLink.php +++ b/src/Rule/RedirectedLink.php @@ -54,7 +54,7 @@ private function checkLink($links) { foreach (array_keys($links) as $i => $link) { $status = curl_getinfo($curls[$i], CURLINFO_RESPONSE_CODE); // If the status is 400 or greater the link is broken so dont bother checking. 
- if (400 > $status) { + if ($status < 400) { $this->checkRedirect($links[$link]); } curl_multi_remove_handle($mcurl, $curls[$i]); @@ -62,7 +62,8 @@ private function checkLink($links) { curl_multi_close($mcurl); } - private function checkRedirect($link) { + private function checkRedirect($original) { + $link = $original->getAttribute('href'); $curl = curl_init(); curl_setopt($curl, CURLOPT_URL, $link); curl_setopt($curl, CURLOPT_HEADER, true); @@ -78,13 +79,16 @@ private function checkRedirect($link) { // Only permanent redirections are a problem if ($status === 301 || $status === 308) { - followPermanentRedirects($link, $redirect); + $this->followPermanentRedirects($original, $redirect); } } private function followPermanentRedirects($original, $link, $maxRedirects = 20) { - if (maxRedirects < 1) + // Avoid infinite calls. 20 is the redirect limit used by Chrome and Firefox. + if ($maxRedirects < 1) { + $this->setIssue($original, null, json_encode(array('redirect_url' => $link))); return; + } $curl = curl_init(); curl_setopt($curl, CURLOPT_URL, $link); @@ -99,9 +103,9 @@ private function followPermanentRedirects($original, $link, $maxRedirects = 20) $status = curl_getinfo($curl, CURLINFO_RESPONSE_CODE); curl_close($curl); - // Only permanent redirections are a problem + // Continue until we run out of permanent redirects if ($status === 301 || $status === 308) { - followPermanentRedirects($redirect, $maxRedirects - 1); + $this->followPermanentRedirects($original, $redirect, $maxRedirects - 1); } else { $this->setIssue($original, null, json_encode(array('redirect_url' => $redirect))); } diff --git a/tests/BrokenLinkTest.php b/tests/BrokenLinkTest.php index 5805e13..847fe3e 100644 --- a/tests/BrokenLinkTest.php +++ b/tests/BrokenLinkTest.php @@ -15,7 +15,7 @@ public function testCheckValid() public function testCheckBroken400() { - $html = '
I am a link.
'; $dom = new \DOMDocument('1.0', 'utf-8'); $dom->loadHTML($html); diff --git a/tests/PhpAllyTest.php b/tests/PhpAllyTest.php index 50ddc13..c8d8b08 100644 --- a/tests/PhpAllyTest.php +++ b/tests/PhpAllyTest.php @@ -29,7 +29,7 @@ public function testCheckOne() $this->phpAllyReportTest($report); } - public function testCheckMany() + public function testCheckMany() { $ally = new PhpAlly(); $options = [ @@ -41,17 +41,17 @@ public function testCheckMany() $report = $ally->checkMany($this->getManyHtml(), $ally->getRuleIds(), $options); $issues = $report->getIssues(); $issue = reset($issues); - - $this->assertCount(6, $issues, 'Total report should have 5 issues.'); + + $this->assertCount(6, $issues, 'Total report should have 6 issues.'); $this->phpAllyIssueTest($issue); $this->phpAllyReportTest($report); } - + protected function phpAllyReportTest($report) { $issues = $report->getIssues(); - + foreach($issues as $issue) { $this->phpAllyIssueTest($issue); } @@ -64,4 +64,4 @@ protected function phpAllyIssueTest(PhpAllyIssue $issue) $this->assertEquals(DOMElement::class, get_class($issue->getPreviewElement()), 'Issue return DomElement for getPreviewElement()'); } -} \ No newline at end of file +} From 47e2e69e1eea665048d6c6a57e4361e3c89c70c1 Mon Sep 17 00:00:00 2001 From: AlanFCMV Date: Mon, 25 Oct 2021 10:05:23 -0400 Subject: [PATCH 23/23] Fix wrong redirected link returned --- src/Rule/RedirectedLink.php | 2 +- tests/BrokenLinkTest.php | 3 +-- tests/PhpAllyTestCase.php | 10 +++++----- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Rule/RedirectedLink.php b/src/Rule/RedirectedLink.php index 6baeb51..63e493c 100644 --- a/src/Rule/RedirectedLink.php +++ b/src/Rule/RedirectedLink.php @@ -107,7 +107,7 @@ private function followPermanentRedirects($original, $link, $maxRedirects = 20) if ($status === 301 || $status === 308) { $this->followPermanentRedirects($original, $redirect, $maxRedirects - 1); } else { - $this->setIssue($original, null, 
json_encode(array('redirect_url' => $redirect))); + $this->setIssue($original, null, json_encode(array('redirect_url' => $link))); } } } diff --git a/tests/BrokenLinkTest.php b/tests/BrokenLinkTest.php index 847fe3e..4a42807 100644 --- a/tests/BrokenLinkTest.php +++ b/tests/BrokenLinkTest.php @@ -15,8 +15,7 @@ public function testCheckValid() public function testCheckBroken400() { - $html = '
I am a link.
'; + $html = '
I am a link.
'; $dom = new \DOMDocument('1.0', 'utf-8'); $dom->loadHTML($html); $rule = new BrokenLink($dom); diff --git a/tests/PhpAllyTestCase.php b/tests/PhpAllyTestCase.php index 913bc5b..0f5154e 100644 --- a/tests/PhpAllyTestCase.php +++ b/tests/PhpAllyTestCase.php @@ -18,10 +18,10 @@ protected function getManyHtml() { return '

Paragraph text is here.

- - Click Here - - + + Click Here + +

Paragraph text does have enough contrast.

Paragraph text has enough contrast.

@@ -129,4 +129,4 @@ protected function getImageHtml() { return ''; } -} \ No newline at end of file +}