diff --git a/composer.json b/composer.json
new file mode 100644
index 0000000..c115f71
--- /dev/null
+++ b/composer.json
@@ -0,0 +1,5 @@
+{
+    "require": {
+        "donatello-za/rake-php-plus": "^1.0"
+    }
+}
diff --git a/composer.lock b/composer.lock
new file mode 100644
index 0000000..3b1fbba
--- /dev/null
+++ b/composer.lock
@@ -0,0 +1,79 @@
+{
+    "_readme": [
+        "This file locks the dependencies of your project to a known state",
+        "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
+        "This file is @generated automatically"
+    ],
+    "content-hash": "1ba534685c5e65fe280045e35f9bdeb3",
+    "packages": [
+        {
+            "name": "donatello-za/rake-php-plus",
+            "version": "v1.0.18",
+            "source": {
+                "type": "git",
+                "url": "https://github.com/Donatello-za/rake-php-plus.git",
+                "reference": "e9e9c0862b3dc953d288e8f42c76e4ceaeca0619"
+            },
+            "dist": {
+                "type": "zip",
+                "url": "https://api.github.com/repos/Donatello-za/rake-php-plus/zipball/e9e9c0862b3dc953d288e8f42c76e4ceaeca0619",
+                "reference": "e9e9c0862b3dc953d288e8f42c76e4ceaeca0619",
+                "shasum": ""
+            },
+            "require": {
+                "ext-json": "*",
+                "ext-mbstring": "*",
+                "php": ">=5.4.0"
+            },
+            "require-dev": {
+                "php": ">=5.5.0",
+                "phpunit/phpunit": "~4.0|~5.0"
+            },
+            "type": "library",
+            "extra": {
+                "branch-alias": {
+                    "dev-master": "1.0.13-dev"
+                }
+            },
+            "autoload": {
+                "psr-4": {
+                    "DonatelloZa\\RakePlus\\": "src/"
+                }
+            },
+            "notification-url": "https://packagist.org/downloads/",
+            "license": [
+                "MIT"
+            ],
+            "authors": [
+                {
+                    "name": "Don Schoeman",
+                    "email": "ta.maximus@gmail.com"
+                }
+            ],
+            "description": "Yet another PHP implementation of the Rapid Automatic Keyword Extraction algorithm (RAKE).",
+            "homepage": "https://github.com/Donatello-za/rake-php-plus",
+            "keywords": [
+                "Algorithm",
+                "automatic",
+                "extraction",
+                "keyword",
+                "rake",
+                "rapid"
+            ],
+            "support": {
+                "issues": "https://github.com/Donatello-za/rake-php-plus/issues",
+                "source": "https://github.com/Donatello-za/rake-php-plus"
+            },
+            "time": "2022-02-23T18:42:03+00:00"
+        }
+    ],
+    "packages-dev": [],
+    "aliases": [],
+    "minimum-stability": "stable",
+    "stability-flags": [],
+    "prefer-stable": false,
+    "prefer-lowest": false,
+    "platform": [],
+    "platform-dev": [],
+    "plugin-api-version": "2.3.0"
+}
diff --git a/seo_analysis.php b/seo_analysis.php
index ab4fc99..4286f70 100644
--- a/seo_analysis.php
+++ b/seo_analysis.php
@@ -31,19 +31,28 @@ function fetchHTML($url)
     return $html;
 }
 
-// Function to check for URL redirects
+// Function to check for URL redirects and return the redirection path
 function checkURLRedirects($url)
 {
-    $headers = get_headers($url, 1);
+    $ch = curl_init($url);
+    curl_setopt($ch, CURLOPT_HEADER, true);
+    curl_setopt($ch, CURLOPT_NOBODY, true);
+    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
+    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 
-    if (isset($headers['Location'])) {
-        return is_array($headers['Location'])
-            ? end($headers['Location'])
-            : $headers['Location'];
+    $response = curl_exec($ch);
+
+    if ($response === false) {
+        // Error occurred while making the request; release the handle before bailing out
+        curl_close($ch);
+        return false;
     }
 
-    return null;
-}
+    $redirectUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
+    curl_close($ch);
+
+    return $redirectUrl;
+}
 // Function to check if robots.txt exists
 function checkRobotsTxt($url)
 {
@@ -176,44 +185,50 @@ function countNodes($node)
 $internalLinkUrls = [];
 $internalLinkNodes = $xpath->query('//a[not(starts-with(@href, "#"))]');
 foreach ($internalLinkNodes as $linkNode) {
-    $href = $linkNode->getAttribute('href');
-    $text = trim(preg_replace('/\s+/', ' ', $linkNode->textContent));
-
-    if (!empty($href) && !empty($text)) {
-        // Check if $href is an absolute URL and belongs to the same domain
-        if (filter_var($href, FILTER_VALIDATE_URL)) {
-            $parsedHref = parse_url($href);
-
-            if (isset($parsedHref['host']) && $parsedHref['host'] === parse_url($url, PHP_URL_HOST)) {
-                $fullUrl = $href;
-            } else {
-                continue; // Skip external URLs
-            }
-        } else {
-            $base = rtrim($url, '/');
-            $separator = '/';
-            if (substr($href, 0, 1) === '/') {
-                $separator = '';
-            }
-            $fullUrl = $base . $separator . $href;
+        $href = $linkNode->getAttribute('href');
+        $text = trim(preg_replace('/\s+/', ' ', $linkNode->textContent));
+
+        if (!empty($href) && !empty($text)) {
+            // Check if $href is an absolute URL and belongs to the same domain
+            if (filter_var($href, FILTER_VALIDATE_URL)) {
+                $parsedHref = parse_url($href);
+                // Compare hosts case-insensitively, treating "www." and bare domains as the same site
+                $parsedUrlHost = isset($parsedHref['host']) ? strtolower($parsedHref['host']) : '';
+                $originalUrlHost = strtolower((string) parse_url($url, PHP_URL_HOST));
+                $wwwPrefix = '/^www\./';
+
+                if ($parsedUrlHost !== '' && preg_replace($wwwPrefix, '', $parsedUrlHost) === preg_replace($wwwPrefix, '', $originalUrlHost)) {
+                    $fullUrl = $href;
+                } else {
+                    continue; // Skip external URLs
+                }
+            } else {
+                $base = rtrim($url, '/');
+                $separator = '/';
+                if (substr($href, 0, 1) === '/') {
+                    $separator = '';
+                }
+                $fullUrl = $base . $separator . $href;
+            }
+
+            $lowercaseUrl = strtolower($fullUrl);
+
+            // Check if the lowercase URL has already been added to the array
+            $isInternalLink = isset($internalLinkUrls[$lowercaseUrl]);
+
+            if (!$isInternalLink) {
+                $internalLinks[] = [
+                    'url' => $fullUrl,
+                    'text' => $text
+                ];
+
+                // Add the lowercase URL to the list of added URLs
+                $internalLinkUrls[$lowercaseUrl] = true;
+            }
         }
+}
 
-        $lowercaseUrl = strtolower($fullUrl);
-
-        // Check if the lowercase URL has already been added to the array
-        $isInternalLink = isset($internalLinkUrls[$lowercaseUrl]);
-
-        if (!$isInternalLink) {
-            $internalLinks[] = [
-                'url' => $fullUrl,
-                'text' => $text
-            ];
 
-            // Add the lowercase URL to the list of added URLs
-            $internalLinkUrls[$lowercaseUrl] = true;
-        }
-    }
-}
 
 // Extract external links with link text
 $externalLinks = [];
@@ -348,9 +363,154 @@ function checkSitemap($url)
 }
 // Check if the sitemap exists
 $sitemapUrl = checkSitemap($url);
-// new code add here
+// Function to extract the first Google Analytics "UA-" tracking ID from the HTML, or null if none is found
+function extractTrackingID($html)
+{
+    $matches = [];
+    $pattern = '/UA-\d{4,}-\d{1,}/';
+    preg_match($pattern, $html, $matches);
+    return isset($matches[0]) ? $matches[0] : null;
+}
+// Extract the Google Analytics tracking ID from the HTML
+$trackingID = extractTrackingID($html);
 
+// Function to collect social media meta tags from the HTML, grouped by network (false for a network with none)
+function extractSocialMediaMetaTags($html)
+{
+    $dom = new DOMDocument();
+    libxml_use_internal_errors(true);
+    $dom->loadHTML($html);
+    libxml_clear_errors();
+
+    $metaTags = $dom->getElementsByTagName('meta');
+    $openGraphTags = array();
+    $twitterCardTags = array();
+    $facebookTags = array();
+    $pinterestTags = array();
+    $linkedinTags = array();
+    $instagramTags = array();
+    $googlePlusTags = array();
+
+    foreach ($metaTags as $metaTag) {
+        $property = $metaTag->getAttribute('property');
+        $name = $metaTag->getAttribute('name');
+        $content = $metaTag->getAttribute('content');
+
+        if (strpos($property, 'og:') === 0) {
+            $openGraphTags[$property] = $content;
+        } elseif (strpos($name, 'twitter:') === 0) {
+            $twitterCardTags[$name] = $content;
+        } elseif (strpos($property, 'fb:') === 0) {
+            $facebookTags[$property] = $content;
+        } elseif ($name === 'pinterest-rich-pin') {
+            $pinterestTags[$name] = $content;
+        } elseif (strpos($property, 'linkedin:') === 0) {
+            $linkedinTags[$property] = $content;
+        } elseif ($name === 'instagram:app_id') {
+            $instagramTags[$name] = $content;
+        } elseif (strpos($name, 'google+:') === 0) {
+            $googlePlusTags[$name] = $content;
+        }
+    }
+
+    $socialMediaMetaTags = array(
+        'openGraph' => $openGraphTags,
+        'twitterCard' => $twitterCardTags,
+        'facebook' => $facebookTags,
+        'pinterest' => $pinterestTags,
+        'linkedin' => $linkedinTags,
+        'instagram' => $instagramTags,
+        'googlePlus' => $googlePlusTags
+    );
+
+    foreach ($socialMediaMetaTags as $key => $value) {
+        if (empty($value)) {
+            $socialMediaMetaTags[$key] = false;
+        }
+    }
+
+    return $socialMediaMetaTags;
+}
+// Extract the social media meta tags from the HTML
+$socialMediaMetaTags = extractSocialMediaMetaTags($html);
 
+// Function to check if a URL returns a 404 status code
+function is404Page($url)
+{
+    $headers = get_headers($url);
+
+    if (!$headers) {
+        return false; // Request failed, so the status is unknown
+    }
+
+    // get_headers() follows redirects, so the last status line belongs to the final response
+    $statusLine = $headers[0];
+    foreach ($headers as $header) {
+        if (strpos($header, 'HTTP/') === 0) {
+            $statusLine = $header;
+        }
+    }
+
+    // Match the status code itself rather than any "404" substring in the line
+    return preg_match('#^HTTP/\S+\s+404\b#', $statusLine) === 1;
+}
+// Construct the URL for a non-existent page (e.g., example.com/non-existent-page)
+$nonExistentPageUrl = rtrim($url, '/') . '/non-existent-page';
+// Check if the non-existent page returns a 404 status code
+$hasCustom404Page = is404Page($nonExistentPageUrl);
+
+
+
+//gpt new code add here
+
+
+// Function to check if the server compresses its responses (gzip, deflate or brotli)
+function isCompressionEnabled($url)
+{
+    // Advertise compression support so the server has a reason to send Content-Encoding
+    $context = stream_context_create([
+        'http' => ['header' => "Accept-Encoding: gzip, deflate, br\r\n"],
+    ]);
+    $headers = get_headers($url, 1, $context);
+
+    if ($headers === false) {
+        return false;
+    }
+
+    // Header names are case-insensitive; normalise them before the lookup
+    $headers = array_change_key_case($headers, CASE_LOWER);
+
+    if (!isset($headers['content-encoding'])) {
+        return false;
+    }
+
+    // After redirects the header can hold one value per response; use the last one
+    $contentEncoding = $headers['content-encoding'];
+    if (is_array($contentEncoding)) {
+        $contentEncoding = end($contentEncoding);
+    }
+
+    return preg_match('/\b(gzip|deflate|br)\b/i', (string) $contentEncoding) === 1;
+}
+
+// Check whether the analysed URL is served with compression
+$isCompressionEnabled = isCompressionEnabled($url);
+
+
+
+
+
+
+
+require __DIR__ . '/vendor/autoload.php';
+
+use DonatelloZa\RakePlus\RakePlus;
+// NOTE(review): $text is a hard-coded sample paragraph, not text from the analysed page - confirm the intended input
+$text = "Criteria of compatibility of a system of linear Diophantine equations, " .
+    "strict inequations, and nonstrict inequations are considered. Upper bounds " .
+    "for components of a minimal set of solutions and algorithms of construction " .
+    "of minimal generating sets of solutions for all types of systems are given.";
+$mostCommonKeywords = RakePlus::create($text)->keywords();
 
 
 
@@ -359,6 +519,10 @@ function checkSitemap($url)
 // Build the SEO report array
 $report = [
     'url' => $url,
+    'isCompression' => $isCompressionEnabled,
+    'googleTrackingID' => $trackingID,
+    'hasCustom404Page' => $hasCustom404Page,
+    'socialMetaTags' => $socialMediaMetaTags,
     'favicon' => $favicon,
     'language' => $language,
     'hasDoctype' => $hasDoctype,