Skip to content

Commit

Permalink
updated latest commit
Browse files Browse the repository at this point in the history
  • Loading branch information
anonymous-sherlock committed Jul 3, 2023
1 parent 8520863 commit 8808108
Show file tree
Hide file tree
Showing 3 changed files with 270 additions and 44 deletions.
5 changes: 5 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"require": {
"donatello-za/rake-php-plus": "^1.0"
}
}
79 changes: 79 additions & 0 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

230 changes: 186 additions & 44 deletions seo_analysis.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,27 @@ function fetchHTML($url)
return $html;
}

// Function to check for URL redirects
// Function to check for URL redirects and return the redirection path
/**
 * Follows any redirects for a URL and reports where the request finally lands.
 *
 * A header-only request is issued with cURL and redirects are followed
 * automatically; the effective URL of the last request in the chain is returned.
 *
 * @param string $url URL to probe.
 * @return string|false Effective URL after redirects, or false when the request fails.
 */
function checkURLRedirects($url)
{
    $ch = curl_init($url);
    if ($ch === false) {
        // cURL could not create a handle for this URL
        return false;
    }

    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_NOBODY, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $response = curl_exec($ch);

    // Resolve the result before closing so the handle is released on both
    // the success and the failure path.
    $redirectUrl = ($response === false)
        ? false
        : curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);

    curl_close($ch);

    return $redirectUrl;
}
// Function to check if robots.txt exists
function checkRobotsTxt($url)
{
Expand Down Expand Up @@ -176,44 +184,51 @@ function countNodes($node)
// Collect internal links (same host as $url) together with their link text.
// $internalLinkUrls is a lowercase-URL => true lookup used to drop duplicates.
$internalLinkUrls = [];
$internalLinkNodes = $xpath->query('//a[not(starts-with(@href, "#"))]');

// Hosts are compared lowercased and without a leading "www." so that
// example.com and www.example.com count as the same site in either direction.
$siteHost = preg_replace('/^www\./', '', strtolower((string) parse_url($url, PHP_URL_HOST)));

foreach ($internalLinkNodes as $linkNode) {
    $href = $linkNode->getAttribute('href');
    $text = trim(preg_replace('/\s+/', ' ', $linkNode->textContent));

    // Links without a target or without visible text are not reported
    if (empty($href) || empty($text)) {
        continue;
    }

    if (filter_var($href, FILTER_VALIDATE_URL)) {
        // Absolute URL: keep it only when it points at the analysed site
        $linkHost = preg_replace('/^www\./', '', strtolower((string) parse_url($href, PHP_URL_HOST)));

        if ($linkHost === '' || $linkHost !== $siteHost) {
            continue; // Skip external URLs
        }

        $fullUrl = $href;
    } else {
        // Relative URL: append it to the analysed URL, avoiding a doubled slash
        $separator = (substr($href, 0, 1) === '/') ? '' : '/';
        $fullUrl = rtrim($url, '/') . $separator . $href;
    }

    $lowercaseUrl = strtolower($fullUrl);

    // Skip URLs that were already recorded (case-insensitive)
    if (isset($internalLinkUrls[$lowercaseUrl])) {
        continue;
    }

    $internalLinks[] = [
        'url' => $fullUrl,
        'text' => $text
    ];

    // Remember the lowercase URL so later duplicates are ignored
    $internalLinkUrls[$lowercaseUrl] = true;
}

// Extract external links with link text
$externalLinks = [];
Expand Down Expand Up @@ -348,9 +363,132 @@ function checkSitemap($url)
}
// Check if the sitemap exists
$sitemapUrl = checkSitemap($url);
// Additional checks: analytics tracking ID, social media meta tags, 404 handling
/**
 * Finds the first Universal Analytics tracking ID (e.g. "UA-12345678-1") in a page's markup.
 *
 * @param string $html Raw HTML of the page.
 * @return string|null The first matching tracking ID, or null when none is present.
 */
function extractTrackingID($html)
{
    // Word boundaries stop the pattern from firing inside longer tokens
    // such as "AQUA-2023-1", which the unanchored pattern would report as an ID.
    $pattern = '/\bUA-\d{4,}-\d+\b/';

    if (preg_match($pattern, (string) $html, $matches) === 1) {
        return $matches[0];
    }

    return null;
}
// Extract the Google Analytics tracking ID from the HTML
$trackingID = extractTrackingID($html);
/**
 * Groups a page's <meta> tags by the social network they target.
 *
 * Tags are classified by the prefix (or exact value) of their "property" or
 * "name" attribute; each tag lands in the first group it matches.
 *
 * @param string $html Raw HTML of the page.
 * @return array Map with the keys openGraph, twitterCard, facebook, pinterest,
 *               linkedin, instagram and googlePlus. Each value is an array of
 *               attribute => content pairs, or false when no tag of that kind
 *               was found.
 */
function extractSocialMediaMetaTags($html)
{
    $socialMediaMetaTags = [
        'openGraph' => [],
        'twitterCard' => [],
        'facebook' => [],
        'pinterest' => [],
        'linkedin' => [],
        'instagram' => [],
        'googlePlus' => [],
    ];

    // DOMDocument::loadHTML() rejects an empty source on PHP 8, so blank input
    // is treated as "no tags found" and never reaches the parser.
    if (is_string($html) && trim($html) !== '') {
        // Silence libxml parse warnings only for this parse, then restore the
        // caller's setting so the global error mode is left as it was found.
        $previousErrorMode = libxml_use_internal_errors(true);

        $dom = new DOMDocument();
        $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        foreach ($dom->getElementsByTagName('meta') as $metaTag) {
            $property = $metaTag->getAttribute('property');
            $name = $metaTag->getAttribute('name');
            $content = $metaTag->getAttribute('content');

            // Order matters: a tag is assigned to the first matching group only
            if (strpos($property, 'og:') === 0) {
                $socialMediaMetaTags['openGraph'][$property] = $content;
            } elseif (strpos($name, 'twitter:') === 0) {
                $socialMediaMetaTags['twitterCard'][$name] = $content;
            } elseif (strpos($property, 'fb:') === 0) {
                $socialMediaMetaTags['facebook'][$property] = $content;
            } elseif ($name === 'pinterest-rich-pin') {
                $socialMediaMetaTags['pinterest'][$name] = $content;
            } elseif (strpos($property, 'linkedin:') === 0) {
                $socialMediaMetaTags['linkedin'][$property] = $content;
            } elseif ($name === 'instagram:app_id') {
                $socialMediaMetaTags['instagram'][$name] = $content;
            } elseif (strpos($name, 'google+:') === 0) {
                $socialMediaMetaTags['googlePlus'][$name] = $content;
            }
        }
    }

    // Report groups without any tag as false instead of an empty array
    foreach ($socialMediaMetaTags as $key => $value) {
        if (empty($value)) {
            $socialMediaMetaTags[$key] = false;
        }
    }

    return $socialMediaMetaTags;
}
// Extract the social media meta tags from the HTML
$socialMediaMetaTags = extractSocialMediaMetaTags($html);
// Function to check if a URL returns a 404 status code
/**
 * Reports whether requesting a URL ultimately ends in an HTTP 404 response.
 *
 * get_headers() follows redirects and returns the status line of every hop,
 * so the last status line is the one that describes the final response.
 *
 * @param string $url URL to request.
 * @return bool True when the final response status is 404, false otherwise
 *              (including when the headers could not be fetched).
 */
function is404Page($url)
{
    $headers = get_headers($url);

    if (!is_array($headers)) {
        return false; // Request failed, nothing to inspect
    }

    $finalStatusCode = null;

    foreach ($headers as $header) {
        // Each hop contributes its own "HTTP/x.y NNN ..." line; keep the last one
        if (is_string($header) && preg_match('#^HTTP/\d+(?:\.\d+)?\s+(\d{3})\b#i', $header, $statusMatch) === 1) {
            $finalStatusCode = (int) $statusMatch[1];
        }
    }

    return $finalStatusCode === 404;
}
// Construct the URL for a non-existent page (e.g., example.com/non-existent-page)
$nonExistentPageUrl = rtrim($url, '/') . '/non-existent-page';
// Check if the non-existent page returns a 404 status code
$hasCustom404Page = is404Page($nonExistentPageUrl);



// Check whether the server compresses its responses


/**
 * Reports whether the server answers a URL with a compressed response body.
 *
 * The request advertises gzip, deflate and br via Accept-Encoding; without
 * that header a server has no reason to compress and the check would report
 * false for most sites.
 *
 * @param string $url URL to request.
 * @return bool True when any Content-Encoding header in the response chain
 *              names gzip, deflate or br; false otherwise or on failure.
 */
function isCompressionEnabled($url)
{
    $context = stream_context_create([
        'http' => [
            'header' => "Accept-Encoding: gzip, deflate, br\r\n",
        ],
    ]);

    $headers = get_headers($url, 1, $context);

    if (!is_array($headers)) {
        return false; // Request failed, nothing to inspect
    }

    foreach ($headers as $headerName => $headerValue) {
        // Header names are case-insensitive; servers may send "content-encoding"
        if (!is_string($headerName) || strcasecmp($headerName, 'Content-Encoding') !== 0) {
            continue;
        }

        // With redirects the same header can occur once per hop, in which
        // case get_headers() returns an array of values instead of a string.
        foreach ((array) $headerValue as $contentEncoding) {
            if (
                stripos($contentEncoding, 'gzip') !== false
                || stripos($contentEncoding, 'deflate') !== false
                || stripos($contentEncoding, 'br') !== false
            ) {
                return true;
            }
        }
    }

    return false;
}

// Check whether the analysed URL is served with compression
$isCompressionEnabled = isCompressionEnabled($url);







require 'vendor/autoload.php';

use DonatelloZa\RakePlus\RakePlus;

// Extract keywords from the analysed page itself, not from a fixed sample paragraph.
// Script and style bodies are removed first because strip_tags() would
// otherwise keep their contents as if they were visible text.
$keywordSource = is_string($html) ? $html : '';
$withoutScripts = preg_replace('#<(script|style)\b[^>]*>.*?</\1>#is', ' ', $keywordSource);
if ($withoutScripts === null) {
    // The regex engine gave up (e.g. backtrack limit); fall back to the raw markup
    $withoutScripts = $keywordSource;
}

// Reduce the markup to plain text with single spaces between words
$text = html_entity_decode(strip_tags($withoutScripts), ENT_QUOTES, 'UTF-8');
$text = trim(preg_replace('/\s+/', ' ', $text));

// Without any text there is nothing to rank
$mostCommonKeywords = ($text !== '') ? RakePlus::create($text)->keywords() : [];



Expand All @@ -359,6 +497,10 @@ function checkSitemap($url)
// Build the SEO report array
$report = [
'url' => $url,
'isCompression' => $isCompressionEnabled,
'googleTrackingID' => $trackingID,
'hasCustom404Page' => $hasCustom404Page,
'socialMetaTags' => $socialMediaMetaTags,
'favicon' => $favicon,
'language' => $language,
'hasDoctype' => $hasDoctype,
Expand Down

0 comments on commit 8808108

Please sign in to comment.