Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
126 lines (104 sloc) 4.09 KB
<?php
/**
 * Build the absolute URL of the currently executing script.
 *
 * The URL is assembled from $_SERVER: the scheme comes from HTTPS, the host
 * from SERVER_NAME, the port from SERVER_PORT and the path from PHP_SELF.
 * The port is left out when it is 80 (as before) or when the request is
 * HTTPS on 443, so default-port requests produce a clean URL.
 *
 * The result is NOT escaped. PHP_SELF can carry client-supplied path info,
 * so callers must pass the value through htmlspecialchars() before placing
 * it in HTML.
 *
 * @return string Absolute URL of the current script, without query string.
 */
function curPageURL() {
    $isHttps = isset($_SERVER["HTTPS"]) && $_SERVER["HTTPS"] == "on";
    $pageURL = ($isHttps ? 'https' : 'http') . '://' . $_SERVER["SERVER_NAME"];

    $port = (string) $_SERVER["SERVER_PORT"];
    // 80 was always treated as "no port needed"; 443 is the default for HTTPS
    // and used to be appended as ":443" by mistake.
    $isDefaultPort = ($port === '80') || ($isHttps && $port === '443');
    if (!$isDefaultPort) {
        $pageURL .= ':' . $port;
    }

    return $pageURL . $_SERVER["PHP_SELF"];
}
// ---------------------------------------------------------------------------
// Request handling.
//
// Reads ?url= (sitemap location) and the optional ?txt flag, downloads the
// sitemap, extracts every <url><loc> value and either streams the list as a
// plain-text attachment (?txt present, no errors) or leaves it in $content
// for the HTML page that follows.
//
// Variables handed to the template: $url, $errors, $messages, $content.
// ---------------------------------------------------------------------------
$success = false;
$content = '';
$url = '';
$domain = '';
$errors = array();
$messages = array();
$asText = isset($_GET['txt']);

if (!isset($_GET['url']) || !is_string($_GET['url']) || $_GET['url'] == "") {
    $errors[] = 'Please provide a valid url to a valid sitemap.xml';
}

if (count($errors) === 0) {
    $url = $_GET['url'];
    if (filter_var($url, FILTER_VALIDATE_URL) === false) {
        $errors[] = $url . ' is not a valid URL.';
    } else {
        // FILTER_VALIDATE_URL accepts any scheme; restrict to HTTP(S) so the
        // fetch below cannot be pointed at local files or other stream wrappers.
        // NOTE(review): this does not stop requests to internal HTTP hosts;
        // add a host allow/deny check if this runs on a trusted network.
        $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
        if ($scheme !== 'http' && $scheme !== 'https') {
            $errors[] = 'Only http and https URLs are supported.';
        }
    }
}

// Only fetch once the URL has passed validation.
if (count($errors) === 0) {
    $sitemap = file_get_contents($url);
    if ($sitemap === false || $sitemap === '') {
        $errors[] = 'Unable to get remote sitemap.';
    } elseif (substr($sitemap, 0, 5) != "<?xml") {
        $errors[] = 'It\'s not a valid XML file.';
    } else {
        $xml = null;
        try {
            $xml = new SimpleXMLElement($sitemap);
        } catch (Exception $e) {
            $errors[] = $e->getMessage();
        }

        // Skip extraction when parsing failed; $xml would not be usable.
        if ($xml !== null) {
            $domain = (string) parse_url($url, PHP_URL_HOST);
            $lineBreak = $asText ? "\r\n" : '<br />';
            foreach ($xml->url as $val) {
                $loc = (string) $val->loc;
                // In HTML mode $content is echoed as markup, so every URL
                // taken from the remote document has to be escaped here.
                $content .= ($asText ? $loc : htmlspecialchars($loc, ENT_QUOTES, 'UTF-8')) . $lineBreak;
            }
            $messages[] = count($xml->url) . ' url loaded from ' . $url;
            $success = true;
        }
    }
}

if ($asText && count($errors) === 0) {
    // Keep the header value to a conservative character set.
    $safeDomain = preg_replace('/[^A-Za-z0-9.\-]/', '_', $domain);
    header("Content-type: text/plain");
    header("Content-Disposition: attachment; filename=" . $safeDomain . ".sitemap.txt");
    echo $content;
    exit;
}
// ---------------------------------------------------------------------------
// HTML view. Renders the form, any error/success alerts and the extracted
// URL list. Every request-derived value is HTML-escaped at the point of output.
// ---------------------------------------------------------------------------
?><!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="author" content="Vincent Lahaye" />
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Sitemap URL Extractor</title>
<!-- Bootstrap -->
<link href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css" rel="stylesheet">
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<div class="container">
<?php /* curPageURL() includes PHP_SELF, which may contain client-supplied path info. */ ?>
<a href="<?php echo htmlspecialchars(curPageURL(), ENT_QUOTES, 'UTF-8') ?>">
<h1 style="font-size: 18px;"><i class="glyphicon glyphicon-list-alt"></i> Extract sitemap.xml urls to (.txt) File</h1>
</a>
<form action="" method="get">
<input type="text" class="form-control" name="url" placeholder="Sitemap URL" value="<?php echo htmlspecialchars($url, ENT_QUOTES, 'UTF-8'); ?>" />
<input type="submit" class="btn btn-warning pull-left" style="margin-top: 10px" value="Submit" />
<div class="checkbox pull-left" style="margin: 16px 0 0 10px;">
<label>
<input type="checkbox" name="txt"<?php if(isset($_GET['txt'])) echo " checked='checked'"; ?>> Download as .txt <i class="glyphicon glyphicon-download-alt"></i>
</label>
</div>
<div class="clearfix"></div>
</form>
<div style="margin-top: 20px;">
<?php foreach($errors as $error): ?>
<div class="alert alert-danger" role="alert"><?php echo htmlspecialchars($error, ENT_QUOTES, 'UTF-8') ?></div>
<?php endforeach ?>
<?php foreach($messages as $msg): ?>
<div class="alert alert-success" role="alert"><?php echo htmlspecialchars($msg, ENT_QUOTES, 'UTF-8') ?></div>
<?php endforeach ?>
<?php /* $content is built with <br /> separators, so it cannot be escaped here; the code that builds it is responsible for escaping each URL. */ ?>
<?php echo $content; ?>
</div>
</div>
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
<!-- Include all compiled plugins (below), or include individual files as needed -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js"></script>
</body>
</html>
You can’t perform that action at this time.