Skip to content

Commit

Permalink
Added directory scripts and fixed bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
Chris More committed Mar 22, 2012
1 parent 0993ade commit b283469
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 41 deletions.
9 changes: 6 additions & 3 deletions check-domain.sh
Expand Up @@ -18,7 +18,7 @@ if echo "$address" | grep -i '^ftp'; then
fi

#Check the status code of the address
response=$(curl --write-out %{http_code} --silent --output /dev/null $pro://$address)
response=$(curl -k --write-out %{http_code} --silent --output /dev/null $pro://$address)

#Determine a human readable status code message
if [ $response == "200" ] || [ $response == "226" ]; then
Expand All @@ -32,14 +32,15 @@ elif [ $response == "500" ]; then
status="Error: $response Internal Server Error"
status_type="error"
elif [ $response == "301" ] || [ $response == "302" ]; then

# Check to see if a website is just redirecting from http to https
website_redirected=$(curl --write-out %{url_effective} --silent --output /dev/null -L $pro://$address)
website_redirected=$(curl -k --write-out %{url_effective} --silent --output /dev/null -L $pro://$address)
domain=`echo $website_redirected | sed -r 's/^(.+\/\/)([^/]+)(.*)/\2/'`

if [[ "$domain" == "$address" ]]; then
# website redirected, but stayed on domain.
# If the website was redirected, check that it did not redirect to a 404 page.
response=$(curl --write-out %{http_code} --silent --output /dev/null $website_redirected)
response=$(curl -k --write-out %{http_code} --silent --output /dev/null $website_redirected)
if [ $response == "404" ]; then
status="Error: $response Not Found"
status_type="error"
Expand Down Expand Up @@ -142,5 +143,7 @@ input_len=`wc -l $input | sed -r 's/^([0-9]+) (.+)/\1/g'`
output_len=`wc -l $output | sed -r 's/^([0-9]+) (.+)/\1/g'`

if [ "$input_len" == "$output_len" ]; then
echo "Creating wiki"
./create-wiki.sh
echo "Done."
fi
8 changes: 6 additions & 2 deletions check-ignore.sh
Expand Up @@ -2,11 +2,15 @@

domain=$1

ignore_domains="allizom|\.stage|stage\.|-stage|stage-|-cdn|-dev|\.dmz\.|sjc1\.|-phx\.|-sjc|\.brasstacks\.|-mirror|pfs2|-static|-www|-nii0|-origin|-proxy|^dm-|-mpt\.|^dev\.|mozilla\.net$|ecmascript\.org$|opentimetable\.jp$|-new\.|^m\.|-test\.|people\.mozilla\.com|people\.mozilla\.org"
ignore_domains="allizom|\.stage|stage\.|-stage|stage-|-cdn|-dev|\.dmz\.|sjc1\.|-phx\.|-sjc|\.brasstacks\.|-mirror|pfs2|-static|-www|-nii0|-origin|-proxy|^dm-|-mpt\.|^dev\.|mozilla\.net$|ecmascript\.org$|opentimetable\.jp$|-new\.|^m\.|-test\.|people\.mozilla\.com|people\.mozilla\.org|hg_trunk|outgoing\.mozilla\.org|hg\.frenchmozilla\.fr|^ns[0-3]\.|^arecibo\.|^graphite\.|events\.mozilla\-europe\.org"

ignore_domain_check=`echo $domain | grep -i -E $ignore_domains | wc -l | sed 's/ //g'`

if [ $ignore_domain_check == 0 ]; then
address=`./get-redirected-address.sh $domain`

ignore_body=`curl -sk $address | grep -i -E "<h1>It works\!</h1>|<h1>Index of /</h1>|<h1>Forbidden</h1>" | wc -l | sed 's/ //g'`

if [ $ignore_domain_check == 0 ] && [ $ignore_body == 0 ]; then
echo "0"
else
echo "1"
Expand Down
2 changes: 2 additions & 0 deletions create-wiki.sh
Expand Up @@ -183,3 +183,5 @@ echo "|}
This wiki page is automatically generated by scripts. Please contact [https://ldap.mozilla.org/phonebook/tree.php#search/cmore@mozilla.com Chris More] for more
information. The source script for this page can be found [https://github.com/chrismore/Domain-Name-Status-Checker here]." >> $output

echo "Done."
48 changes: 14 additions & 34 deletions get-redirected-address.sh
@@ -1,40 +1,20 @@
#!/bin/bash

pro="http"
address=$1
address="http://$1"
response="301"

# Find redirected address
count=0
while [ $response == "301" ] || [ $response == "302" ]
do
address=$(curl -k --write-out %{url_effective} --silent --output /dev/null -L $address)
response=$(curl -k --write-out %{http_code} --silent --output /dev/null $address)

# Check to see if a website is just redirecting from http to https
website_redirected=$(curl --write-out %{url_effective} --silent --output /dev/null -L $pro://$address)
if [ "https://$address/" == "$website_redirected" ]; then
pro="https"
# Check redirector again in case it redirects a second time (localization)
website_redirected2=$(curl --write-out %{url_effective} --silent --output /dev/null -L $website_redirected)
if [ "$website_redirected2" == "" ]; then
#If the website did not redirect again after switching to https, then set address_final to current address.
address_final=$website_redirected
else
#If the website redirected a second time, set the address_final variable to the second redirected address.
address_final=$website_redirected2
fi
else
# website stayed http
if [[ "$website_redirected" != "" ]]; then
# website redirected, but stayed on the same domain. Probably l10n redirection.
website_redirected2=$(curl --write-out %{url_effective} --silent --output /dev/null -L $website_redirected)

if [ "$website_redirected2" == "" ]; then
# website did not redirect to a subdirectory.
address_final=$website_redirected
else
#If the website redirected a second time, set the address_final variable to the second redirected address.
address_final=$website_redirected2
fi

else
address_final="$pro://$address"
fi
(( count++ ))

if [ $count == 10 ]; then
break
fi

echo $address_final
done

echo $address
6 changes: 4 additions & 2 deletions get-title.sh
Expand Up @@ -3,18 +3,20 @@
address=$1

response=$(curl --write-out %{http_code} --silent --output /dev/null http://$address)
badtitles="Authorization Required"
badtitles="Authorization Required|Index of"

if [ $response != "200" ]; then
address=$(curl --write-out %{url_effective} --silent --output /dev/null -L http://$address)
fi

title=`curl -s $address | grep -i "title>" | sed ':a;N;$!ba;s/\n//g' | sed -r "s/^[^<]+//g" | sed -r "s/<title>//gI" | sed -r "s/<\/title>//gI" | sed -r "s/([^<]+)(.*)/\1/g" | sed 's/[^a-z0-9\-\: ]*$//g' | sed -r 's/<\/*[^>]+\/*>//g'`
title=`curl -sk $address | grep -i "title>" | sed ':a;N;$!ba;s/\n//g' | sed -r "s/^[^<]+//g" | sed -r "s/<title>//gI" | sed -r "s/<\/title>//gI" | sed -r "s/([^<]+)(.*)/\1/g" | sed 's/[^a-z0-9\-\: ]*$//g' | sed -r 's/<\/*[^>]+\/*>//g'`

if [ "$title" != "" ]; then
ignore_address=`echo $title | grep -i -E "$badtitles" | wc -l | sed 's/ //g'`

if [ $ignore_address == 0 ]; then
echo $title
fi
else
echo "$1"
fi

0 comments on commit b283469

Please sign in to comment.