diff --git a/check-domain.sh b/check-domain.sh index e239b42..3163e04 100755 --- a/check-domain.sh +++ b/check-domain.sh @@ -18,7 +18,7 @@ if echo "$address" | grep -i '^ftp'; then fi #Check the status code of the address -response=$(curl --write-out %{http_code} --silent --output /dev/null $pro://$address) +response=$(curl -k --write-out %{http_code} --silent --output /dev/null $pro://$address) #Determine a human readable status code message if [ $response == "200" ] || [ $response == "226" ]; then @@ -32,14 +32,15 @@ elif [ $response == "500" ]; then status="Error: $response Internal Server Error" status_type="error" elif [ $response == "301" ] || [ $response == "302" ]; then + # Check to see if a website is just redirecting from http to https - website_redirected=$(curl --write-out %{url_effective} --silent --output /dev/null -L $pro://$address) + website_redirected=$(curl -k --write-out %{url_effective} --silent --output /dev/null -L $pro://$address) domain=`echo $website_redirected | sed -r 's/^(.+\/\/)([^/]+)(.*)/\2/'` if [[ "$domain" == "$address" ]]; then # website redireted, but stayed on domain. # Check to make sure if the website was redirected, that it did not redirected to a 404 page. - response=$(curl --write-out %{http_code} --silent --output /dev/null $website_redirected) + response=$(curl -k --write-out %{http_code} --silent --output /dev/null $website_redirected) if [ $response == "404" ]; then status="Error: $response Not Found" status_type="error" @@ -142,5 +143,7 @@ input_len=`wc -l $input | sed -r 's/^([0-9]+) (.+)/\1/g'` output_len=`wc -l $output | sed -r 's/^([0-9]+) (.+)/\1/g'` if [ "$input_len" == "$output_len" ]; then + echo "Creating wiki" ./create-wiki.sh + echo "Done." fi diff --git a/check-ignore.sh b/check-ignore.sh index deb2c4d..79c27e7 100755 --- a/check-ignore.sh +++ b/check-ignore.sh @@ -2,11 +2,15 @@ domain=$1 -ignore_domains="allizom|\.stage|stage\.|-stage|stage-|-cdn|-dev|\.dmz\.|sjc1\.|-phx\.|-sjc|\.brasstacks\.|-mirror|pfs2|-static|-www|-nii0|-origin|-proxy|^dm-|-mpt\.|^dev\.|mozilla\.net$|ecmascript\.org$|opentimetable\.jp$|-new\.|^m\.|-test\.|people\.mozilla\.com|people\.mozilla\.org" +ignore_domains="allizom|\.stage|stage\.|-stage|stage-|-cdn|-dev|\.dmz\.|sjc1\.|-phx\.|-sjc|\.brasstacks\.|-mirror|pfs2|-static|-www|-nii0|-origin|-proxy|^dm-|-mpt\.|^dev\.|mozilla\.net$|ecmascript\.org$|opentimetable\.jp$|-new\.|^m\.|-test\.|people\.mozilla\.com|people\.mozilla\.org|hg_trunk|outgoing\.mozilla\.org|hg\.frenchmozilla\.fr|^ns[0-3]\.|^arecibo\.|^graphite\.|events\.mozilla\-europe\.org" ignore_domain_check=`echo $domain | grep -i -E $ignore_domains | wc -l | sed 's/ //g'` -if [ $ignore_domain_check == 0 ]; then +address=`./get-redirected-address.sh $domain` + +ignore_body=`curl -sk $address | grep -i -E "

It works\!

|

Index of /

|

Forbidden

" | wc -l | sed 's/ //g'` + +if [ $ignore_domain_check == 0 ] && [ $ignore_body == 0 ]; then echo "0" else echo "1" diff --git a/create-wiki.sh b/create-wiki.sh index 4550cc0..929887b 100755 --- a/create-wiki.sh +++ b/create-wiki.sh @@ -183,3 +183,5 @@ echo "|} This wiki page is automatically generated by scripts. Please contact [https://ldap.mozilla.org/phonebook/tree.php#search/cmore@mozilla.com Chris More] for more information. The source script for this page can be found [https://github.com/chrismore/Domain-Name-Status-Checker here]." >> $output + +echo "Done." diff --git a/get-redirected-address.sh b/get-redirected-address.sh index 430085b..34bb1f6 100755 --- a/get-redirected-address.sh +++ b/get-redirected-address.sh @@ -1,40 +1,20 @@ #!/bin/bash -pro="http" -address=$1 +address="http://$1" +response="301" # Find redirected address +count=0 +while [ $response == "301" ] || [ $response == "302" ] +do + address=$(curl -k --write-out %{url_effective} --silent --output /dev/null -L $address) + response=$(curl -k --write-out %{http_code} --silent --output /dev/null $address) -# Check to see if a website is just redirecting from http to https - website_redirected=$(curl --write-out %{url_effective} --silent --output /dev/null -L $pro://$address) - if [ "https://$address/" == "$website_redirected" ]; then - pro="https" - # Check redirector again incase it redirects a second time (localization) - website_redirected2=$(curl --write-out %{url_effective} --silent --output /dev/null -L $website_redirected) - if [ "$website_redirected2" == "" ]; then - #If the website did not redirect again after switching to https, then set address_final to current address. - address_final=$website_redirected - else - #If the website redirected a second time, set the address_final variable to the second redirected address. - address_final=$website_redirected2 - fi - else - # website stayed http - if [[ "$website_redirected" != "" ]]; then - # website redirected, but stayed on the same domain. Probably l10n redirection. - website_redirected2=$(curl --write-out %{url_effective} --silent --output /dev/null -L $website_redirected) - - if [ "$website_redirected2" == "" ]; then - # website did not redirect to a subdirectory. - address_final=$website_redirected - else - #If the website redirected a second time, set the address_final variable to the second redirected address. - address_final=$website_redirected2 - fi - - else - address_final="$pro://$address" - fi + (( count++ )) + + if [ $count == 10 ]; then + break fi - -echo $address_final +done + +echo $address diff --git a/get-title.sh b/get-title.sh index 1fc9bc4..d68fa6e 100755 --- a/get-title.sh +++ b/get-title.sh @@ -3,13 +3,13 @@ address=$1 response=$(curl --write-out %{http_code} --silent --output /dev/null http://$address) -badtitles="Authorization Required" +badtitles="Authorization Required|Index of" if [ $response != "200" ]; then address=$(curl --write-out %{url_effective} --silent --output /dev/null -L http://$address) fi -title=`curl -s $address | grep -i "title>" | sed ':a;N;$!ba;s/\n//g' | sed -r "s/^[^<]+//g" | sed -r "s///gI" | sed -r "s/<\/title>//gI" | sed -r "s/([^<]+)(.*)/\1/g" | sed 's/[^a-z0-9\-\: ]*$//g' | sed -r 's/<\/*[^>]+\/*>//g'` +title=`curl -sk $address | grep -i "title>" | sed ':a;N;$!ba;s/\n//g' | sed -r "s/^[^<]+//g" | sed -r "s/<title>//gI" | sed -r "s/<\/title>//gI" | sed -r "s/([^<]+)(.*)/\1/g" | sed 's/[^a-z0-9\-\: ]*$//g' | sed -r 's/<\/*[^>]+\/*>//g'` if [ "$title" != "" ]; then ignore_address=`echo $title | grep -i -E "$badtitles" | wc -l | sed 's/ //g'` @@ -17,4 +17,6 @@ if [ "$title" != "" ]; then if [ $ignore_address == 0 ]; then echo $title fi +else + echo "$1" fi