# Domain Spinning Auto-Checker

Once you've Saved the Configs for a list of domains, you can enter said list here, and this code will automatically do the following every ~30 seconds:

- Check each domain in your list
- Let you know if they're ready, and whether or not they've shunted correctly
- Let you know how many are left, occasionally in cowboy parlance and rarely in pirate parlance

And if nothing happens, every once in a while it'll tell you how many minutes it's been running, and how many domains are left.

To operate this, just toss the domains you'd like to check in the first cell, then press >Run twice. That's it!

Finally, it works *almost* perfectly, but you'll want to double-check at least the remon domains. It will also stop running after three hours - at that point, you should probably check and respin any undone domains anyway.

In [3]:
import numpy as np

# internet
from bs4 import BeautifulSoup
from selenium import webdriver
import requests

# system and time
import time
import os

# partial and pandas
from functools import partial

'''
-----------------------
Setup - Enter URLs here
-----------------------
'''

# Paste one domain per line between the triple quotes.
raw_urls = '''fake_url1.fyi
fake_url2.fyi
fake_url3.zone
'''

# Blank lines and stray leading/trailing whitespace are dropped so that a
# sloppy paste does not produce bogus entries that get fetched as 'http://'
urls = [line.strip() for line in raw_urls.splitlines() if line.strip()]

In [4]:
# --- user-tunable settings ---

# stop polling once the script has been running this many hours
hour_cap = 3

# run time (in minutes) after which status lines are printed less often
long_time_counter = 10

# --- state that must exist before the polling loop starts ---

# reference point for all elapsed-time calculations
start_time = time.perf_counter()
time.sleep(.1)
# elapsed minutes so far; seeded here so the while() test has a value
run_time = (time.perf_counter() - start_time)/60

# running buckets of finished domains, filled as the loop resolves them and
# still available for a summary if the run is interrupted
google_url_storage = []
yahoo_url_storage = []
fail_url_storage = []

# number of domains still pending, and whether we've hit "long run" mode
num_left = len(urls)
long_time_bool = False

print()

'''
---------
Functions
---------
'''


def fun_roll(num_left):
    '''
    Speak the number of domains left by shelling out to the `say` command

    A uniform random draw on (0, 1) picks the phrasing:
        - below .88: plain "<n> left"
        - .88 up to .98: cowboy version
        - .98 and above: pirate version

    Add more branches (alien, robot, ...) here if you want more variety
    '''

    roll = np.random.uniform(0, 1, 1)[0]

    # test the rarest outcome first so each branch needs only one comparison
    if roll >= .98:
        phrase = f'Yar, there be {num_left} left, me hearty'
    elif roll >= .88:
        phrase = f'I reckon there are {num_left} left, partner'
    else:
        phrase = f'{num_left} left'

    os.system(f"say '{phrase}'")


def _fetch_soup(url, headers, timeout):
    '''
    Load http://<url> and return the parsed page, or None if it can't be loaded
    '''
    try:
        page = requests.get(f'http://{url}', headers=headers, timeout=timeout)
        return BeautifulSoup(page.content, 'html.parser')
    except Exception:
        # Best effort: any fetch/parse failure just means "failed to load".
        # Unlike a bare except, this still lets KeyboardInterrupt through so
        # the run can be interrupted while a request is in flight.
        return None


def _has_google_script(soup):
    '''
    True if any inline <script> inside <body> mentions 'google'
    '''
    if soup.body is None:
        return False
    # .string is None for scripts without a single inline text child;
    # treat those as empty text instead of letting them abort the check
    return any('google' in (script.string or '')
               for script in soup.body.find_all('script'))


def _has_yahoo_link(soup):
    '''
    True if any <a> on the page has an href containing '.y.'
    '''
    # anchors without an href are skipped rather than raising KeyError
    return any('.y.' in (link.get('href') or '')
               for link in soup.find_all('a'))


def url_checker(urls, retry_wait=30, timeout=10):
    '''
    The gist:
        - first, try to load each URL; pages that can't be loaded go in the
        fail bucket, pages with a 'google' script go in the google bucket,
        and otherwise pages with yahoo ('.y.') links go in the yahoo bucket
        - second, since correctly shunting google domains can apparently,
        temporarily, show yahoo links, try any yahoo domains again after
        retry_wait seconds and move the ones that now show google

    Parameters:
        urls: bare domain names (no scheme) to check
        retry_wait: seconds to wait before re-checking yahoo domains
        timeout: per-request timeout in seconds, so one dead domain can't
        hang the whole run

    Returns:
        (google_urls, yahoo_urls, fail_urls) - three lists; each url appears
        in at most one of them, and urls that loaded but matched neither
        pattern appear in none

    Note: this may artificially take retry_wait seconds to run - no need to
    worry if so
    '''

    google_urls, yahoo_urls, fail_urls = [], [], []

    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) '
               + 'AppleWebKit/537.36 (KHTML, like Gecko) '
               + 'Chrome/39.0.2171.95 Safari/537.36'}

    for url in urls:

        soup = _fetch_soup(url, headers, timeout)

        if soup is None:
            fail_urls.append(url)
            # don't classify a page we never loaded
            continue

        # google wins over yahoo, matching the retry logic below; this also
        # guarantees a url never lands in two buckets at once
        if _has_google_script(soup):
            google_urls.append(url)
        elif _has_yahoo_link(soup):
            yahoo_urls.append(url)

    # robust to false remon: if we get Yahoos, wait, try again, and update

    if yahoo_urls:

        time.sleep(retry_wait)  # wait - this may fix the Yahoo remon bug
        still_yahoo = []

        for url in yahoo_urls:
            soup = _fetch_soup(url, headers, timeout)

            if soup is not None and _has_google_script(soup):
                google_urls.append(url)
            else:
                # still yahoo, or the re-check failed to load: keep the
                # first-pass classification
                still_yahoo.append(url)

        yahoo_urls = still_yahoo

    return google_urls, yahoo_urls, fail_urls


def url_viewer(urls):
    '''
    Opens all domains one by one in new tabs to verify proper KW honor rate

    Each url is loaded over https in its own Chrome tab, with a 3s pause
    after each load. The browser is closed once every url has been visited
    (or as soon as a load raises), and the visited list is then printed.

    Parameters:
        urls: list of bare domain names (no scheme)
    '''

    # start driver
    # NOTE(review): executable_path is the older Selenium way of pointing at
    # the driver binary - confirm it is still accepted by the installed
    # Selenium version
    driver = webdriver.Chrome(executable_path='drivers/chromedriver')

    try:
        for index, url in enumerate(urls):
            # the first url uses the tab the browser opened with; only later
            # urls need a fresh tab, so no blank tab is left at the end
            if index:
                driver.switch_to.new_window('tab')
            driver.get(f"https://{url}")
            time.sleep(3)
    finally:
        # always shut the browser down, even if a page load raised
        driver.quit()

    print()
    print('URLs tested:')
    print()
    print(*urls, sep='\n')


# %%

'''
-----------
Script Body
-----------
'''

# Main polling loop. The gist of each pass:
#   - run url_checker over the domains still pending and get back the
#     google / yahoo / failed buckets
#   - drop every reported domain from the pending list, print and store
#     the results, and announce how many are left (or that we're done)
#   - print the run time - all of the fluff is just to make it look nicer
#     and minimize spamming the console
#   - wait before the next pass
# The loop ends when nothing is pending or after hour_cap hours.
# run_time is in minutes, hence the * 60; comparing this way also behaves
# sensibly if hour_cap is fractional.
while urls and run_time < hour_cap * 60:

    # url fishing

    google_urls, yahoo_urls, fail_urls = url_checker(urls)
    resolved_urls = google_urls + yahoo_urls + fail_urls

    # url update printing
    # note: long term, I should look into alerting vs just printing

    if resolved_urls:

        # Filter instead of calling list.remove() per url: remove() raises
        # ValueError the second time if a url was reported in two buckets
        resolved_set = set(resolved_urls)
        urls = [url for url in urls if url not in resolved_set]

        # alerts, storage updates
        if google_urls:
            print('  --Goood to go--')
            print(*google_urls, sep='\n')
            google_url_storage += google_urls
        if yahoo_urls:
            print('  --Remon needed--')
            print(*yahoo_urls, sep='\n')
            yahoo_url_storage += yahoo_urls
        if fail_urls:
            print('  --Failed to load--')
            print(*fail_urls, sep='\n')
            fail_url_storage += fail_urls
        print('-')

        # how many left? Or, done
        num_left = len(urls)

        if num_left > 0:
            fun_roll(num_left)  # or, lame, print "x left"
        else:
            # if we pull the last url, we're done
            print('\a')
            time.sleep(.5)
            print('\a')  # double ping
            os.system("say 'done'")
            break

    # run time handling

    run_time = (time.perf_counter() - start_time)/60

    if run_time > long_time_counter:

        # long-run mode: only report when the next notification is due
        long_time_bool = True
        # whole hours and leftover whole minutes; 85.6 min -> 1 hour, 25 min
        hours, minutes = divmod(int(run_time), 60)

        if hours > 0:
            print(f'Run time: {hours} hours, {minutes} minutes - doms left: {num_left}')
            long_time_counter += 5  # tell me every 5 minutes that nothing happened
        else:
            print(f'Run time: {minutes} minutes - doms left: {num_left}')
            long_time_counter += 2  # tell me every 2 minutes
        print('-')
        time.sleep(60)

    elif long_time_bool:
        # Quiet pass between long-run notifications. Still wait here:
        # without this sleep the loop would re-check every domain
        # back-to-back until the next notification came due.
        time.sleep(30)

    else:
        print(f'Run time: {run_time:0.1f} minutes - doms left: {num_left}')
        print('-')
        time.sleep(30)

# once the loop has ended (time limit hit or all done), report every bucket:
# each non-empty bucket gets a blank line, its heading, then one url per line

print('Summary:')
for heading, bucket in (
    ('  --Good to go--', google_url_storage),
    ('  --Remon needed--', yahoo_url_storage),
    ('  --Failed to load--', fail_url_storage),
    ('  --Never monetized--', urls),  # whatever is still pending
):
    if bucket:
        print()
        print(heading)
        print(*bucket, sep='\n')


  --Goood to go--
a-great-us-vinyl-flooring.fyi
  --Remon needed--
fetch-an-us-hardwood-flooring.fyi
-
Run time: 0.6 minutes - doms left: 1
-
Run time: 1.1 minutes - doms left: 1
-
Run time: 1.6 minutes - doms left: 1
-
Run time: 2.1 minutes - doms left: 1
-


KeyboardInterrupt: 

### Nice

Good for you for checking down here! Did you interrupt the script, but want a summary of what happened while it ran? Here you go:

In [8]:
# Report what has been collected so far, e.g. after interrupting the run:
# each non-empty bucket gets a blank line, its heading, then one url per line
print('Summary:')
for heading, bucket in (
    ('  --Good to go--', google_url_storage),
    ('  --Remon needed--', yahoo_url_storage),
    ('  --Failed to load--', fail_url_storage),
    ('  --Never monetized--', urls),  # whatever is still pending
):
    if bucket:
        print()
        print(heading)
        print(*bucket, sep='\n')

Summary:

  --Good to go--
a-great-us-vinyl-flooring.fyi

  --Remon needed--
fetch-an-us-hardwood-flooring.fyi

  --Never monetized--
have-us-psoriatic-arthritis.fyi
