# Scraping MoonBoard Site

In [None]:
import os
import time
import pickle

from selenium import webdriver

In [None]:
# Seconds handed to time.sleep() by the retrying helpers defined below
SLEEP = 1
# Default number of attempts the retrying helpers make before giving up
NUM_TRIES = 10

# MoonBoard credentials passed to loginMoonBoard (password is left blank here)
username = 'howardclimbs'
password = ''

# Path to the chromedriver executable passed to load_browser (blank here)
driver_path = ''
# URL that loginMoonBoard opens before starting the login flow
moonboard_url = 'https://moonboard.com/'

## Login Functions

In [None]:
def load_browser(driver_path):
    """
    Loads an incognito Chrome browser object, given a chromedriver path

    driver_path: filesystem path to the chromedriver executable
    Returns the webdriver.Chrome instance, resized to 1500x910
    """
    option = webdriver.ChromeOptions()
    # Chrome switches are two ASCII hyphens with no surrounding spaces; the
    # earlier ' — incognito' (em dash, padded) is not the incognito switch
    option.add_argument('--incognito')

    browser = webdriver.Chrome(executable_path=driver_path, options=option)
    browser.set_window_size(1500, 910)
    return browser


def find_element(browser, tag_name, attribute, value):
    """
    Locates an element given a tag name, attribute, and value

    Scans every element with tag_name and picks the first one whose
    attribute equals value. The scan is repeated up to NUM_TRIES times;
    after an attempt that raised, it sleeps SLEEP seconds before retrying.

    Returns the matching element, or None (after printing a message) when
    no attempt produced a match.
    """
    elem = None
    for _ in range(NUM_TRIES):
        try:
            for candidate in browser.find_elements_by_tag_name(tag_name):
                if candidate.get_attribute(attribute) == value:
                    elem = candidate
                    break
        except Exception:
            # Catch Exception rather than everything so Ctrl-C / SystemExit
            # still stop the scrape instead of being retried
            time.sleep(SLEEP)
            continue
        if elem is not None:
            break
    if elem is None:
        print('Failed to find ' + str(attribute) + ' ' + str(value))
    return elem


def find_and_click(browser, tag_name, attribute, value):
    """
    Wrapper for find_element that also implements click

    Makes up to NUM_TRIES attempts to locate the element via find_element
    and click it, sleeping SLEEP seconds after an attempt that raised.

    Returns the element once a click went through. Returns None (after
    printing a message) when the element was never found or every click
    raised, so callers can treat a non-None result as a completed click.
    """
    for _ in range(NUM_TRIES):
        try:
            elem = find_element(browser, tag_name, attribute, value)
            if elem is not None:
                elem.click()
                return elem
        except Exception:
            # Exception (not a bare except) keeps Ctrl-C / SystemExit working
            time.sleep(SLEEP)
    # Reached only when no attempt ended in a successful click
    print('Failed to click')
    return None


def get_elem_set(browser, tag_name, attr_dict):
    """
    Collects a set of elements, given that all attributes match

    attr_dict maps attribute names to the exact values an element with
    tag_name must carry. Makes up to NUM_TRIES scans, sleeping SLEEP seconds
    after a scan that raised, and stops at the first scan that completes.

    Returns the list of matching elements from that scan (each element
    once), or an empty list when every scan raised.
    """
    elem_set = []
    for _ in range(NUM_TRIES):
        try:
            # Build the list afresh per attempt so a scan that fails midway
            # cannot leave partial or duplicated entries behind
            elem_set = [
                e for e in browser.find_elements_by_tag_name(tag_name)
                if all(e.get_attribute(attr) == wanted
                       for attr, wanted in attr_dict.items())
            ]
        except Exception:
            time.sleep(SLEEP)
            continue
        # A completed scan is final; rescanning would only repeat the matches
        break
    return elem_set


def click_login_area(browser):
    """
    Opens the login panel by clicking the first anchor whose text is
    'LOGIN/REGISTER'

    Returns the clicked anchor, or None (after printing a message) when no
    anchor on the page carries that text.
    """
    anchors = browser.find_elements_by_tag_name('a')
    login_link = next(
        (anchor for anchor in anchors if anchor.text == 'LOGIN/REGISTER'),
        None,
    )
    if login_link is not None:
        login_link.click()
        return login_link

    print('Failed to find Login Button')
    return None


def input_user_pass_login(browser, username, password):
    """
    Types the credentials into the login form's input fields

    The fields are recognised by their 'placeholder' attribute ('Username'
    and 'Password'); when several inputs share a placeholder, the last one
    on the page is used. Nothing is typed unless both fields were found.

    Returns a (username field, password field) tuple; a missing field is
    reported with a printed message and returned as None.
    """
    by_placeholder = {'Username': None, 'Password': None}
    for field in browser.find_elements_by_tag_name('input'):
        label = field.get_attribute('placeholder')
        if label in by_placeholder:
            # Later inputs overwrite earlier ones with the same placeholder
            by_placeholder[label] = field

    user_field = by_placeholder['Username']
    pass_field = by_placeholder['Password']

    # Report whichever fields are missing
    if user_field is None:
        print('Failed to find username field')
    if pass_field is None:
        print('Failed to find password field')

    # Only type when the whole form is available
    if not (user_field is None or pass_field is None):
        user_field.send_keys(username)
        pass_field.send_keys(password)

    return user_field, pass_field


def click_login_button(driver):
    """
    Submits the login form by clicking the button whose type is 'submit'

    Returns whatever find_and_click returns for that button.
    """
    submit_button = find_and_click(
        driver, tag_name='button', attribute='type', value='submit'
    )
    return submit_button


def loginMoonBoard(browser, url='', username='', password=''):
    """
    Opens url and walks through the MoonBoard login form

    Steps: load the page, open the login area, type the credentials, and
    press the submit button. If the login area cannot be opened, the
    remaining steps are skipped.

    Returns None on every path; failures are only reported through the
    messages printed by the helper functions.
    """
    browser.get(url)

    # Without the login area there are no credential fields to fill in
    if click_login_area(browser) is None:
        return None

    # Type the credentials, then submit the form
    input_user_pass_login(browser, username, password)
    click_login_button(browser)

    return None

## Check Out Problems

In [None]:
def click_view_problems(browser):
    """
    Opens the problem list: clicks the 'lProblems' link, then the
    'm-viewproblem' list item

    Always returns None.
    """
    # (tag, id) pairs, clicked in this order
    click_sequence = (('a', 'lProblems'), ('li', 'm-viewproblem'))
    for tag, elem_id in click_sequence:
        find_and_click(browser, tag, 'id', elem_id)
    return None


def click_holdsetup(browser, holdsetup='MoonBoard 2016'):
    """
    Picks a hold configuration from the 'Holdsetup' dropdown

    Looks up the first <select> whose id is 'Holdsetup', then clicks the
    option whose text equals holdsetup (the last one, if several match).

    Returns the clicked option, or None (after printing a message) when the
    dropdown or the requested option is missing.
    """
    # Locate the dropdown itself
    dropdown = next(
        (sel for sel in browser.find_elements_by_tag_name('select')
         if sel.get_attribute('id') == 'Holdsetup'),
        None,
    )
    if dropdown is None:
        print('Failed to find Holdsetup')
        return None

    # Gather the options carrying the requested label
    matching = [
        opt for opt in dropdown.find_elements_by_tag_name('option')
        if opt.text == holdsetup
    ]
    if not matching:
        print('Failed to find ' + holdsetup)
        return None

    # The last matching option is the one that gets selected
    chosen = matching[-1]
    chosen.click()
    return chosen


def click_comments(browser):
    """
    Clicks the link with id 'tbtnComments' to bring up route comments

    Returns None.
    """
    find_and_click(browser, tag_name='a', attribute='id', value='tbtnComments')


def get_current_page(browser):
    """
    Reads the number of the currently selected page from the pager

    The pager is the first <div> whose 'data-role' is 'pager'; the current
    page is the first <span> inside it whose class is 'k-state-selected'.

    Returns the page number as an int, or None (after printing a message)
    when the pager or the selected page cannot be found.
    """
    # Find the pager container
    pager = next(
        (div for div in browser.find_elements_by_tag_name('div')
         if div.get_attribute('data-role') == 'pager'),
        None,
    )
    if pager is None:
        print('Failed to find pager')
        return None

    # Find the highlighted page entry inside the pager
    selected = next(
        (span for span in pager.find_elements_by_tag_name('span')
         if span.get_attribute('class') == 'k-state-selected'),
        None,
    )
    if selected is None:
        print('Failed to find page')
        return None

    return int(selected.text)


def click_next_page(browser, current_page=1):
    """
    Advances the route list by clicking the link whose 'data-page'
    attribute equals current_page + 1

    Returns whatever find_and_click returns for that link.
    """
    target_page = str(current_page + 1)
    return find_and_click(browser, 'a', 'data-page', target_page)


def get_problems(browser):
    """
    Collects the problem rows shown on the current page

    A <tr> counts as a problem row when it has a 'data-uid' attribute and
    its 'onclick' attribute is exactly 'problemSelected();'.

    Returns (problems, data_ids): the row elements and their 'data-uid'
    values, in page order and index-aligned.
    """
    matched_rows = []
    for row in browser.find_elements_by_tag_name('tr'):
        row_uid = row.get_attribute('data-uid')
        is_problem_row = row.get_attribute('onclick') == 'problemSelected();'
        if row_uid is None or not is_problem_row:
            continue
        matched_rows.append((row, row_uid))

    problems = [row for row, _ in matched_rows]
    data_ids = [row_uid for _, row_uid in matched_rows]
    return problems, data_ids


def get_repeats_data(browser):
    """
    Extracts the text of every 'repeats' block on the current page

    Returns one list per <div> whose class is 'repeats', holding the text of
    each <p> found inside that div.
    """
    repeat_blocks = get_elem_set(browser, 'div', {'class': 'repeats'})
    return [
        [paragraph.text for paragraph in block.find_elements_by_tag_name('p')]
        for block in repeat_blocks
    ]


def get_problem_meta(problem):
    """
    Finds metadata tags from a problem object

    Returns a dict with:
        problem_name: text of the problem's first <h3>
        info: text of every <p> inside the problem
        url: href of the first <a> inside that <h3>
        num_empty: number of star images whose src contains 'empty'
        num_stars: number of remaining (non-empty) star images

    Raises IndexError when the problem has no <h3>, or the <h3> has no <a>.
    """
    meta = {}

    h3 = problem.find_elements_by_tag_name('h3')[0]
    meta['problem_name'] = h3.text
    meta['info'] = [p.text for p in problem.find_elements_by_tag_name('p')]
    meta['url'] = h3.find_elements_by_tag_name('a')[0].get_attribute('href')

    # Rating information
    sources = (img.get_attribute('src') for img in problem.find_elements_by_tag_name('img'))
    # An <img> without a src yields None; skip it rather than letting the
    # substring test raise TypeError and abort the whole problem
    stars = [src for src in sources if src and 'star' in src]
    meta['num_empty'] = len([src for src in stars if 'empty' in src])
    meta['num_stars'] = len(stars) - meta['num_empty']

    return meta


def process_all_problems(browser, problems_dict):
    """
    Adds the metadata of every problem on the current page to problems_dict

    Problems whose id is already a key in problems_dict are left untouched.
    The dictionary is updated in place and also returned.
    """
    problems, data_ids = get_problems(browser)
    for uid, problem in zip(data_ids, problems):
        if uid not in problems_dict:
            problems_dict[uid] = get_problem_meta(problem)

    return problems_dict


def process_all_pages(browser, save_path=''):
    """
    Processes all moonboard pages and saves results into a dictionary

    Starting from the page currently shown, collects the problems on each
    page (up to 20 attempts per page) and then clicks through to the next
    page until no next-page link can be clicked.

    save_path: optional pickle file. If it exists, it seeds the dictionary so
        already-seen problems are skipped; when non-empty, the dictionary is
        re-written to it after every page.

    Returns the dictionary mapping problem id to metadata. If the current
    page number cannot be read, only the visible page is processed.
    """
    num_tries = 20

    # Load problems dict, if it exists
    problems_dict = {}
    if os.path.exists(save_path):
        # NOTE(review): unpickling can execute arbitrary code; save_path
        # should only ever point at a file written by this scraper
        with open(save_path, 'rb') as cache_file:
            problems_dict = pickle.load(cache_file)

    current_page = get_current_page(browser)
    while True:
        for _ in range(num_tries):
            try:
                problems_dict = process_all_problems(browser, problems_dict)
                break
            except Exception:
                # Exception (not a bare except) so Ctrl-C still stops the run
                print('Failed to process problems on page ' + str(current_page))
                time.sleep(SLEEP)

        # Save intermediate result; the with-block closes (and flushes) the
        # file before the next page is requested
        if save_path != '':
            with open(save_path, 'wb') as cache_file:
                pickle.dump(problems_dict, cache_file)

        # Without a readable page number the next-page link cannot be
        # computed, so stop after the visible page instead of raising
        if current_page is None:
            break

        # Click to next page
        if click_next_page(browser, current_page) is None:
            break

        # Flip to next page
        current_page += 1

    return problems_dict


def get_num_pages(browser):
    """
    Counts the pages of MoonBoard problems by clicking forward from the
    current page until no next-page link can be clicked

    Sleeps SLEEP seconds after each click attempt. Returns the number of the
    last page reached.
    """
    page = get_current_page(browser)
    while True:
        next_link = click_next_page(browser, page)
        time.sleep(SLEEP)
        if next_link is None:
            return page
        page += 1

## Test Out Stuff

In [None]:
# Load browser and login to MoonBoard
# Starts Chrome through load_browser with the driver_path set above, then runs
# the login flow against moonboard_url with the configured username/password.
browser = load_browser(driver_path)
loginMoonBoard(browser, moonboard_url, username, password)

In [None]:
# Get problems view
# Pause two seconds before navigating (presumably to let the login from the
# previous cell finish; TODO confirm this is long enough).
time.sleep(2)
# Open the problem list, then select the default hold setup ('MoonBoard 2016')
click_view_problems(browser)
click_holdsetup(browser)