In [1]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import pickle
import os
import pdb
import time

In [2]:
def load_driver(executable_path='/Users/aaronwu/Documents/misc/tmp/selenium/chromedriver'):
    """Start a Chrome session through Selenium and hand back the driver.

    Args:
        executable_path: Location of the chromedriver binary; forwarded
            unchanged to ``webdriver.Chrome``.

    Returns:
        The object produced by ``webdriver.Chrome``.
    """
    return webdriver.Chrome(executable_path=executable_path)

def load_cookies(driver, cookies_path):
    """Add every cookie stored in a pickle file to the driver.

    Args:
        driver: Object exposing ``add_cookie(cookie)``.
        cookies_path: Path to a pickle file containing an iterable of cookies.

    Returns:
        None.

    Raises:
        Whatever ``open``, ``pickle.load`` or ``driver.add_cookie`` raise;
        nothing is caught here.
    """
    # SECURITY: pickle.load can execute arbitrary code while unpickling.
    # Only point this at cookie files written by this notebook.
    # The context manager guarantees the handle is closed even when
    # unpickling fails.
    with open(cookies_path, 'rb') as cookie_file:
        cookies = pickle.load(cookie_file)
    for cookie in cookies:
        driver.add_cookie(cookie)
    return

def find_element(driver, tag_name, attribute, value, num_tries=10, delay=.5):
    """Return the first element of a tag whose attribute equals ``value``.

    Args:
        driver: Object exposing ``find_elements_by_tag_name(tag_name)``.
        tag_name: Tag to enumerate.
        attribute: Attribute name read from each element via ``get_attribute``.
        value: Value the attribute must equal.
        num_tries: Maximum number of lookup passes (default 10).
        delay: Seconds to sleep after a pass that raised (default 0.5).

    Returns:
        The first matching element, or None when no pass produced a match
        (a 'Failed to find ...' line is printed in that case).
    """
    elem = None
    for _ in range(num_tries):
        try:
            for candidate in driver.find_elements_by_tag_name(tag_name):
                if candidate.get_attribute(attribute) == value:
                    elem = candidate
                    break
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still stop the cell.
            time.sleep(delay)
            continue
        if elem is not None:
            break
        # A pass that completes without a match is retried immediately;
        # the delay only applies after an exception.
    if elem is None:
        print('Failed to find ' + str(attribute) + ' ' + str(value))
    return elem

def find_and_click(driver, tag_name, attribute, value):
    """Locate an element with ``find_element`` and click it.

    Up to 10 attempts are made; each attempt calls ``find_element`` and, if
    it returns an element, calls ``click()`` on it.

    Args:
        driver: Driver object passed through to ``find_element``.
        tag_name: Tag to search.
        attribute: Attribute name to compare.
        value: Attribute value to match.

    Returns:
        The element whose ``click()`` completed without raising, or None if
        no attempt succeeded ('Failed to click' is printed in that case).
        Returning None when every click raised stops callers from treating
        an un-clicked element as a success.
    """
    num_tries = 10
    for _ in range(num_tries):
        try:
            elem = find_element(driver, tag_name, attribute, value)
            if elem is not None:
                elem.click()
                return elem
        except Exception:
            # Exception (not a bare except) keeps the cell interruptible.
            # Pause before retrying a lookup or click that raised.
            time.sleep(.5)
    print('Failed to click')
    return None

def get_elem_set(driver, tag_name, attr_dict, num_tries=10, delay=.5):
    """Collect every element of a tag whose attributes match ``attr_dict``.

    Args:
        driver: Object exposing ``find_elements_by_tag_name(tag_name)``.
        tag_name: Tag to enumerate.
        attr_dict: Mapping of attribute name -> required value; an element
            qualifies only when every entry matches its ``get_attribute``.
        num_tries: Maximum number of passes when a pass raises (default 10).
        delay: Seconds to sleep after a pass that raised (default 0.5).

    Returns:
        A list with each matching element exactly once, taken from the first
        pass that completes without raising; an empty list if every pass
        raised.
    """
    for _ in range(num_tries):
        try:
            # Build the list afresh on every pass and return as soon as one
            # pass completes, so successful passes never accumulate
            # duplicates and a half-finished pass leaves nothing behind.
            return [
                elem
                for elem in driver.find_elements_by_tag_name(tag_name)
                if all(elem.get_attribute(attr) == expected
                       for attr, expected in attr_dict.items())
            ]
        except Exception:
            time.sleep(delay)
    return []

def click_login_area(driver):
    """Click the first anchor whose text is exactly 'LOGIN/REGISTER'.

    Args:
        driver: Object exposing ``find_elements_by_tag_name``.

    Returns:
        The clicked anchor, or None (after printing a failure line) when no
        anchor carries that text.
    """
    for anchor in driver.find_elements_by_tag_name('a'):
        if anchor.text == 'LOGIN/REGISTER':
            anchor.click()
            return anchor
    print('Failed to find Login Button')
    return None

def click_relogin(driver):
    """Click the anchor whose ``title`` attribute is 'Go to dashboard'.

    Returns:
        Whatever ``find_and_click`` returns for that lookup.
    """
    dashboard_link = find_and_click(driver, 'a', 'title', 'Go to dashboard')
    return dashboard_link

def click_login_button(driver):
    """Click the button whose ``type`` attribute is 'submit'.

    Returns:
        Whatever ``find_and_click`` returns for that lookup.
    """
    submit_button = find_and_click(driver, 'button', 'type', 'submit')
    return submit_button

def input_user_pass_login(driver, username, password):
    """Type the credentials into the Username / Password inputs.

    The inputs are identified by their ``placeholder`` attribute. When
    several inputs share a placeholder, the last one wins. Nothing is typed
    unless both fields were found.

    Args:
        driver: Object exposing ``find_elements_by_tag_name``.
        username: Text sent to the 'Username' input.
        password: Text sent to the 'Password' input.

    Returns:
        Tuple ``(username_elem, password_elem)``; either entry is None when
        the corresponding input was not found.
    """
    fields = {'Username': None, 'Password': None}
    for field in driver.find_elements_by_tag_name('input'):
        placeholder = field.get_attribute('placeholder')
        if placeholder in fields:
            fields[placeholder] = field

    username_elem = fields['Username']
    password_elem = fields['Password']

    if username_elem is None:
        print('Failed to find username field')
    if password_elem is None:
        print('Failed to find password field')

    if not (username_elem is None or password_elem is None):
        username_elem.send_keys(username)
        password_elem.send_keys(password)
    return username_elem, password_elem
    
def loginMoonBoard(driver, url="https://www.moonboard.com/", username='', password='', cookies_path=''):
    """Open the site and log in, reusing saved cookies when available.

    Steps:
      1. Load ``url``.
      2. If ``cookies_path`` exists, load its cookies into the driver.
      3. Click the login area; give up if it is not found.
      4. If cookies were loaded, try the 'Go to dashboard' link and stop on
         success.
      5. Otherwise fill in the credentials and click the submit button.
      6. If all three elements were found and ``cookies_path`` is non-empty,
         save the driver's cookies to ``cookies_path``.

    Args:
        driver: Selenium-style driver.
        url: Page to open first.
        username: Login name typed into the form.
        password: Password typed into the form.
        cookies_path: Pickle file used to load/save cookies; '' disables
            both loading and saving.

    Returns:
        None.
    """
    driver.get(url)
    loaded_cookies = False
    if os.path.exists(cookies_path):
        try:
            load_cookies(driver, cookies_path)
            loaded_cookies = True
        except Exception:
            # Exception (not a bare except) so Ctrl-C still interrupts.
            print('Cookies expired')

    login_elem = click_login_area(driver)
    if login_elem is None:
        return
    if loaded_cookies:
        relogin_elem = click_relogin(driver)
        if relogin_elem is not None:
            return

    username_elem, password_elem = input_user_pass_login(driver, username, password)
    login_button = click_login_button(driver)
    if username_elem is None or password_elem is None or login_button is None:
        return
    # With the default cookies_path='' there is nowhere to save; skipping
    # avoids the FileNotFoundError that open('', 'wb') would raise.
    if cookies_path:
        # NOTE(review): cookies are captured right after the click; confirm
        # the session cookie is already set at this point.
        with open(cookies_path, 'wb') as cookie_file:
            pickle.dump(driver.get_cookies(), cookie_file)
    return



In [1]:
def click_view_problems(driver):
    """Click the 'lProblems' anchor, then the 'm-viewproblem' list item.

    Both lookups go through ``find_and_click`` by ``id``; their results are
    not inspected.

    Returns:
        None.
    """
    for tag_name, element_id in (('a', 'lProblems'), ('li', 'm-viewproblem')):
        find_and_click(driver, tag_name, 'id', element_id)
    return

def click_comments(driver):
    """Click the anchor with id 'tbtnComments' via ``find_and_click``.

    Returns:
        None; the result of the click is discarded.
    """
    find_and_click(driver, tag_name='a', attribute='id', value='tbtnComments')

def click_holdsetup(driver, holdsetup='MoonBoard 2016'):
    """Pick an entry in the 'Holdsetup' drop-down by its visible text.

    The first ``<select>`` with id 'Holdsetup' is used. Among its
    ``<option>`` children, the last one whose text equals ``holdsetup`` is
    clicked.

    Args:
        driver: Object exposing ``find_elements_by_tag_name``.
        holdsetup: Exact option text to select.

    Returns:
        The clicked option, or None (after printing a failure line) when the
        select or the option is missing.
    """
    select_box = next(
        (sel for sel in driver.find_elements_by_tag_name('select')
         if sel.get_attribute('id') == 'Holdsetup'),
        None,
    )
    if select_box is None:
        print('Failed to find Holdsetup')
        return None

    matches = [opt for opt in select_box.find_elements_by_tag_name('option')
               if opt.text == holdsetup]
    if not matches:
        print('Failed to find ' + holdsetup)
        return None

    # When several options share the text, the last one is chosen.
    chosen = matches[-1]
    chosen.click()
    return chosen

def get_current_page(driver):
    """Read the currently selected page number from the pager.

    The pager is the first ``<div>`` whose ``data-role`` is 'pager'; the
    current page is the first ``<span>`` inside it whose ``class`` is exactly
    'k-state-selected'.

    Args:
        driver: Object exposing ``find_elements_by_tag_name``.

    Returns:
        The span's text converted with ``int``, or None (after printing a
        failure line) when the pager or the selected span is missing.
    """
    pager = next(
        (div for div in driver.find_elements_by_tag_name('div')
         if div.get_attribute('data-role') == 'pager'),
        None,
    )
    if pager is None:
        print('Failed to find pager')
        return None

    selected = next(
        (span for span in pager.find_elements_by_tag_name('span')
         if span.get_attribute('class') == 'k-state-selected'),
        None,
    )
    if selected is None:
        print('Failed to find page')
        return None

    return int(selected.text)

def click_next_page(driver, prev_page=-1):
    """Click the pager anchor whose ``data-page`` is ``prev_page + 1``.

    Args:
        driver: Driver passed through to ``find_and_click``.
        prev_page: Number of the page being left; the target is one higher.

    Returns:
        Whatever ``find_and_click`` returns for that anchor.
    """
    target_page = str(prev_page + 1)
    return find_and_click(driver, 'a', 'data-page', target_page)

def get_problems(driver):
    """Collect the table rows that represent problems.

    A ``<tr>`` qualifies when it has a ``data-uid`` attribute and its
    ``onclick`` attribute is exactly 'problemSelected();'.

    Args:
        driver: Object exposing ``find_elements_by_tag_name``.

    Returns:
        Tuple ``(problems, data_ids)`` of two parallel lists: the qualifying
        row elements and their ``data-uid`` values.
    """
    selected = []
    for row in driver.find_elements_by_tag_name('tr'):
        uid = row.get_attribute('data-uid')
        handler = row.get_attribute('onclick')
        if uid is None or handler != 'problemSelected();':
            continue
        selected.append((row, uid))

    problems = [row for row, _ in selected]
    data_ids = [uid for _, uid in selected]
    return problems, data_ids

def get_repeats_data(driver):
    """Gather paragraph texts from every ``div`` whose class is 'repeats'.

    Args:
        driver: Driver passed through to ``get_elem_set``.

    Returns:
        A list with one entry per element returned by ``get_elem_set``; each
        entry is the list of ``.text`` values of that element's ``<p>``
        children.
    """
    repeat_blocks = get_elem_set(driver, 'div', {'class': 'repeats'})
    return [
        [paragraph.text for paragraph in block.find_elements_by_tag_name('p')]
        for block in repeat_blocks
    ]

def get_problem_meta(problem):
    """Extract metadata from one problem row element.

    Args:
        problem: Element exposing ``find_elements_by_tag_name``; it must
            contain at least one ``<h3>`` that itself contains an ``<a>``.

    Returns:
        dict with keys:
          'user'      - text of the first ``<h3>``
          'info'      - list of the texts of all ``<p>`` descendants
          'url'       - ``href`` of the first ``<a>`` inside that ``<h3>``
          'num_empty' - count of star images whose ``src`` contains 'empty'
          'num_stars' - count of the remaining star images
        A star image is any ``<img>`` whose ``src`` contains 'star'.

    Raises:
        IndexError: if the ``<h3>`` or its ``<a>`` is missing.
    """
    meta = {}
    heading = problem.find_elements_by_tag_name('h3')[0]
    meta['user'] = heading.text
    meta['info'] = [p.text for p in problem.find_elements_by_tag_name('p')]
    meta['url'] = heading.find_elements_by_tag_name('a')[0].get_attribute('href')
    sources = [img.get_attribute('src') for img in problem.find_elements_by_tag_name('img')]
    # get_attribute returns None for an <img> without src; skip those so the
    # substring test below cannot raise TypeError.
    star_sources = [src for src in sources if src and 'star' in src]
    meta['num_empty'] = sum(1 for src in star_sources if 'empty' in src)
    meta['num_stars'] = len(star_sources) - meta['num_empty']
    return meta
    
def get_problem_info(problem):
    """Return the information recorded for one problem element.

    Currently this is exactly the dict produced by ``get_problem_meta``.
    """
    return get_problem_meta(problem)

def process_all_problems(driver, problems_dict):
    """Add info for every not-yet-seen problem on the current page.

    Args:
        driver: Driver passed through to ``get_problems``.
        problems_dict: Mapping of data-uid -> problem info; updated in place.
            Uids already present are left untouched.

    Returns:
        The same ``problems_dict`` object.
    """
    problems, data_ids = get_problems(driver)
    for uid, problem in zip(data_ids, problems):
        if uid not in problems_dict:
            problems_dict[uid] = get_problem_info(problem)
    return problems_dict

def process_all_pages(driver, save_path=''):
    """Scrape every page from the current one onward.

    For each page: collect the problems (up to 20 attempts when collection
    raises), save the accumulated dict to ``save_path`` if one was given,
    then click the link to the next page. Stops when that link cannot be
    clicked.

    Args:
        driver: Selenium-style driver positioned on the problem list.
        save_path: Pickle file for results. If it already exists it is
            loaded first and used as the starting dict; '' disables saving.

    Returns:
        dict mapping data-uid -> problem info.
    """
    num_tries = 20
    problems_dict = {}
    if os.path.exists(save_path):
        # SECURITY: pickle.load can execute arbitrary code; only load files
        # this notebook wrote.
        with open(save_path, 'rb') as saved_file:
            problems_dict = pickle.load(saved_file)
    current_page = get_current_page(driver)
    while True:
        for _ in range(num_tries):
            try:
                problems_dict = process_all_problems(driver, problems_dict)
                break
            except Exception:
                # Exception (not a bare except) keeps the cell interruptible.
                print('Failed to process problems on page ' + str(current_page))
                time.sleep(.5)
        if save_path != '':
            # The context manager flushes and closes the file on every save.
            with open(save_path, 'wb') as out_file:
                pickle.dump(problems_dict, out_file)
        if current_page is None:
            # The pager was not found, so the next-page link cannot be
            # computed; stop after the visible page instead of failing on
            # None + 1.
            break
        # NOTE(review): nothing waits for the table to refresh after the
        # click; confirm rows read on the next iteration belong to the new
        # page.
        if click_next_page(driver, current_page) is None:
            break
        current_page += 1
    return problems_dict

def get_num_pages(driver):
    """Click forward through the pager and report the last page reached.

    Starting from the page reported by ``get_current_page``, the next-page
    link is clicked repeatedly, with a 0.5 s pause after each attempt, until
    ``click_next_page`` returns None.

    Args:
        driver: Selenium-style driver positioned on the problem list.

    Returns:
        The number of the last page for which a click succeeded, or the
        starting page if none did.
    """
    page = get_current_page(driver)
    while True:
        link = click_next_page(driver, page)
        time.sleep(.5)
        if link is None:
            return page
        page += 1

In [4]:
# Working directory for the driver binary, cookie jar and scrape output.
# Override with the MOONBOARD_DIR environment variable; the default keeps
# the original location.
base_dir = os.environ.get('MOONBOARD_DIR', '/Users/aaronwu/Documents/misc/tmp/selenium')
executable_path = os.path.join(base_dir, 'chromedriver')
cookies_path = os.path.join(base_dir, 'moonboard_cookies.pickle')
save_path = os.path.join(base_dir, 'moonboard_2016_output.pickle')
url = 'https://www.moonboard.com/'
# Credentials must never be committed with the notebook: read them from the
# environment (export MOONBOARD_USERNAME / MOONBOARD_PASSWORD before
# starting Jupyter). They default to '' when unset.
username = os.environ.get('MOONBOARD_USERNAME', '')
password = os.environ.get('MOONBOARD_PASSWORD', '')

In [5]:
# Start the Chrome session used by every later cell.
driver = load_driver(executable_path=executable_path)

In [6]:
# Open the site and log in; cookies at cookies_path are loaded first when that file exists.
loginMoonBoard(driver, url=url, username=username, password=password, cookies_path=cookies_path)

Failed to find title Go to dashboard
Failed to find title Go to dashboard
Failed to find title Go to dashboard
Failed to find title Go to dashboard
Failed to find title Go to dashboard
Failed to find title Go to dashboard
Failed to find title Go to dashboard
Failed to find title Go to dashboard
Failed to find title Go to dashboard
Failed to find title Go to dashboard
Failed to click


In [7]:
# Navigate to the problem list and select the default hold setup.
time.sleep(2)  # pause after the login cell before touching the page
max_attempts = 20
for attempt in range(max_attempts):
    try:
        click_view_problems(driver)
        # click_holdsetup returns None when the drop-down or option is
        # missing; only stop once the option was actually clicked.
        if click_holdsetup(driver) is not None:
            break
    except Exception as exc:
        # Exception (not a bare except) so the kernel can still be
        # interrupted.
        print('Navigation attempt ' + str(attempt + 1) + ' failed: ' + repr(exc))
    time.sleep(.5)
else:
    # Bounded retries: fail loudly rather than spinning forever.
    raise RuntimeError('Could not open the problem list and select the hold setup')
time.sleep(2)

In [15]:
# Scrape from the current page onward; results already stored at save_path are loaded first and the file is rewritten after each page.
problems_dict = process_all_pages(driver, save_path=save_path)

Failed to find data-page 1894
Failed to find data-page 1894
Failed to find data-page 1894
Failed to find data-page 1894
Failed to find data-page 1894
Failed to find data-page 1894
Failed to find data-page 1894
Failed to find data-page 1894
Failed to find data-page 1894
Failed to find data-page 1894
Failed to click
