# Scraping Holds
There are a total of 140 holds on MoonBoard 2016 configuration. This notebook scrapes:

* Hold positions
* Hold orientations
* Hold images

In [1]:
import os
import sys
import pdb
import time
import copy
import string
import pickle
import requests

from moonboard_helper import *

In [18]:
# Load credentials
with open('./credentials.txt') as f:
    flines = f.readlines()

cred_dict = {s.split('-')[0].strip() : s.split('-')[1].strip() for s in flines}
print(cred_dict)

{'username': 'Shi Hao', 'password': '12345678', 'assets_path': './assets/', 'save_path': '', 'save_path_holds': '', 'save_path_failed': '', 'save_path_final': ''}


In [22]:
# Login credentials
username = cred_dict['username']
password = cred_dict['password']

# Data paths
# driver_path = cred_dict['driver_path']
assets_path = cred_dict['assets_path']
hold_pos_path = assets_path + 'hold_positions.pickle'
hold_img_path = assets_path + 'hold_images/'

# Moonboard hold configurations
hold_setup_name = 'MoonBoard 2016'
hold_set_names = [
    'Original School Holds',
    'Hold Set A',
    'Hold Set B'
]

# Moonboard URLs
moonboard_url = 'https://www.moonboard.com/'

## Helper Functions

In [4]:
def download_img(image_url, image_path):
    """
    Saves image to specified path, given a url
    """
    if not os.path.exists(image_path):
        img_data = requests.get(image_url).content
        with open(image_path, 'wb') as handler:
            handler.write(img_data)
    return None

## Define Position Map
Moonboard hold positions are defined by letters A - K on the horizontal axis (left to right) and numbers 1 - 18 (bottom to up) on the vertical axis. The following maps will be used to alter this mapping to the numpy convention:

* Origin at the upper-left corner of a grid
* Dimension 1 (axis 0) indexed along vertical axis, top-to-bottom increasing
* Dimension 2 (axis 1) indexed along horizontal axis, left-to-right increasing

In [5]:
# Horizontal indexes
uppercases = string.ascii_uppercase

s_char = 'A'
e_char = 'K'

s_idx = uppercases.index(s_char)
e_idx = uppercases.index(e_char)

horiz_pos_map = {uppercases[i]:i for i in range(s_idx, e_idx+1)}
print(horiz_pos_map)

{'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10}


In [6]:
# Vertical indexes
num_vert = 18

original_range = [i for i in range(1, 19)]
new_range = [i for i in range(17, -1, -1)]

assert(len(original_range) == num_vert)
assert(len(new_range) == num_vert)

vert_pos_map = {original_range[i]: new_range[i] for i in range(num_vert)}
print(vert_pos_map)

{1: 17, 2: 16, 3: 15, 4: 14, 5: 13, 6: 12, 7: 11, 8: 10, 9: 9, 10: 8, 11: 7, 12: 6, 13: 5, 14: 4, 15: 3, 16: 2, 17: 1, 18: 0}


## Access MoonBoard Holds Page

In [10]:
# Start browser and login
browser = load_browser() # selenium no longer needs driver path
loginMoonBoard(browser, url = moonboard_url, username = username, password = password)
time.sleep(5)

In [11]:
# Go to holds setup page
click_problems = find_and_click(browser, 'a', 'id', 'lHoldsetups')
click_view = find_and_click(browser, 'li', 'id', 'm-viewholdsetups')
time.sleep(3)

# Set hold configuration
click_holdsetup(browser, hold_setup_name)

<selenium.webdriver.remote.webelement.WebElement (session="1d2b42ae2f76575b3ec3641c3e358296", element="5E0E2B6DF2E2A86A74A061F9FED5AC10_element_248")>

## Access and Scrape Holds

In [12]:
# Get expansion items
attr_dict = {
    'class':'k-icon k-i-expand',
    'aria-label':'Expand',
    'tabindex':'-1'
}
elems = get_elem_set(browser, 'a', attr_dict)

In [13]:
# Expand everything
for e in elems:
    e.click()
time.sleep(1)

In [14]:
# Collect all hold items
attr_dict = {'role':'row'}
rows = get_elem_set(browser, 'tr', attr_dict)
print('Number of row elements:', len(rows))

Number of row elements: 147


In [23]:
# Initialize hold information dictionary
row_info_dict = {}

# Scrape holds
for i, row in enumerate(rows):
    img_urls = find_element_attr(row, 'img', 'src') 
    
    if img_urls is None:
        continue
    
    # Get hold information
    hold_info = find_element_text(row, 'td')
    assert(len(hold_info) == 3)
    h_number, h_orient, h_pos = hold_info
    
    # Get hold name
    img_url = img_urls[0]
    h = img_url.split('/')[-1].split('.png')[0]
    assert(h.replace('h', '') == h_number)
    
    # Save hold into dictionary
    row_info_dict[h] = {
        'orientation': h_orient,
        'position': h_pos
    }

    # Download hold image
    image_save_name = hold_img_path + '%s.png'%h
    download_img(img_url, image_save_name)

# Number of holds should be 140 for Moonboard 2016
print('\nNumber of scraped holds:', len(row_info_dict))

Failed to find src
Failed to find src
Failed to find src
Failed to find src
Failed to find src
Failed to find src
Failed to find src

Number of scraped holds: 140


In [24]:
# Check that number of images is correct
img_names = [f for f in os.listdir(hold_img_path)]
print('Number of hold images:', len(img_names))

Number of hold images: 140


In [25]:
# Apply coordinate mapping
holds_info_dict = copy.deepcopy(row_info_dict)

for k, v in holds_info_dict.items():
    position = v['position']
    
    axis_0_val = vert_pos_map[int(position[1:])]
    axis_1_val = horiz_pos_map[position[0]]
    
    v['coord_position'] = [axis_0_val, axis_1_val]

In [26]:
# Observe coordinate mapping
print(row_info_dict['h1'])
print(holds_info_dict['h1'])

{'orientation': 'SE', 'position': 'H7'}
{'orientation': 'SE', 'position': 'H7', 'coord_position': [11, 7]}


In [27]:
# Save mined hold dictionary
save_pickle(holds_info_dict, hold_pos_path)

In [28]:
# Close browser
browser.close()