In [5]:
import requests
from bs4 import BeautifulSoup
import time
from urllib import urlencode
import selenium.webdriver
import pandas as pd

In [25]:
def search_mnt_project(url,  browser, delay=5):
    browser.get(url)
    time.sleep(delay)  # Wait a few seconds before getting the HTML source
    return browser.page_source

In [90]:
def find_route_urls(url, route_href_list):
    '''
    INPUT
        - url - a page url 
        - route_href_list - list of href's for routes
    OUTPUT
        - route_href_list - list of href's for routes
        - soup - the html for the given page
    '''
    browser = selenium.webdriver.Firefox()
    html = search_mnt_project(url,browser)
    browser.quit()
    soup = BeautifulSoup(html, 'html.parser')
    table_tag = soup.select('table.objectList')
    product_tags = soup.select('div.search-result-gridview-item')
    route_href_list = find_table_urls(table_tag, route_href_list)
    return route_href_list, soup

In [91]:
def all_route_urls(start_url):
    '''
    find all route urls
    INPUT
        - start_url - first url to go to
    OUTPUT
        - route_href_list - list off all the route's urls
    '''
    # make empty list to fill with route page urls
    route_href_list = [] 
    route_href_list, soup = find_route_urls(start_url, route_href_list)
    # click next page
    while page_url is not None:
        for a in soup.find('td', {'align': 'right'}).findAll('a',href=True):
            if 'Next' in a.text:
                page_url = a.get('href')
            else:
                page_url = None
        route_href_list, soup = find_route_urls(page_url, route_href_list)
    return route_href_list

In [92]:
def find_table_urls(table_tag, href_list):
    '''
     Pulls route urls from table
    '''
    for t in table_tag:
        for row in t.findAll('tr'):
            stars = row.findAll('td')[1].find('span',{'class': 'small textLight'})
            # stop if not review
            if stars != None:
                if str(stars.text) == ' (0)':
                    continue
            a = row.findAll('td')[0].find('a', href=True)
            if a != None:
                href_list.append(a.get('href'))
    return href_list

In [86]:
def route_page_info(query, d):
    '''
    INPUT
        - query 
        - d - dict to append to
    
    OUTPUT
    
    '''
    browser = selenium.webdriver.Firefox()
    url = "https://www.mountainproject.com%s" % query
    html = search_mnt_project(url, browser)
    browser.quit()
    soup = BeautifulSoup(html, 'html.parser')

    page_tag = soup.find('div', {'id':'rspCol800'})
    d['name'] = page_tag.find('span', {'itemprop':'itemreviewed'}).text
    d['grade'] = page_tag.find('span', {'class':'rateYDS'}).text
    route_stars_text = soup.find('span', {'id':'starSummaryText'}).text.split('Average: ')
    d['stars'] = route_stars_text[1][:3]
    # convert to string from unicode
    star_url = str(soup.find('span', {'id':'starSummaryText'}).find('a', href=True).get('href'))
    
    for i, td in enumerate(page_tag.find('table').findAll('td')): 
        if td.text.split(':')[0] == 'Type':
            d['type'] = page_tag.find('table').findAll('td')[i+1].text
        elif td.text.split(':')[0] == 'Original':
            d['original_grade'] = page_tag.find('table').findAll('td')[i+1].text
        elif td.text.split(':')[0] == 'FA':
            fa = page_tag.find('table').findAll('td')[i+1].text
            if str(fa) == '?':
                fa = ''
            d['FA'] = fa
        elif td.text.split(':')[0] == 'Season':
            season = page_tag.find('table').findAll('td')[i+1].text
            d['season'] = season
        elif td.text.split(':')[0] == 'Page Views':
            d['page_views'] = page_tag.find('table').findAll('td')[i+1].text
        elif td.text.split(':')[0] == 'Submitted By':
            d['submitted_by'] = page_tag.find('table').findAll('td')[i+1].text
            
    return star_url, d

In [62]:
def make_route_dict():
    route_dict = {'name': [],'grade':[],'stars':[],'type':[],'original_grade':[],'FA':[],'season':[],'page_views':[],
             'submitted_by':[]}
    return route_dict

In [82]:
from pymongo import MongoClient
import json
def add_to_database(route_d):
    client = MongoClient('mongodb://localhost:27017/')
    db = client.route_info
    route_info = db.route_info
    route_info.insert_one(route_d)

In [87]:
rock_url = '''https://www.mountainproject.com/scripts/Search.php?searchType=
            routeFinder&minVotes=0&selectedIds=105708966&type=rock&diffMinrock=
            800&diffMinboulder=20000&diffMinaid=70000&diffMinice=30000&diffMinmixed=
            50000&diffMaxrock=12400&diffMaxboulder=21400&diffMaxaid=75260&diffMaxice=
            38500&diffMaxmixed=60000&is_trad_climb=1&is_sport_climb=1&is_top_rope=
            1&stars=0&pitches=0&sort1=area&sort2=rating'''
# returns all route urls
route_urls = all_route_urls(rock_url)
star_urls = []
for i, quary in enumerate(route_urls):
    route_dict = make_route_dict()
    star_url, route_d = route_page_info(quary,route_dict) # updates route_df and return star_url
    add_to_database(route_d)
    star_urls.append(star_url) 

KeyboardInterrupt: 

In [89]:
client = MongoClient('mongodb://localhost:27017/')
db = client.route_info
route_info = db.route_info
raw_data = route_info.find()
df = pd.DataFrame(list(raw_data))
df.head(100)

Unnamed: 0,FA,_id,grade,name,original_grade,page_views,season,stars,submitted_by,type
0,,58c7249740b44128791b7e3a,YDS: 5.7,5.7,YDS: 5.7 French: 5a Ewbanks: 15 UIAA: V+ ...,317,[],2 s,"Keenan Waeschle on Mar 31, 2010","Sport, 40'"
1,,58c724a140b44128791b7e3b,YDS: 5.8,5.8,YDS: 5.8 French: 5b Ewbanks: 16 UIAA: VI- ...,253,[],1.5,"Keenan Waeschle on Mar 31, 2010","Sport, 40'"
2,,58c724ad40b44128791b7e3c,YDS: Easy 5th,Brothers Traverse,YDS: Easy 5th French: 1+ Ewbanks: 3 UIAA: ...,4643,spring,3 s,"Eric Fjellanger on Jul 20, 2009","Trad, Alpine"
3,,58c724b840b44128791b7e3d,YDS: Easy 5th,South Corner,YDS: Easy 5th French: 1+ Ewbanks: 3 UIAA: ...,200,[],2.5,"Matt. B. on Sep 13, 2015","Trad, Alpine, 2 pitches, 150', Grade II"
4,1965 Arnie & Diane Bloomer,58c724c240b44128791b7e3e,YDS: 3rd,Honeymoon Route,YDS: 3rd French: 1- Ewbanks: 1 UIAA: I ZA...,11,[],2 s,"JeremyJ on Mar 3, 2017","Trad, Alpine"
5,?,58c724cd40b44128791b7e3f,YDS: 5.10a,The Dinner Plate,YDS: 5.10a French: 6a Ewbanks: 18 UIAA: VI...,839,[],3 s,"Joshua Dreher on Sep 28, 2010","Sport, 190'"
6,,58c726b740b44128791b7e43,YDS: 5.7,5.7,YDS: 5.7 French: 5a Ewbanks: 15 UIAA: V+ ...,317,[],2 s,"Keenan Waeschle on Mar 31, 2010","Sport, 40'"
7,,58c726c140b44128791b7e44,YDS: 5.8,5.8,YDS: 5.8 French: 5b Ewbanks: 16 UIAA: VI- ...,253,[],1.5,"Keenan Waeschle on Mar 31, 2010","Sport, 40'"
8,,58c726cc40b44128791b7e45,YDS: Easy 5th,Brothers Traverse,YDS: Easy 5th French: 1+ Ewbanks: 3 UIAA: ...,4643,spring,3 s,"Eric Fjellanger on Jul 20, 2009","Trad, Alpine"
9,,58c726d740b44128791b7e46,YDS: Easy 5th,South Corner,YDS: Easy 5th French: 1+ Ewbanks: 3 UIAA: ...,200,[],2.5,"Matt. B. on Sep 13, 2015","Trad, Alpine, 2 pitches, 150', Grade II"
