# Scrape mountainproject.com
This notebook extracts data about climbing routes on mountainproject.com and climbers who have rated those routes. The steps are as follows:
1. Use Mountain Project's [route finder](https://www.mountainproject.com/route-guide) to extract all climbs in: New York -> Gunks, The -> Trapps, The. The URLs of these routes are given [here](https://www.mountainproject.com/route-finder?selectedIds=105798818&type=rock&diffMinrock=800&diffMinboulder=20000&diffMinaid=70000&diffMinice=30000&diffMinmixed=50000&diffMaxrock=12400&diffMaxboulder=21400&diffMaxaid=75260&diffMaxice=38500&diffMaxmixed=60000&is_trad_climb=1&is_sport_climb=1&is_top_rope=1&stars=0&pitches=0&sort1=area&sort2=rating).
2. Go to the page of each route and extract all useful data.
3. Go to the ratings page for each route and get each climber's rating of the route.
4. Get all unique climbers that have rated the routes and extract data from their pages.

In [1]:
%pylab inline

# Plotting
import matplotlib.pyplot as plt

# Download html of webpage
import urllib2
# Beautiful soup
import bs4 as bs

import pandas as pd

Populating the interactive namespace from numpy and matplotlib


# 1. Extract route urls

In [3]:
def extract_route_urls(route_table_url, outfile):
    """Scrape a route-finder results page and save every route URL to a CSV.

    Parameters
    ----------
    route_table_url : str
        URL of a mountainproject.com route-finder results page.
    outfile : str
        Path of the CSV file to write. It has a single column, 'route_url',
        and no index column.

    Notes
    -----
    Only the first <table> on the page is read. Rows without a <td> cell, or
    whose first cell contains no link (for example a header row), are skipped
    rather than raising. An IndexError is still raised if the page contains
    no table at all.
    """
    source = urllib2.urlopen(route_table_url).read()
    soup = bs.BeautifulSoup(source, 'lxml')

    # All the route urls are repeated twice. Look at the ones in the table only.
    route_table = soup.find_all('table')[0]

    route_urls = []
    for tr in route_table.find_all('tr'):
        first_cell = tr.find('td')
        if first_cell is None:
            continue
        link = first_cell.find('a', href=True)
        if link is None:
            continue
        # Read the parsed attribute instead of slicing the tag's HTML text:
        # slicing on 'href="' / '">' breaks as soon as the anchor carries any
        # attribute after href.
        route_urls.append(link['href'])

    df_routes = pd.DataFrame(data=route_urls, columns=['route_url'])
    df_routes.to_csv(outfile, index=False)

In [4]:
# Same route-finder query as the one linked in the introduction (parameters
# in a different order), with `viewAll=1` added.
# NOTE(review): presumably `viewAll=1` returns every result on one page -- confirm.
route_table_url = 'https://www.mountainproject.com/route-finder?diffMaxaid=75260&diffMaxboulder=21400&diffMaxice=38500&diffMaxmixed=60000&diffMaxrock=12400&diffMinaid=70000&diffMinboulder=20000&diffMinice=30000&diffMinmixed=50000&diffMinrock=800&is_sport_climb=1&is_top_rope=1&is_trad_climb=1&pitches=0&selectedIds=105798818&sort1=area&sort2=rating&stars=0&type=rock&viewAll=1'
# CSV that receives the scraped route URLs (single 'route_url' column).
outfile = '../data/route_urls.csv'
extract_route_urls(route_table_url, outfile)

In [9]:
# Read the saved route URLs back from `outfile` and preview the first three rows.
df_route_urls = pd.read_csv(outfile)
df_route_urls.head(3)

Unnamed: 0,route_url
0,https://www.mountainproject.com/route/108300801/sudoriferous
1,https://www.mountainproject.com/route/112109817/cordelette-arete
2,https://www.mountainproject.com/route/106133868/69


# 2. Extract route data

In [6]:
def _token_before(tokens, keyword, default):
    """Return the token just before the first token containing `keyword`.

    The match is case-insensitive. `default` is returned when no token
    contains `keyword`, or when the first match is the very first token
    (it has no predecessor, and indexing -1 would wrap to the last token).
    """
    for idx, token in enumerate(tokens):
        if keyword in token.lower():
            return tokens[idx - 1] if idx > 0 else default
    return default


def extract_route_data(route_url):
    """Extract interesting data from the webpage of a route.
    This is used for building the content based recommender.

    Parameters
    ----------
    route_url : str
        URL of a single route page on mountainproject.com.

    Returns
    -------
    dict
        Keys: 'route_name', 'difficulty', 'description', 'pro_type',
        'length', 'pitches', 'total_views', 'monthly_views', 'nratings'.
        'length' and 'pitches' are the raw text tokens from the page when
        present, and the ints 0 and 1 respectively when the page does not
        state them. The view and rating counts are ints.

    Raises
    ------
    AttributeError, IndexError, ValueError
        If the page does not have the expected layout.
    """
    source = urllib2.urlopen(route_url).read()
    soup = bs.BeautifulSoup(source, 'lxml')

    # Name of the route, with runs of whitespace collapsed to single spaces
    heading = soup.find('div', class_='col-md-9 float-md-right').find('h1')
    route_name = ' '.join(heading.text.split())

    # YDS difficulty rating
    difficulty = soup.find('span', class_='rateYDS').text.split()[0]

    # Text description of route
    description = soup.find('div', class_='fr-view').text

    # protection type, route length, number of pitches
    details = soup.find('table', class_='description-details')
    type_tokens = details.find('tr').text.split()

    pro_type = str(type_tokens[1].replace(',', ''))

    # If number of pitches is not stated, there is only 1
    pitches = _token_before(type_tokens, 'pitch', 1)

    # If the word 'ft' does not appear, the length is unknown. Set it to 0 for now.
    length = _token_before(type_tokens, 'ft', 0)

    # Total number of webpage views, monthly views (third row of the table)
    view_tokens = details.find_all('tr')[2].text.split()
    total_views = int(view_tokens[2].replace(',', ''))
    monthly_views = int(view_tokens[5].split('/')[0].replace(',', ''))

    # Number of route ratings: second-to-last word of the tooltip link text
    rating_tokens = soup.find('a', class_='show-tooltip').text.split()
    nratings = int(rating_tokens[-2].replace(',', ''))

    return {
        'route_name':route_name, 'difficulty':difficulty, 'description':description,
        'pro_type':pro_type, 'length':length, 'pitches':pitches,
        'total_views':total_views, 'monthly_views':monthly_views,
        'nratings':nratings}

In [7]:
def extract_all_routes_data(route_url_file, route_data_file):
    """Scrape every route listed in a CSV of route URLs and save the results.

    route_url_file: CSV with a 'route_url' column.
    route_data_file: CSV to write, one row per route. Besides the fields
    returned by extract_route_data, each row carries 'iid' (the position of
    the route in route_url_file) and 'route_url'.
    """
    urls = pd.read_csv(route_url_file)['route_url'].values

    records = []
    for iid, route_url in enumerate(urls):
        # Progress log: one line per route
        print(iid, route_url)
        record = extract_route_data(route_url)
        record['iid'] = iid
        record['route_url'] = route_url
        records.append(record)

    pd.DataFrame(records).to_csv(route_data_file, index=False, encoding='utf-8')

In [8]:
# Scrape every route listed in the URL file and save the per-route data as CSV.
# This makes one HTTP request per route and prints one progress line per route.
route_url_file = '../data/route_urls.csv'
route_data_file = '../data/route_data.csv'
extract_all_routes_data(route_url_file, route_data_file)

(0, 'https://www.mountainproject.com/route/108300801/sudoriferous')
(1, 'https://www.mountainproject.com/route/112109817/cordelette-arete')
(2, 'https://www.mountainproject.com/route/106133868/69')
(3, 'https://www.mountainproject.com/route/106437310/easy-keyhole')
(4, 'https://www.mountainproject.com/route/109528000/short-and-simple')
(5, 'https://www.mountainproject.com/route/107816567/short-job')
(6, 'https://www.mountainproject.com/route/105799721/horseman')
(7, 'https://www.mountainproject.com/route/106133879/no-picnic')
(8, 'https://www.mountainproject.com/route/106803063/black-fly')
(9, 'https://www.mountainproject.com/route/106133896/double-chin')
(10, 'https://www.mountainproject.com/route/105888225/bunny')
(11, 'https://www.mountainproject.com/route/110494047/pony-express')
(12, 'https://www.mountainproject.com/route/106133888/fancy-idiot')
(13, 'https://www.mountainproject.com/route/106138056/eyebrow')
(14, 'https://www.mountainproject.com/route/106281540/katzenjammer')
(15,

(120, 'https://www.mountainproject.com/route/105811994/ribs')
(121, 'https://www.mountainproject.com/route/107852049/simple-ceilings')
(122, 'https://www.mountainproject.com/route/105954124/high-corner')
(123, 'https://www.mountainproject.com/route/105799687/shockleys-ceiling')
(124, 'https://www.mountainproject.com/route/105902104/ribless-aka-spare-ribs')
(125, 'https://www.mountainproject.com/route/105799704/strictly-from-nowhere')
(126, 'https://www.mountainproject.com/route/107098017/gorilla-my-dreams')
(127, 'https://www.mountainproject.com/route/106134457/calisthenic')
(128, 'https://www.mountainproject.com/route/106295786/oscar-and-charlie')
(129, 'https://www.mountainproject.com/route/109022777/gaston')
(130, 'https://www.mountainproject.com/route/106134470/travels-with-charley')
(131, 'https://www.mountainproject.com/route/106031516/anguish')
(132, 'https://www.mountainproject.com/route/109555637/glyptodon')
(133, 'https://www.mountainproject.com/route/106295806/glypnod')
(134

(235, 'https://www.mountainproject.com/route/106565362/beyond-the-fringe')
(236, 'https://www.mountainproject.com/route/106146808/gory-thumb')
(237, 'https://www.mountainproject.com/route/106146816/badfinger')
(238, 'https://www.mountainproject.com/route/108242249/blunderbus')
(239, 'https://www.mountainproject.com/route/105920916/commando-rave')
(240, 'https://www.mountainproject.com/route/112217799/galactic-hitchhikers')
(241, 'https://www.mountainproject.com/route/106146799/wild-horses')
(242, 'https://www.mountainproject.com/route/105799095/never-never-land')
(243, 'https://www.mountainproject.com/route/107466579/nevermore')
(244, 'https://www.mountainproject.com/route/106189152/city-streets')
(245, 'https://www.mountainproject.com/route/105920912/balrog')
(246, 'https://www.mountainproject.com/route/106291093/dis-mantel')
(247, 'https://www.mountainproject.com/route/107226291/dat-mantel')
(248, 'https://www.mountainproject.com/route/106046438/jaccuse')
(249, 'https://www.mountainp

(351, 'https://www.mountainproject.com/route/106018746/the-spring-p1')
(352, 'https://www.mountainproject.com/route/115325123/mona')
(353, 'https://www.mountainproject.com/route/106733159/high-jinx')
(354, 'https://www.mountainproject.com/route/106089957/lito-and-the-swan')
(355, 'https://www.mountainproject.com/route/106813976/lost-in-yellow')
(356, 'https://www.mountainproject.com/route/113603406/tweak-or-freak')
(357, 'https://www.mountainproject.com/route/107516922/vader')
(358, 'https://www.mountainproject.com/route/108813461/manly-yes-but-i-like-it-too')
(359, 'https://www.mountainproject.com/route/106238729/the-winter')
(360, 'https://www.mountainproject.com/route/105798933/carbs-and-caffeine')
(361, 'https://www.mountainproject.com/route/105888230/the-fall')
(362, 'https://www.mountainproject.com/route/105798938/no-mans-land')
(363, 'https://www.mountainproject.com/route/105798924/the-yellow-wall')
(364, 'https://www.mountainproject.com/route/105898985/the-sting')
(365, 'https:

(466, 'https://www.mountainproject.com/route/109055246/shitty-mitty')
(467, 'https://www.mountainproject.com/route/106640673/hudsons-boulder-problem-aka-trashcan-overhang')
(468, 'https://www.mountainproject.com/route/106456276/low-exposure')
(469, 'https://www.mountainproject.com/route/112930386/freebie')


In [16]:
# Load the scraped route data back from `route_data_file` and preview two rows.
df_routes = pd.read_csv(route_data_file)

df_routes.head(2)

Unnamed: 0,description,difficulty,iid,length,monthly_views,nratings,pitches,pro_type,route_name,route_url,total_views
0,Pitch 1: Climb the face just next to the huge left-facing corner (feel free to use the corner bl...,5.2,0,120,19,7,2,Trad,Sudoriferous,https://www.mountainproject.com/route/108300801/sudoriferous,1309
1,"The first* route in the Trapps! An easy jaunt up a sharp arete, and surprisingly airy for a such...",5.2,1,80,38,12,1,Trad,Cordelette Arete,https://www.mountainproject.com/route/112109817/cordelette-arete,1161


# 3. Extract the ratings for each route

In [17]:
def extract_route_ratings(route_url):
    """Collect the star rating each user gave to one route.

    The ratings are read from the route's stats page, whose URL is built from
    the last two path segments of route_url. Returns a list of dicts with the
    keys 'user_url', 'route_url' and 'rating' (number of star images found in
    the user's table row).
    """
    # Each star is an image. A bomb image is the same as counting 0 stars.
    star_img = 'https://cdn.apstatic.com/img/stars/starBlue.svg'

    # Stats page corresponding to route_url: .../route/stats/<id>/<name>
    tail = route_url.rsplit('/', 2)
    stats_url = 'https://www.mountainproject.com/route/stats/{}/{}'.format(
        tail[-2], tail[-1])

    soup = bs.BeautifulSoup(urllib2.urlopen(stats_url).read(), 'lxml')

    # Each row of this table contains the user and number of stars
    table = soup.find('table', class_='table table-striped')

    ratings = []
    for row in table.find_all('tr'):
        user_url = row.find('a').get('href')
        image_sources = [img.get('src') for img in row.find_all('img')]
        ratings.append({
            'user_url':user_url,
            'route_url':route_url,
            'rating':image_sources.count(star_img)})

    return ratings

In [18]:
def extract_ratings_for_all_routes(route_url_file, ratings_file):
    """Scrape the user ratings of every route listed in a CSV of route URLs.

    route_url_file: CSV with a 'route_url' column.
    ratings_file: CSV to write, one row per (user, route) rating as returned
    by extract_route_ratings.
    """
    urls = pd.read_csv(route_url_file)['route_url'].values

    all_ratings = []
    for position, route_url in enumerate(urls):
        # Progress log: one line per route
        print(position, route_url)
        # Keep one flat list of rating records, not a list of per-route lists.
        all_ratings.extend(extract_route_ratings(route_url))

    pd.DataFrame(all_ratings).to_csv(ratings_file, index=False)

In [19]:
# Scrape the ratings of every route listed in the URL file and save them as CSV.
# This makes one HTTP request per route and prints one progress line per route.
route_url_file = '../data/route_urls.csv'
ratings_file = '../data/route_ratings.csv'
extract_ratings_for_all_routes(route_url_file, ratings_file)

(0, 'https://www.mountainproject.com/route/108300801/sudoriferous')
(1, 'https://www.mountainproject.com/route/112109817/cordelette-arete')
(2, 'https://www.mountainproject.com/route/106133868/69')
(3, 'https://www.mountainproject.com/route/106437310/easy-keyhole')
(4, 'https://www.mountainproject.com/route/109528000/short-and-simple')
(5, 'https://www.mountainproject.com/route/107816567/short-job')
(6, 'https://www.mountainproject.com/route/105799721/horseman')
(7, 'https://www.mountainproject.com/route/106133879/no-picnic')
(8, 'https://www.mountainproject.com/route/106803063/black-fly')
(9, 'https://www.mountainproject.com/route/106133896/double-chin')
(10, 'https://www.mountainproject.com/route/105888225/bunny')
(11, 'https://www.mountainproject.com/route/110494047/pony-express')
(12, 'https://www.mountainproject.com/route/106133888/fancy-idiot')
(13, 'https://www.mountainproject.com/route/106138056/eyebrow')
(14, 'https://www.mountainproject.com/route/106281540/katzenjammer')
(15,

(120, 'https://www.mountainproject.com/route/105811994/ribs')
(121, 'https://www.mountainproject.com/route/107852049/simple-ceilings')
(122, 'https://www.mountainproject.com/route/105954124/high-corner')
(123, 'https://www.mountainproject.com/route/105799687/shockleys-ceiling')
(124, 'https://www.mountainproject.com/route/105902104/ribless-aka-spare-ribs')
(125, 'https://www.mountainproject.com/route/105799704/strictly-from-nowhere')
(126, 'https://www.mountainproject.com/route/107098017/gorilla-my-dreams')
(127, 'https://www.mountainproject.com/route/106134457/calisthenic')
(128, 'https://www.mountainproject.com/route/106295786/oscar-and-charlie')
(129, 'https://www.mountainproject.com/route/109022777/gaston')
(130, 'https://www.mountainproject.com/route/106134470/travels-with-charley')
(131, 'https://www.mountainproject.com/route/106031516/anguish')
(132, 'https://www.mountainproject.com/route/109555637/glyptodon')
(133, 'https://www.mountainproject.com/route/106295806/glypnod')
(134

(235, 'https://www.mountainproject.com/route/106565362/beyond-the-fringe')
(236, 'https://www.mountainproject.com/route/106146808/gory-thumb')
(237, 'https://www.mountainproject.com/route/106146816/badfinger')
(238, 'https://www.mountainproject.com/route/108242249/blunderbus')
(239, 'https://www.mountainproject.com/route/105920916/commando-rave')
(240, 'https://www.mountainproject.com/route/112217799/galactic-hitchhikers')
(241, 'https://www.mountainproject.com/route/106146799/wild-horses')
(242, 'https://www.mountainproject.com/route/105799095/never-never-land')
(243, 'https://www.mountainproject.com/route/107466579/nevermore')
(244, 'https://www.mountainproject.com/route/106189152/city-streets')
(245, 'https://www.mountainproject.com/route/105920912/balrog')
(246, 'https://www.mountainproject.com/route/106291093/dis-mantel')
(247, 'https://www.mountainproject.com/route/107226291/dat-mantel')
(248, 'https://www.mountainproject.com/route/106046438/jaccuse')
(249, 'https://www.mountainp

(351, 'https://www.mountainproject.com/route/106018746/the-spring-p1')
(352, 'https://www.mountainproject.com/route/115325123/mona')
(353, 'https://www.mountainproject.com/route/106733159/high-jinx')
(354, 'https://www.mountainproject.com/route/106089957/lito-and-the-swan')
(355, 'https://www.mountainproject.com/route/106813976/lost-in-yellow')
(356, 'https://www.mountainproject.com/route/113603406/tweak-or-freak')
(357, 'https://www.mountainproject.com/route/107516922/vader')
(358, 'https://www.mountainproject.com/route/108813461/manly-yes-but-i-like-it-too')
(359, 'https://www.mountainproject.com/route/106238729/the-winter')
(360, 'https://www.mountainproject.com/route/105798933/carbs-and-caffeine')
(361, 'https://www.mountainproject.com/route/105888230/the-fall')
(362, 'https://www.mountainproject.com/route/105798938/no-mans-land')
(363, 'https://www.mountainproject.com/route/105798924/the-yellow-wall')
(364, 'https://www.mountainproject.com/route/105898985/the-sting')
(365, 'https:

(466, 'https://www.mountainproject.com/route/109055246/shitty-mitty')
(467, 'https://www.mountainproject.com/route/106640673/hudsons-boulder-problem-aka-trashcan-overhang')
(468, 'https://www.mountainproject.com/route/106456276/low-exposure')
(469, 'https://www.mountainproject.com/route/112930386/freebie')


In [25]:
# Load the scraped ratings back from `ratings_file` and show three random rows.
df_ratings = pd.read_csv(ratings_file)
# Disabled: styles the URL columns with a `make_clickable` formatter.
# NOTE(review): `make_clickable` is not defined in the visible part of this
# notebook -- define it before re-enabling the next line.
# df_ratings.style.format(make_clickable, subset=['user_url', 'route_url'])
df_ratings.sample(3)

Unnamed: 0,rating,route_url,user_url
5971,4,https://www.mountainproject.com/route/105803228/stirrup-trouble,https://www.mountainproject.com/user/107238223/tony-lopez
2948,4,https://www.mountainproject.com/route/105799032/nosedive,https://www.mountainproject.com/user/107026497/jason-shermer
6265,2,https://www.mountainproject.com/route/106034654/easy-overhang,https://www.mountainproject.com/user/106468686/tdavidock


# 4. Extract data about each unique climber

In [26]:
def extract_user_data(user_url):
    """Scrape the name and profile text from one user's page.

    Returns a dict with the keys 'name' (text of the page's name heading) and
    'profile' (the text under the name, with whitespace runs collapsed to
    single spaces).
    """
    page = bs.BeautifulSoup(urllib2.urlopen(user_url).read(), 'lxml')

    # Get user name
    user_name = page.find('h2', class_='dont-shrink mb-0').text

    # All info under the name
    info_block = page.find('div', class_='col-xs-12 text-xs-center').find('div')
    profile_text = ' '.join(info_block.text.split())

    return {'name':user_name, 'profile':profile_text}

In [27]:
def extract_all_users_data(route_ratings_file, user_data_file):
    """Scrape the profile of every unique user found in a ratings CSV.

    Parameters
    ----------
    route_ratings_file : str
        CSV with a 'user_url' column (one row per rating).
    user_data_file : str
        CSV to write, one row per unique user. Besides the fields returned by
        extract_user_data, each row carries 'uid' (the position of the user in
        order of first appearance in route_ratings_file) and 'user_url'.
    """
    df_ratings = pd.read_csv(route_ratings_file)

    # Get only the unique users
    user_urls = df_ratings['user_url'].unique()
    # Parenthesised so the cell parses on Python 3 as well; on Python 2 a
    # single parenthesised expression prints exactly as the bare statement did.
    print(user_urls.shape)

    user_data_list = []
    for uid, url in enumerate(user_urls):
        # Progress log: one line per user
        print(uid, url)
        data = extract_user_data(url)
        data['uid'] = uid
        data['user_url'] = url
        user_data_list.append(data)

    df = pd.DataFrame(user_data_list)
    df.to_csv(user_data_file, index=False, encoding='utf-8')

In [28]:
# Scrape the profile of every unique user in the ratings file and save as CSV.
# This makes one HTTP request per unique user and prints one progress line each.
route_ratings_file = '../data/route_ratings.csv'
user_data_file = '../data/user_data.csv'
extract_all_users_data(route_ratings_file, user_data_file)

(2432,)
(0, 'https://www.mountainproject.com/user/106087077/mark-roth')
(1, 'https://www.mountainproject.com/user/109581184/joel-ryan')
(2, 'https://www.mountainproject.com/user/200223567/anyo-lesiuk')
(3, 'https://www.mountainproject.com/user/107253738/dragons')
(4, 'https://www.mountainproject.com/user/106630260/j-serpico')
(5, 'https://www.mountainproject.com/user/110409351/ben-hoste')
(6, 'https://www.mountainproject.com/user/109206590/john271')
(7, 'https://www.mountainproject.com/user/112449870/david-kerkeslager')
(8, 'https://www.mountainproject.com/user/112460885/nicole-vidal')
(9, 'https://www.mountainproject.com/user/113522523/shionanlim')
(10, 'https://www.mountainproject.com/user/106679659/jonathan-steitzer')
(11, 'https://www.mountainproject.com/user/107518458/kurtz')
(12, 'https://www.mountainproject.com/user/112423730/albi-eds')
(13, 'https://www.mountainproject.com/user/106646308/kiff')
(14, 'https://www.mountainproject.com/user/110729900/j-l')
(15, 'https://www.mountai

(123, 'https://www.mountainproject.com/user/106567319/mike-mclean')
(124, 'https://www.mountainproject.com/user/106762873/eric-albers')
(125, 'https://www.mountainproject.com/user/13658/rob-bauer')
(126, 'https://www.mountainproject.com/user/106607660/johnwesely')
(127, 'https://www.mountainproject.com/user/106822110/kevin-heckeler')
(128, 'https://www.mountainproject.com/user/106797746/valerie-ab')
(129, 'https://www.mountainproject.com/user/106863637/adkeditor-brown')
(130, 'https://www.mountainproject.com/user/106456515/zach-s')
(131, 'https://www.mountainproject.com/user/107872722/stephen-bittner')
(132, 'https://www.mountainproject.com/user/108674850/ccas')
(133, 'https://www.mountainproject.com/user/109405478/patrick-killian')
(134, 'https://www.mountainproject.com/user/106993942/luc-514')
(135, 'https://www.mountainproject.com/user/108259746/jim-thayer')
(136, 'https://www.mountainproject.com/user/105890911/greg-davis')
(137, 'https://www.mountainproject.com/user/110036457/griff

(245, 'https://www.mountainproject.com/user/10610/jay-knower')
(246, 'https://www.mountainproject.com/user/106011891/ross-purnell')
(247, 'https://www.mountainproject.com/user/106687445/peterw-whitmore')
(248, 'https://www.mountainproject.com/user/106749092/brian-rosenfeld')
(249, 'https://www.mountainproject.com/user/106619581/greg-sudlow')
(250, 'https://www.mountainproject.com/user/106751248/rcongo')
(251, 'https://www.mountainproject.com/user/106588732/lee-h')
(252, 'https://www.mountainproject.com/user/106715896/christina-callaghan')
(253, 'https://www.mountainproject.com/user/106773591/jonathan-ward')
(254, 'https://www.mountainproject.com/user/11156/tradiban')
(255, 'https://www.mountainproject.com/user/106802916/anton-tokranov')
(256, 'https://www.mountainproject.com/user/106113062/ben-c')
(257, 'https://www.mountainproject.com/user/11690/rui-ferreira')
(258, 'https://www.mountainproject.com/user/106680875/david-ford')
(259, 'https://www.mountainproject.com/user/106844475/nate-

(367, 'https://www.mountainproject.com/user/108321835/sam-fox')
(368, 'https://www.mountainproject.com/user/111126783/city-dweller')
(369, 'https://www.mountainproject.com/user/110538349/liz-reiman')
(370, 'https://www.mountainproject.com/user/110148121/systematic')
(371, 'https://www.mountainproject.com/user/109688349/ajg519')
(372, 'https://www.mountainproject.com/user/108052600/liz-han')
(373, 'https://www.mountainproject.com/user/108085357/tyler-kempney')
(374, 'https://www.mountainproject.com/user/109620395/maureen-petterson')
(375, 'https://www.mountainproject.com/user/109711410/billy-simek')
(376, 'https://www.mountainproject.com/user/107755397/ed-wade')
(377, 'https://www.mountainproject.com/user/109411206/cameron-collins')
(378, 'https://www.mountainproject.com/user/109634018/richard-beck')
(379, 'https://www.mountainproject.com/user/111626699/alessandro-pruscino')
(380, 'https://www.mountainproject.com/user/111627067/jilliancai')
(381, 'https://www.mountainproject.com/user/11

(488, 'https://www.mountainproject.com/user/109542989/vadim-mikheev')
(489, 'https://www.mountainproject.com/user/200409293/vinny-casey')
(490, 'https://www.mountainproject.com/user/200067404/max-zielinski')
(491, 'https://www.mountainproject.com/user/200062780/aare-puussaar')
(492, 'https://www.mountainproject.com/user/107551540/zyvandus')
(493, 'https://www.mountainproject.com/user/107552617/b-callahan')
(494, 'https://www.mountainproject.com/user/10909/charlesr')
(495, 'https://www.mountainproject.com/user/106125724/ian-wauchope')
(496, 'https://www.mountainproject.com/user/107519230/adam-faller')
(497, 'https://www.mountainproject.com/user/106030894/brian-aitken')
(498, 'https://www.mountainproject.com/user/107607088/danielle-nelson')
(499, 'https://www.mountainproject.com/user/107664805/nicholas-kinloch')
(500, 'https://www.mountainproject.com/user/105832216/buckeeb-hart')
(501, 'https://www.mountainproject.com/user/105922412/nick-weinstock')
(502, 'https://www.mountainproject.com

(610, 'https://www.mountainproject.com/user/109437284/kimberley-rothacker')
(611, 'https://www.mountainproject.com/user/107699335/jamint')
(612, 'https://www.mountainproject.com/user/109525957/jon-zorella')
(613, 'https://www.mountainproject.com/user/109816235/nth')
(614, 'https://www.mountainproject.com/user/108805745/cedric-bg')
(615, 'https://www.mountainproject.com/user/107417154/aaron-johnston')
(616, 'https://www.mountainproject.com/user/109563509/lotte-meijer')
(617, 'https://www.mountainproject.com/user/108981414/matt-vega')
(618, 'https://www.mountainproject.com/user/109316126/cjmaynar')
(619, 'https://www.mountainproject.com/user/106978809/ben-smith')
(620, 'https://www.mountainproject.com/user/109405787/danielle-lansford')
(621, 'https://www.mountainproject.com/user/108265416/sigg')
(622, 'https://www.mountainproject.com/user/108272772/ryan-c')
(623, 'https://www.mountainproject.com/user/105886625/eric-beyeler')
(624, 'https://www.mountainproject.com/user/107097698/phil-tatt

(731, 'https://www.mountainproject.com/user/105990845/orphaned')
(732, 'https://www.mountainproject.com/user/105795015/adam-catalano')
(733, 'https://www.mountainproject.com/user/12448/jeremy')
(734, 'https://www.mountainproject.com/user/105812440/micahisaac')
(735, 'https://www.mountainproject.com/user/106778424/larry-s')
(736, 'https://www.mountainproject.com/user/107055104/tallmatt')
(737, 'https://www.mountainproject.com/user/106388951/jason-a')
(738, 'https://www.mountainproject.com/user/106817494/snowstoked')
(739, 'https://www.mountainproject.com/user/106841465/amy-r-wilson')
(740, 'https://www.mountainproject.com/user/106709846/bca')
(741, 'https://www.mountainproject.com/user/106814605/sickmove')
(742, 'https://www.mountainproject.com/user/106794436/wei-ming-lam')
(743, 'https://www.mountainproject.com/user/106649711/josh-smethers')
(744, 'https://www.mountainproject.com/user/106637423/jennifer-wies')
(745, 'https://www.mountainproject.com/user/107431656/charlesg3-gruenwald')


(853, 'https://www.mountainproject.com/user/110787766/xi-lian')
(854, 'https://www.mountainproject.com/user/106134707/jeffrey-lecours')
(855, 'https://www.mountainproject.com/user/107209622/brostin')
(856, 'https://www.mountainproject.com/user/110062671/vincent-liguori')
(857, 'https://www.mountainproject.com/user/105888111/mr-malloc')
(858, 'https://www.mountainproject.com/user/110728444/ghost')
(859, 'https://www.mountainproject.com/user/108343834/dave-caro')
(860, 'https://www.mountainproject.com/user/110964058/justin-chapman')
(861, 'https://www.mountainproject.com/user/113026820/kira-bouchard')
(862, 'https://www.mountainproject.com/user/108939459/crackatoa-spiesbach')
(863, 'https://www.mountainproject.com/user/107368007/john-gehrig')
(864, 'https://www.mountainproject.com/user/105896714/taino-grosjean')
(865, 'https://www.mountainproject.com/user/106020371/risi')
(866, 'https://www.mountainproject.com/user/107767609/gillian-hammond')
(867, 'https://www.mountainproject.com/user/1

(974, 'https://www.mountainproject.com/user/110370905/aj-w')
(975, 'https://www.mountainproject.com/user/108598790/alex-abrams')
(976, 'https://www.mountainproject.com/user/106342472/proto')
(977, 'https://www.mountainproject.com/user/109444514/ben-mccarty')
(978, 'https://www.mountainproject.com/user/106542092/julia-burns')
(979, 'https://www.mountainproject.com/user/106920912/james-schmidt')
(980, 'https://www.mountainproject.com/user/108268000/karl-henize')
(981, 'https://www.mountainproject.com/user/106788981/detodd')
(982, 'https://www.mountainproject.com/user/108894743/m-bageant')
(983, 'https://www.mountainproject.com/user/105961696/furious-d')
(984, 'https://www.mountainproject.com/user/107621295/mathdesj')
(985, 'https://www.mountainproject.com/user/107663290/logan-schiff')
(986, 'https://www.mountainproject.com/user/106242548/mirandas-daddy')
(987, 'https://www.mountainproject.com/user/106411080/jon-obrien')
(988, 'https://www.mountainproject.com/user/106619418/jasonn')
(989,

(1095, 'https://www.mountainproject.com/user/110133075/freed-caap')
(1096, 'https://www.mountainproject.com/user/107427822/kyle-olsen')
(1097, 'https://www.mountainproject.com/user/110423556/dustin-k')
(1098, 'https://www.mountainproject.com/user/106953888/andrewn')
(1099, 'https://www.mountainproject.com/user/107780250/scott-michelsen')
(1100, 'https://www.mountainproject.com/user/107030540/stevie-k')
(1101, 'https://www.mountainproject.com/user/111598397/matt-way')
(1102, 'https://www.mountainproject.com/user/10232/tony-b')
(1103, 'https://www.mountainproject.com/user/105824694/j-nickel')
(1104, 'https://www.mountainproject.com/user/105828714/adrin-robert')
(1105, 'https://www.mountainproject.com/user/105813053/denis-oconnor')
(1106, 'https://www.mountainproject.com/user/106981854/jeff-dillon')
(1107, 'https://www.mountainproject.com/user/107827568/wlevandowski')
(1108, 'https://www.mountainproject.com/user/106740823/brianws')
(1109, 'https://www.mountainproject.com/user/110683495/ar

(1215, 'https://www.mountainproject.com/user/110737852/andy-danger')
(1216, 'https://www.mountainproject.com/user/200194507/matt-fitzgerald')
(1217, 'https://www.mountainproject.com/user/112846385/annie-levine')
(1218, 'https://www.mountainproject.com/user/110712867/alex-garcia')
(1219, 'https://www.mountainproject.com/user/107097681/meghan-spiro')
(1220, 'https://www.mountainproject.com/user/107816448/nathan-majoros')
(1221, 'https://www.mountainproject.com/user/106708236/amberly-gable')
(1222, 'https://www.mountainproject.com/user/106775372/dustin-lagoy')
(1223, 'https://www.mountainproject.com/user/109526275/chris-bt')
(1224, 'https://www.mountainproject.com/user/109748591/scott-bissi')
(1225, 'https://www.mountainproject.com/user/111900615/mike-lach')
(1226, 'https://www.mountainproject.com/user/110086015/jrm89')
(1227, 'https://www.mountainproject.com/user/111859070/neils')
(1228, 'https://www.mountainproject.com/user/108167130/francisco-rosario')
(1229, 'https://www.mountainproje

(1335, 'https://www.mountainproject.com/user/110452300/alex-feinberg')
(1336, 'https://www.mountainproject.com/user/109052547/melanies')
(1337, 'https://www.mountainproject.com/user/111117552/kirk-newcombe')
(1338, 'https://www.mountainproject.com/user/112070348/dara-hashemi')
(1339, 'https://www.mountainproject.com/user/200106564/o-c')
(1340, 'https://www.mountainproject.com/user/113462994/liamjmccarthy')
(1341, 'https://www.mountainproject.com/user/107460666/tim-mcgivern')
(1342, 'https://www.mountainproject.com/user/112107400/andy-sparks')
(1343, 'https://www.mountainproject.com/user/200132932/max-s')
(1344, 'https://www.mountainproject.com/user/106335624/paul-shultz')
(1345, 'https://www.mountainproject.com/user/107299797/ntableman')
(1346, 'https://www.mountainproject.com/user/107417501/alex-jacques')
(1347, 'https://www.mountainproject.com/user/112077176/adam-constantilos')
(1348, 'https://www.mountainproject.com/user/107753556/alexamarcigliano')
(1349, 'https://www.mountainproje

(1454, 'https://www.mountainproject.com/user/10762/ron-olsen')
(1455, 'https://www.mountainproject.com/user/105871391/mtnjunkie')
(1456, 'https://www.mountainproject.com/user/10589/l-hamilton')
(1457, 'https://www.mountainproject.com/user/106850769/eric-kuenstner')
(1458, 'https://www.mountainproject.com/user/105822976/george-wilson')
(1459, 'https://www.mountainproject.com/user/107861772/russ-moore')
(1460, 'https://www.mountainproject.com/user/109020715/kilka-hamsa')
(1461, 'https://www.mountainproject.com/user/106715529/jason-d')
(1462, 'https://www.mountainproject.com/user/105846604/dean-cool')
(1463, 'https://www.mountainproject.com/user/10121/charles-vernon')
(1464, 'https://www.mountainproject.com/user/109248329/peterkd5bwt')
(1465, 'https://www.mountainproject.com/user/112041069/ai-love')
(1466, 'https://www.mountainproject.com/user/111697802/kenny-trinh')
(1467, 'https://www.mountainproject.com/user/111687838/spencer-perry')
(1468, 'https://www.mountainproject.com/user/1107278

(1573, 'https://www.mountainproject.com/user/107417220/naz-ahmed')
(1574, 'https://www.mountainproject.com/user/108018559/derek-swart')
(1575, 'https://www.mountainproject.com/user/106655109/sam-stephens')
(1576, 'https://www.mountainproject.com/user/105859936/david-greenhouse')
(1577, 'https://www.mountainproject.com/user/105987313/willf')
(1578, 'https://www.mountainproject.com/user/107719532/adria-chamas')
(1579, 'https://www.mountainproject.com/user/107428130/catchen')
(1580, 'https://www.mountainproject.com/user/107854794/joe-grossmann')
(1581, 'https://www.mountainproject.com/user/10270/rich-kelly')
(1582, 'https://www.mountainproject.com/user/106389004/ktaylor')
(1583, 'https://www.mountainproject.com/user/107564764/eric-g')
(1584, 'https://www.mountainproject.com/user/106883214/ryu')
(1585, 'https://www.mountainproject.com/user/108159940/dan-scinto')
(1586, 'https://www.mountainproject.com/user/108185728/hyun-joo-hwang')
(1587, 'https://www.mountainproject.com/user/106961703/ja

(1692, 'https://www.mountainproject.com/user/200183675/chris-bair')
(1693, 'https://www.mountainproject.com/user/200385389/robert-maslanka')
(1694, 'https://www.mountainproject.com/user/105934900/rkm')
(1695, 'https://www.mountainproject.com/user/106810849/tee-cahill')
(1696, 'https://www.mountainproject.com/user/109006073/johninzanti-inzanti')
(1697, 'https://www.mountainproject.com/user/106886632/stephanie-chick')
(1698, 'https://www.mountainproject.com/user/107579237/scott-teske')
(1699, 'https://www.mountainproject.com/user/107085999/john-bain')
(1700, 'https://www.mountainproject.com/user/105823960/karsten-delap')
(1701, 'https://www.mountainproject.com/user/106892600/jesse-m')
(1702, 'https://www.mountainproject.com/user/200145837/raoul-raoulhervez')
(1703, 'https://www.mountainproject.com/user/112137261/chrisp1j')
(1704, 'https://www.mountainproject.com/user/108409515/sam-sweigart')
(1705, 'https://www.mountainproject.com/user/200122520/adam-h')
(1706, 'https://www.mountainproje

(1812, 'https://www.mountainproject.com/user/109443010/vernon-w')
(1813, 'https://www.mountainproject.com/user/107327605/joeny')
(1814, 'https://www.mountainproject.com/user/106135433/griswald')
(1815, 'https://www.mountainproject.com/user/108361027/nick-vigs')
(1816, 'https://www.mountainproject.com/user/200112178/brennan-vandenhoek')
(1817, 'https://www.mountainproject.com/user/200310838/adam-smizaski')
(1818, 'https://www.mountainproject.com/user/108543653/stonebhikku')
(1819, 'https://www.mountainproject.com/user/109638995/szheng')
(1820, 'https://www.mountainproject.com/user/106764511/sean-a-smith')
(1821, 'https://www.mountainproject.com/user/108257827/rafael-nonato')
(1822, 'https://www.mountainproject.com/user/109544573/thechutrain')
(1823, 'https://www.mountainproject.com/user/112142591/daniel-bateman')
(1824, 'https://www.mountainproject.com/user/106959007/jeremy-nelson')
(1825, 'https://www.mountainproject.com/user/11224/cpn-dunsel')
(1826, 'https://www.mountainproject.com/u

(1932, 'https://www.mountainproject.com/user/106013776/charlie-s')
(1933, 'https://www.mountainproject.com/user/107567801/steve-graham')
(1934, 'https://www.mountainproject.com/user/105924373/mbx5-barnas')
(1935, 'https://www.mountainproject.com/user/111820491/john-stoneham')
(1936, 'https://www.mountainproject.com/user/106568741/michal-pasniewski')
(1937, 'https://www.mountainproject.com/user/107082768/johnva')
(1938, 'https://www.mountainproject.com/user/107865932/nicole-e-murray')
(1939, 'https://www.mountainproject.com/user/110590584/mikey-anderson')
(1940, 'https://www.mountainproject.com/user/110781286/chris-moratz')
(1941, 'https://www.mountainproject.com/user/112009752/jonathan-parham')
(1942, 'https://www.mountainproject.com/user/109167471/stephen-montgomery')
(1943, 'https://www.mountainproject.com/user/105795871/tombo')
(1944, 'https://www.mountainproject.com/user/109199664/emily-black')
(1945, 'https://www.mountainproject.com/user/108901800/gabe-torres')
(1946, 'https://www

(2051, 'https://www.mountainproject.com/user/107536111/gary-jones')
(2052, 'https://www.mountainproject.com/user/111536914/allison-tau')
(2053, 'https://www.mountainproject.com/user/15142/adam-peters')
(2054, 'https://www.mountainproject.com/user/106919775/gwb')
(2055, 'https://www.mountainproject.com/user/107388687/jindap')
(2056, 'https://www.mountainproject.com/user/106315900/nate-miller')
(2057, 'https://www.mountainproject.com/user/112524899/matthew-stark')
(2058, 'https://www.mountainproject.com/user/200079884/caitlin-m')
(2059, 'https://www.mountainproject.com/user/111179858/wendyah')
(2060, 'https://www.mountainproject.com/user/200057419/alejandro-t')
(2061, 'https://www.mountainproject.com/user/108647959/izzy-nawfal')
(2062, 'https://www.mountainproject.com/user/112084693/andrew-s')
(2063, 'https://www.mountainproject.com/user/105937206/dan-ward')
(2064, 'https://www.mountainproject.com/user/107608024/joshua-corbin')
(2065, 'https://www.mountainproject.com/user/15148/jim-thomp

(2170, 'https://www.mountainproject.com/user/105822848/aeon-aki')
(2171, 'https://www.mountainproject.com/user/110501405/paul-northup')
(2172, 'https://www.mountainproject.com/user/108732453/wallace')
(2173, 'https://www.mountainproject.com/user/110739544/katie-moenkhaus')
(2174, 'https://www.mountainproject.com/user/107370769/josh-schmaltz')
(2175, 'https://www.mountainproject.com/user/105829653/tim-wolfe')
(2176, 'https://www.mountainproject.com/user/111282455/leann-du')
(2177, 'https://www.mountainproject.com/user/108436732/jeannier3')
(2178, 'https://www.mountainproject.com/user/11812/eli-helmuth')
(2179, 'https://www.mountainproject.com/user/106987324/jayz')
(2180, 'https://www.mountainproject.com/user/108358038/michael-burnell')
(2181, 'https://www.mountainproject.com/user/106257649/jason-killgore')
(2182, 'https://www.mountainproject.com/user/107133892/tomw')
(2183, 'https://www.mountainproject.com/user/111649446/nana-aoki')
(2184, 'https://www.mountainproject.com/user/105975214

(2289, 'https://www.mountainproject.com/user/200399257/marcello-cricco-lizza')
(2290, 'https://www.mountainproject.com/user/112496934/jesse-vanek')
(2291, 'https://www.mountainproject.com/user/106376808/jill-north-wilson')
(2292, 'https://www.mountainproject.com/user/112026181/kristinicole')
(2293, 'https://www.mountainproject.com/user/106000626/brianup-up')
(2294, 'https://www.mountainproject.com/user/105830665/ralph-kolva')
(2295, 'https://www.mountainproject.com/user/10359/leo-paik')
(2296, 'https://www.mountainproject.com/user/106714105/stevendo')
(2297, 'https://www.mountainproject.com/user/106068274/ryan-hunt')
(2298, 'https://www.mountainproject.com/user/107284917/tessa-o')
(2299, 'https://www.mountainproject.com/user/108184331/alexis-wojtanowski')
(2300, 'https://www.mountainproject.com/user/109081636/eberliner')
(2301, 'https://www.mountainproject.com/user/106043351/hans')
(2302, 'https://www.mountainproject.com/user/110518966/sonya-st-john')
(2303, 'https://www.mountainprojec

(2409, 'https://www.mountainproject.com/user/109740864/ryanmullins')
(2410, 'https://www.mountainproject.com/user/106319582/treker')
(2411, 'https://www.mountainproject.com/user/106935843/will-adsit')
(2412, 'https://www.mountainproject.com/user/108132717/aharlec')
(2413, 'https://www.mountainproject.com/user/108373969/hannah-tuttle')
(2414, 'https://www.mountainproject.com/user/112843735/chris-kocher')
(2415, 'https://www.mountainproject.com/user/110687338/ian-mcmullen')
(2416, 'https://www.mountainproject.com/user/107595240/kellen-j')
(2417, 'https://www.mountainproject.com/user/110802936/arthur-torrey')
(2418, 'https://www.mountainproject.com/user/105870073/jordan-k')
(2419, 'https://www.mountainproject.com/user/12291/jamie-givens')
(2420, 'https://www.mountainproject.com/user/106298729/k-man')
(2421, 'https://www.mountainproject.com/user/107550300/david-raines')
(2422, 'https://www.mountainproject.com/user/108156601/steven-scr')
(2423, 'https://www.mountainproject.com/user/10886726

# Add uid and iid to the route ratings (saved as route_ratings_merged.csv)

In [31]:
# Locations of the three input tables.
route_data_file = '../data/route_data.csv'
ratings_file = '../data/route_ratings.csv'
user_data_file = '../data/user_data.csv'

# Load routes, ratings and users (in that order).
df_routes = pd.read_csv(route_data_file)
df_ratings = pd.read_csv(ratings_file)
df_users = pd.read_csv(user_data_file)

# Starting from the ratings table, look up iid by route_url and uid by
# user_url. Both joins are inner joins, so a rating whose route or user has
# no row in the corresponding lookup table is dropped from the result.
df = (
    df_ratings
    .merge(df_routes[['iid', 'route_url']], how='inner', on='route_url')
    .merge(df_users[['uid', 'user_url']], how='inner', on='user_url')
    .sort_values(['iid', 'uid'])  # sorted only for readability
)

# Persist the merged ratings to a new file with the ids as the leading
# columns; the DataFrame index is not written.
df[['iid', 'uid', 'rating', 'route_url', 'user_url']].to_csv(
    '../data/route_ratings_merged.csv',
    index=False,
    encoding='utf-8',
)

In [32]:
# Show the first rows of the ratings before and after the ids were merged in.
display(df_ratings.head(), df.head())

Unnamed: 0,rating,route_url,user_url
0,2,https://www.mountainproject.com/route/108300801/sudoriferous,https://www.mountainproject.com/user/106087077/mark-roth
1,2,https://www.mountainproject.com/route/108300801/sudoriferous,https://www.mountainproject.com/user/109581184/joel-ryan
2,2,https://www.mountainproject.com/route/108300801/sudoriferous,https://www.mountainproject.com/user/200223567/anyo-lesiuk
3,1,https://www.mountainproject.com/route/108300801/sudoriferous,https://www.mountainproject.com/user/107253738/dragons
4,1,https://www.mountainproject.com/route/108300801/sudoriferous,https://www.mountainproject.com/user/106630260/j-serpico


Unnamed: 0,rating,route_url,user_url,iid,uid
0,2,https://www.mountainproject.com/route/108300801/sudoriferous,https://www.mountainproject.com/user/106087077/mark-roth,0,0
142,2,https://www.mountainproject.com/route/108300801/sudoriferous,https://www.mountainproject.com/user/109581184/joel-ryan,0,1
218,2,https://www.mountainproject.com/route/108300801/sudoriferous,https://www.mountainproject.com/user/200223567/anyo-lesiuk,0,2
224,1,https://www.mountainproject.com/route/108300801/sudoriferous,https://www.mountainproject.com/user/107253738/dragons,0,3
248,1,https://www.mountainproject.com/route/108300801/sudoriferous,https://www.mountainproject.com/user/106630260/j-serpico,0,4
