In [284]:
import re
import json
import os
import requests
import toyplot
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

### 1. Use Mountain Project API to get info from 500 routes near El Capitan
I have already saved the results for this example as ElCapitan_data.txt in the same repository as this notebook. To replicate this yourself, you will need a key for the [Mountain Project API](https://www.mountainproject.com/data).

In [146]:
# base URL for Mountain Project API
baseurl = 'https://www.mountainproject.com/data/get-routes-for-lat-lon?'
userKey = None  # your personal key goes here

# latitude and longitude for El Capitan in Yosemite
lat = '37.732'
lon = '-119.638'

url = baseurl + 'lat=' + lat + '&lon=' + lon + '&maxDistance=1&maxResults=500&key=' + userKey

# request info for for all routes on El Capitan
response = requests.get(url)
rdict = response.json()

In [151]:
# get filepath to save response json
curdir = os.path.dirname(os.path.abspath('.'))
filepath = os.path.join(curdir, "climber-net", "ElCapitan_data.txt")

with open(filepath, 'w') as outfile:
    json.dump(rdict, outfile)

### 2. Filter results to only include climbs that are actually on El Capitan

In [154]:
# read in from file in parent directory
with open(filepath, 'r') as infile:
    rstring = infile.read()
    
rdict = json.loads(rstring)

In [126]:
# convert to pandas df
df = pd.DataFrame(rdict['routes'])

# filter results to only include climbs on El Capitan
def ElCap(x):
    return 'El Capitan' in x

climbs = df.loc[df['location'].apply(ElCap)]
climbs

Unnamed: 0,id,imgMedium,imgSmall,imgSmallMed,imgSqSmall,latitude,location,longitude,name,pitches,rating,starVotes,stars,type,url
0,105924807,https://cdn-files.apstatic.com/climb/106333999...,https://cdn-files.apstatic.com/climb/106333999...,https://cdn-files.apstatic.com/climb/106333999...,https://cdn-files.apstatic.com/climb/106333999...,37.7281,"[California, Yosemite National Park, Yosemite ...",-119.6367,The Nose,31.0,5.9 C2,272,5.0,"Trad, Aid",https://www.mountainproject.com/route/10592480...
1,106154042,https://cdn-files.apstatic.com/climb/112262746...,https://cdn-files.apstatic.com/climb/112262746...,https://cdn-files.apstatic.com/climb/112262746...,https://cdn-files.apstatic.com/climb/112262746...,37.7304,"[California, Yosemite National Park, Yosemite ...",-119.6393,Salathe Wall,35.0,5.9 C2,95,5.0,"Trad, Aid",https://www.mountainproject.com/route/10615404...
5,106152347,https://cdn-files.apstatic.com/climb/112043487...,https://cdn-files.apstatic.com/climb/112043487...,https://cdn-files.apstatic.com/climb/112043487...,https://cdn-files.apstatic.com/climb/112043487...,37.7309,"[California, Yosemite National Park, Yosemite ...",-119.6313,Zodiac,16.0,C3,89,4.8,Aid,https://www.mountainproject.com/route/10615234...
8,105945535,https://cdn-files.apstatic.com/climb/107631232...,https://cdn-files.apstatic.com/climb/107631232...,https://cdn-files.apstatic.com/climb/107631232...,https://cdn-files.apstatic.com/climb/107631232...,37.7281,"[California, Yosemite National Park, Yosemite ...",-119.6369,Sacherer Cracker,1.0,5.10a,180,4.8,Trad,https://www.mountainproject.com/route/10594553...
10,105833467,https://cdn-files.apstatic.com/climb/106323046...,https://cdn-files.apstatic.com/climb/106323046...,https://cdn-files.apstatic.com/climb/106323046...,https://cdn-files.apstatic.com/climb/106323046...,37.73,"[California, Yosemite National Park, Yosemite ...",-119.6255,East Buttress,9.0,5.10b,285,4.5,Trad,https://www.mountainproject.com/route/10583346...
16,105870703,https://cdn-files.apstatic.com/climb/106321512...,https://cdn-files.apstatic.com/climb/106321512...,https://cdn-files.apstatic.com/climb/106321512...,https://cdn-files.apstatic.com/climb/106321512...,37.7281,"[California, Yosemite National Park, Yosemite ...",-119.6369,Salathe (pitch 1),1.0,5.10c,147,4.8,Trad,https://www.mountainproject.com/route/10587070...
19,106110258,https://cdn-files.apstatic.com/climb/108875141...,https://cdn-files.apstatic.com/climb/108875141...,https://cdn-files.apstatic.com/climb/108875141...,https://cdn-files.apstatic.com/climb/108875141...,37.7332,"[California, Yosemite National Park, Yosemite ...",-119.642,Lurking Fear,19.0,5.7 C2,83,4.4,"Trad, Aid",https://www.mountainproject.com/route/10611025...
21,105870188,https://cdn-files.apstatic.com/climb/109565558...,https://cdn-files.apstatic.com/climb/109565558...,https://cdn-files.apstatic.com/climb/109565558...,https://cdn-files.apstatic.com/climb/109565558...,37.7281,"[California, Yosemite National Park, Yosemite ...",-119.6369,Moby Dick,1.0,5.10a,311,4.7,Trad,https://www.mountainproject.com/route/10587018...
25,105991737,https://cdn-files.apstatic.com/climb/106450499...,https://cdn-files.apstatic.com/climb/106450499...,https://cdn-files.apstatic.com/climb/106450499...,https://cdn-files.apstatic.com/climb/106450499...,37.7281,"[California, Yosemite National Park, Yosemite ...",-119.6369,Freeblast,10.0,5.11,103,4.5,Trad,https://www.mountainproject.com/route/10599173...
42,105877768,https://cdn-files.apstatic.com/climb/110270257...,https://cdn-files.apstatic.com/climb/110270257...,https://cdn-files.apstatic.com/climb/110270257...,https://cdn-files.apstatic.com/climb/110270257...,37.7281,"[California, Yosemite National Park, Yosemite ...",-119.6369,"La Cosita, Right",1.0,5.9,166,4.1,Trad,https://www.mountainproject.com/route/10587776...


### 3. Use BeautifulSoup to get FA info from all climbs

In [184]:
def FA_split(string):
    split = re.split(', |: | and |, and | & |-| - |- |\(', string)
    
    return(split)

FA_split(test)

['Layton Kor', 'Dave Dornan', '1959. FFA', 'R. Robbins', 'P. Ament', '1960s']

In [257]:
# data frame to hold all climbs and first ascentionists
zero_data = np.zeros(shape=(len(climbs), 2))
firsts = pd.DataFrame(zero_data, columns=['name', 'FA'])

len(firsts)

# iterate through climbs and record FAs
for i in range(len(firsts)):
    # get unique page for climb
    url = climbs.url.iloc[i]
    response = requests.get(url)
    
    # use BeautifulSoup to find FA field in HTML
    soup = BeautifulSoup(response.text, "html5lib")
    
    details = soup.find_all("table", {"class", "description-details"})    

    for detail in details:
        tdtag = detail.find('tbody')
    
    FAs = tdtag.contents[2].contents[3].contents[0].strip()
    
    # set name and FA in firsts df
    firsts.name.loc[i] = climbs.name.iloc[i]
    firsts.FA.loc[i] = FA_split(FAs)
    
firsts

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


Unnamed: 0,name,FA
0,The Nose,"[FA, Warren Harding, Wayne Merry, George Whitm..."
1,Salathe Wall,"[Royal Robbins, Tom Frost, Chuck Pratt 1961 F..."
2,Zodiac,"[Charlie Porter, 1972, ?)]"
3,Sacherer Cracker,"[FFA, Frank Sacherer, Mike Sherrick, 1964]"
4,East Buttress,"[Allen Steck, Wili Siri, Dick Long, Willi Unso..."
5,Salathe (pitch 1),"[Royal Robbins, Chuck Pratt, Tom Frost 1961]"
6,Lurking Fear,"[Dave Bircheff, Phil Bircheff, and Jim Pettigr..."
7,Moby Dick,"[FA, Herb Swedlund, Penny Carr, 5/63 FFA, Fr..."
8,Freeblast,"[Jim Bridwell, John Long, Kevin Worrall, Mike ..."
9,"La Cosita, Right","[TM Herbert, Steve Roper, May 1963.]"


In [258]:
def FA_clean(FAlist):
    # find first list element containing a digit
    for i in range(len(FAlist)):
        if bool(re.search(r'\d', FAlist[i])):
            # if it is first element, ignore it
            if i == 0:
               pass
                
            # if it is not first element, check if first char is number
            else:
                # if first char, remove that element and all elements after
                if FAlist[i][0].isdigit():
                    newlist = FAlist[:i]
                    break
                
                # if not first char, split and remove digits and all elements after
                else:
                    #print('digit not in char 0 - split and remove digits and later elements')
                    newlist = FAlist[:i + 1]
                    for j in range(len(FAlist[i])):
                        if FAlist[i][j].isdigit():
                            newlist[i] = FAlist[i][:j-1]
                            break
                    break
        else:
            newlist = FAlist

    return(newlist)

for i in range(57):
    print(FA_clean(firsts.FA.iloc[i]))

['FA', 'Warren Harding', 'Wayne Merry', 'George Whitmore']
['Royal Robbins', 'Tom Frost', 'Chuck Pratt']
['Charlie Porter']
['FFA', 'Frank Sacherer', 'Mike Sherrick']
['Allen Steck', 'Wili Siri', 'Dick Long', 'Willi Unsoeld', 'June']
['Royal Robbins', 'Chuck Pratt', 'Tom Frost ']
['Dave Bircheff', 'Phil Bircheff', 'and Jim Pettigrew']
['FA', 'Herb Swedlund', 'Penny Carr']
['Jim Bridwell', 'John Long', 'Kevin Worrall', 'Mike Graham', 'John Bachar', 'Ron Kauk']
['TM Herbert', 'Steve Roper', 'May']
['Jack Turner', 'Royal Robbins', 'April']
['Jeff Schaffer', 'Greg Schaffer']
['Bob Kamps', 'Galen Rowell', 'Dan Doody', 'Wally Upton', 'July']
['Frank Sacherer', 'Jim Bridwell']
['FA 1967 TM Herbert', 'Royal Robbins       FFA']
['Dan Doody', 'Bob Kamps', 'Galen Rowell', 'Wally Upton']
['Huber Baum']
['Steve Sutton', 'Charlie Porter', 'Hugh Burton', 'Chris Nelson']
['Bocarde', 'Porter']
['FA', 'Jim Bridwell', 'Kim Schmitz']
['Mark Chapman', 'Art Higbee']
['Charlie Porter And John', 'Paul de St. 

In [278]:
clean = firsts.FA.apply(FA_clean)

# fix errors missed by FA_clean
clean[0] =  ['Warren Harding', 'Wayne Merry', 'George Whitmore']
clean[3] =  ['Frank Sacherer', 'Mike Sherrick']
clean[4] =  ['Allen Steck', 'Will Siri', 'Dick Long', 'Willi Unsoeld']
clean[5] =  ['Royal Robbins', 'Chuck Pratt', 'Tom Frost ']
clean[6] =  ['Dave Bircheff', 'Phil Bircheff', 'Jim Pettigrew']
clean[7] =  ['Herb Swedlund', 'Penny Carr']
clean[9] =  ['TM Herbert', 'Steve Roper']
clean[10] = ['Jack Turner', 'Royal Robbins']
clean[12] = ['Bob Kamps', 'Galen Rowell', 'Dan Doody', 'Wally Upton']
clean[14] = ['TM Herbert', 'Royal Robbins']
clean[16] = ['Alexander Huber', 'Thomas Huber']
clean[18] = ['Gary Bocarde', 'Charlie Porter']
clean[19] = ['Jim Bridwell', 'Kim Schmitz']
clean[21] = ['Charlie Porter', 'John-Paul de St. Croix']
clean[22] = ['Dale Bard']
clean[23] = ['Tobin Sorenson', 'John Bachar']
clean[24] = ['Alexander Huber', 'Max Reichel']
clean[25] = ['Royal Robbins', 'Yvon Chouinard', 'Chuck Pratt', 'Tom Frost']
clean[27] = ['Mason Earle', 'Brad Gobright']
clean[28] = ['Hugh Burton', 'Steve Sutton']
clean[29] = ['Walter Rosenthal', 'Tom Carter', 'Alan Bard']
clean[30] = ['Charlie Row', 'Bill Price', 'Guy Thompson']
clean[34] = ['Jacek Czyz']
clean[37] = ['Mead Hargis', 'Kim Schmitz']
clean[39] = ['Rick Lovelace']
clean[40] = ['Bruce Hawkins', 'Mark Chapman']
clean[43] = ['Bob Kamps', 'Jim Sims']
clean[46] = ['Tommy Caldwell', 'Kevin Jorgeson']
clean[47] = ['Yvon Chouinard', 'TM Herbert']
clean[48] = ['Joe Kelsey', 'Roman Laba', 'John Hudson']
clean[49] = ['Jim Beyer']
clean[50] = ['Chuck Pratt', 'Royal Robbins']
clean[53] = ['Mark Corbett', 'Gary Edmondson', 'Rich Albuschkat']

clean

0        [Warren Harding, Wayne Merry, George Whitmore]
1               [Royal Robbins, Tom Frost, Chuck Pratt]
2                                      [Charlie Porter]
3                       [Frank Sacherer, Mike Sherrick]
4     [Allen Steck, Will Siri, Dick Long, Willi Unso...
5              [Royal Robbins, Chuck Pratt, Tom Frost ]
6         [Dave Bircheff, Phil Bircheff, Jim Pettigrew]
7                           [Herb Swedlund, Penny Carr]
8     [Jim Bridwell, John Long, Kevin Worrall, Mike ...
9                             [TM Herbert, Steve Roper]
10                         [Jack Turner, Royal Robbins]
11                       [Jeff Schaffer, Greg Schaffer]
12    [Bob Kamps, Galen Rowell, Dan Doody, Wally Upton]
13                       [Frank Sacherer, Jim Bridwell]
14                          [TM Herbert, Royal Robbins]
15    [Dan Doody, Bob Kamps, Galen Rowell, Wally Upton]
16                      [Alexander Huber, Thomas Huber]
17    [Steve Sutton, Charlie Porter, Hugh Burton

### 4. Convert to unipartite network of climbers

### 5. Bar graph of climbs by first ascentionists on El Capitan

In [319]:
climbers = []

for i in range(len(clean)):
    climbers = climbers + clean.loc[i]

arr = np.unique(climbers, return_counts=True)
arr = np.core.records.fromarrays(arr)

counts = np.sort(arr, order='f1')[::-1]
counts[:25]

rec.array([('Royal Robbins', 6), ('Jim Bridwell', 5), ('TM Herbert', 4),
           ('Chuck Pratt', 4), ('Charlie Porter', 4), ('Bob Kamps', 4),
           ('Yvon Chouinard', 3), ('Kim Schmitz', 3),
           ('Frank Sacherer', 3), ('Will Siri', 2), ('Wally Upton', 2),
           ('Troy Johnson', 2), ('Tom Frost', 2), ('Steve Sutton', 2),
           ('Steve Roper', 2), ('Mark Chapman', 2), ('John Bachar', 2),
           ('Jim Pettigrew', 2), ('Hugh Burton', 2), ('Galen Rowell', 2),
           ('Dan Doody', 2), ('Charlie Row', 2), ('Allen Steck', 2),
           ('Alexander Huber', 2), ('unknown', 1)],
          dtype=[('f0', '<U22'), ('f1', '<i8')])

In [326]:
#bars = np.vstack([range(25), counts.f1[:25]])
#bars

canvas = toyplot.Canvas(700, 500)
axis = canvas.cartesian()
axis.bars(range(25), counts.f1[:25], title=counts.f0[:25]);