# Gather Data

In [1]:
#import libraries
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

#for api calls
from bs4 import BeautifulSoup
import requests
import json
import time

#personal api and aws keys
import config

import query_helper
# import websrcape_wiki_table

## Create climbs db

In [None]:
# Ask the project-local query_helper module to create the 'climbs' database.
query_helper.create_db('climbs')

## Create route tables

In [None]:
# DDL for the `routes` table: one row per route, keyed by its integer id.
# longitude/latitude are declared `double` instead of `float`: MySQL FLOAT is
# a 4-byte single-precision type (roughly 6-7 significant decimal digits),
# which is too coarse for the 5-decimal coordinates used in this notebook
# (e.g. -114.34096).
route_table = """
CREATE TABLE routes (
    id int NOT NULL,
    name varchar(100),
    type varchar(30),
    rating varchar(20),
    stars float,
    starVotes int,
    pitches int,
    location varchar(255),
    url varchar(255),
    longitude double,
    latitude double,
    PRIMARY KEY (id)
    );
"""

In [None]:
# Pass the `routes` DDL string to query_helper.create_table
# (presumably runs it against the climbs db -- helper is not shown here).
query_helper.create_table(route_table)

In [None]:
# DDL for the `route_description` table: an integer id primary key plus a
# free-text `info` column.
route_describe_table = """
CREATE TABLE route_description (
    id int NOT NULL,
    info TEXT,
    PRIMARY KEY (id)
    );
"""

In [None]:
# Pass the `route_description` DDL string to query_helper.create_table
# (presumably runs it against the climbs db -- helper is not shown here).
query_helper.create_table(route_describe_table)

## Use the Mountain Project API

### Get GPS Coordinates

In [49]:
import xml.etree.ElementTree as ET

# Parse the waypoint XML file from the working directory and keep both the
# parsed tree and its root element for the cells that follow.
waypoints_path = 'waypoints (11).xml'
tree = ET.parse(waypoints_path)
root = tree.getroot()

In [50]:
# Collect the attribute dict of every direct child of the XML root.
# These are the elements' own `attrib` dicts, not copies.
coordinates = [node.attrib for node in root]

In [51]:
# Display the full list of parsed coordinate dicts.
coordinates

[{'lat': '41.653', 'lon': '-74.37805'},
 {'lat': '41.66351', 'lon': '-74.36363'},
 {'lat': '41.67607', 'lon': '-74.3468'},
 {'lat': '41.68402', 'lon': '-74.3365'},
 {'lat': '41.68914', 'lon': '-74.32551'},
 {'lat': '41.68658', 'lon': '-74.31177'},
 {'lat': '41.75223', 'lon': '-74.24806'},
 {'lat': '41.75914', 'lon': '-74.25939'},
 {'lat': '41.77245', 'lon': '-74.23844'},
 {'lat': '41.7617', 'lon': '-74.22882'},
 {'lat': '41.77142', 'lon': '-74.24806'},
 {'lat': '41.70357', 'lon': '-74.26351'},
 {'lat': '41.71049', 'lon': '-74.24325'},
 {'lat': '41.71715', 'lon': '-74.22848'},
 {'lat': '41.72406', 'lon': '-74.22161'},
 {'lat': '41.72791', 'lon': '-74.21165'},
 {'lat': '41.73533', 'lon': '-74.19414'},
 {'lat': '41.74327', 'lon': '-74.18555'},
 {'lat': '41.74914', 'lon': '-74.17448'},
 {'lat': '41.75298', 'lon': '-74.16074'},
 {'lat': '41.76296', 'lon': '-74.1683'},
 {'lat': '41.77755', 'lon': '-74.15113'},
 {'lat': '41.77115', 'lon': '-74.15628'},
 {'lat': '41.78113', 'lon': '-74.14014'}

In [121]:
# Shift every longitude by +0.5, round to 5 decimals, and store it back as a
# string. This mutates `coordinates` in place, so each re-run of the cell
# moves every point another 0.5 further.
for point in coordinates:
    shifted_lon = float(point['lon']) + .5
    point['lon'] = str(round(shifted_lon, 5))

In [122]:
# Display the coordinate list again to inspect the shifted longitudes.
coordinates

[{'lat': '31.47494', 'lon': '-114.34096'},
 {'lat': '30.64721', 'lon': '-113.81346'},
 {'lat': '29.73609', 'lon': '-113.11013'},
 {'lat': '28.73963', 'lon': '-112.31888'},
 {'lat': '27.34411', 'lon': '-111.08805'},
 {'lat': '26.32513', 'lon': '-110.03305'},
 {'lat': '25.45584', 'lon': '-109.68139'},
 {'lat': '24.50032', 'lon': '-108.97805'},
 {'lat': '23.37632', 'lon': '-108.01097'},
 {'lat': '30.04073', 'lon': '-107.30764'},
 {'lat': '28.89356', 'lon': '-104.75806'},
 {'lat': '27.42211', 'lon': '-99.6589'},
 {'lat': '22.89163', 'lon': '-97.98849'},
 {'lat': '24.97901', 'lon': '-102.47223'},
 {'lat': '21.99856', 'lon': '-100.09849'},
 {'lat': '20.27804', 'lon': '-96.05433'},
 {'lat': '18.62149', 'lon': '-98.6039'},
 {'lat': '18.12129', 'lon': '-94.03225'},
 {'lat': '16.27522', 'lon': '-89.37267'},
 {'lat': '14.15624', 'lon': '-84.36143'},
 {'lat': '11.15678', 'lon': '-83.04268'},
 {'lat': '36.13252', 'lon': '-85.1411'},
 {'lat': '36.09704', 'lon': '-84.74548'},
 {'lat': '36.27429', 'lo

In [110]:
# Number of coordinate points currently held in `coordinates`.
len(coordinates)

181

### Using GPS Coordinates to get routes and store to db

In [35]:
def mp_gps_call(url_params, api_key, timeout=30):
    """Call the Mountain Project get-routes-for-lat-lon endpoint.

    Parameters
    ----------
    url_params : dict
        Query-string parameters for the request (the calling loop passes
        'lat', 'lon', 'maxResults' and 'key').
    api_key : str
        API key. Sent as the 'key' parameter when `url_params` does not
        already contain one; previously this argument was accepted but
        never used.
    timeout : float, optional
        Seconds to wait for the server before requests raises, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    The decoded JSON body of the response.
    """
    url = 'https://www.mountainproject.com/data/get-routes-for-lat-lon'

    # Work on a copy so the caller's dict is left untouched.
    params = dict(url_params)
    params.setdefault('key', api_key)

    response = requests.get(url, params=params, timeout=timeout)

    return response.json()

#### Loop over list of GPS coordinates, get routes and insert into db

In [36]:
# Settings that do not change between iterations are set once, up front.
api_key = config.api_key
maxResults = 500
# Values for the optional filters that are commented out of url_params below.
maxDistance = 10
minDiff = '5.0'
maxDiff = '5.16'

for i, coord in enumerate(coordinates):
    lat = coord['lat']   #32.904600
    lon = coord['lon']  #-116.818400

    url_params = {  'lat': str(lat),
                    'lon': str(lon),
                    'maxResults': maxResults,
    #                 'maxDistance' : str(maxDistance),
    #                 'minDiff': str(minDiff),
    #                 'maxDiff': str(maxDiff)
                    'key': api_key
                 }
    # request up to `maxResults` routes for this coordinate
    data = mp_gps_call(url_params, api_key)

    # a missing or falsy 'success' flag is treated as a failed call
    if data.get('success'):
        routes = data['routes']

        #store route data in mysql
        query_helper.route_to_tup(routes)

        #track our progress, one line per completed coordinate
        with open("so_far_we_got.txt", "a") as f:
            f.write(f'{i} iteration {lat},{lon}\n')

    #report the failure, log it, and stop
    else:
        message = f'Error!! stopped at {i} iteration {lat},{lon}'
        # a bare f-string inside a loop displays nothing, so print it
        print(message)
        with open("stopped_at.txt", "a") as f:
            f.write(message + '\n')
        break

    #sleep for a bit to avoid being blocked
    time.sleep(.1)


## Web-scrape Mountain Project for Route Descriptions and Reviews

### Get route ids from the MySQL db

In [2]:
# Load every route id from climbs.routes via query_helper and preview the
# first rows of the result.
id_df = query_helper.query_to_df("SELECT id FROM climbs.routes;")
id_df.head()

Unnamed: 0,id
0,105714722
1,105714728
2,105714731
3,105714734
4,105714737


In [3]:
# import config
# import json
# import requests
# api_key = config.api_key
# import time 
# import datetime
# import mysql.connector
# from mysql.connector import errorcode
# import pandas as pd


# #creates connection, all functions will start by calling this
# def connect():
#     global cnx
#     cnx = mysql.connector.connect(
#     host = config.host,
#     user = config.user,
#     passwd = config.password,
#     database = 'climbs')
#     global cursor
#     cursor = cnx.cursor()

In [4]:
def get_route_info(id_, timeout=30):
    """Scrape the text sections of a Mountain Project route page.

    Parameters
    ----------
    id_ : int or str
        Route id; interpolated into the route page URL.
    timeout : float, optional
        Seconds to wait for the server before requests raises, so a stalled
        connection cannot hang the scraping loop indefinitely.

    Returns
    -------
    list of str
        The text of every element whose CSS class is 'fr-view', in document
        order. Empty when the page contains no such element.
    """
    #get page content
    web_address = f'https://www.mountainproject.com/route/{id_}'
    page = requests.get(web_address, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')

    #grab the sections of interest and keep only their text
    return [section.get_text() for section in soup.find_all(class_='fr-view')]

In [8]:
# Scrape and store the page text for every route id in `remaining`.
# NOTE: `remaining` is built by the cell under "Use below to get remaining
# descriptions" further down; that cell must be run before this one.
for climb_id in remaining:
    # list of text sections scraped from this route's page
    description = get_route_info(climb_id)

    # one (id, info) row; the sections are joined with '-'
    row = (climb_id, '-'.join(description))
    query_helper.route_info_to_db(row)

    # throttling between requests is currently disabled
#     time.sleep(.1)

### Build `remaining`: route ids that do not yet have a stored description (run this cell before the scraping loop above)

In [5]:
# Split the route ids into those that already have a row in
# route_description (`x`) and those still to be scraped (`remaining`).
df_describe = query_helper.query_to_df("SELECT id FROM climbs.route_description")

# A set gives constant-time membership tests; checking each id against the
# numpy array rescanned the whole array once per route id.
described_ids = set(df_describe.id)

x = [route_id for route_id in id_df.id if route_id in described_ids]
remaining = [route_id for route_id in id_df.id if route_id not in described_ids]
len(remaining)

304

In [6]:
# Number of route ids that already have a row in route_description
# (`x` is filled by the previous cell).
len(x)

77230

In [7]:
# Shape of the id frame; its row count should equal len(x) + len(remaining).
id_df.shape

(77534, 1)

In [129]:
# Re-run the route_description id query (same statement as the cell that
# builds `remaining`), rebinding `df_describe` to the fresh result.
df_describe = query_helper.query_to_df("SELECT id FROM climbs.route_description")

In [130]:
# Shape of the re-queried frame, i.e. how many ids the query returned.
df_describe.shape

(70702, 1)