# Gather Data

In [62]:
#import libraries
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

#for api calls
from bs4 import BeautifulSoup
import requests
import json
import time

#personal api and aws keys
import config

import query_helper
# import websrcape_wiki_table

## Create climbs db

In [None]:
# Ask the project's query_helper module to create the database named 'climbs'.
query_helper.create_db('climbs')

## Create route tables

In [None]:
# DDL for the `routes` table: one row per route, keyed by the route id.
# Coordinates are declared DOUBLE rather than FLOAT: MySQL's FLOAT is single
# precision (about 7 significant digits), so a value such as -116.34096 is
# rounded when stored and read back.
route_table = """
CREATE TABLE routes (
    id int NOT NULL,
    name varchar(100),
    type varchar(30),
    rating varchar(20),
    stars float,
    starVotes int,
    pitches int,
    location varchar(255),
    url varchar(255),
    longitude double,
    latitude double,
    PRIMARY KEY (id)
    );
"""

In [None]:
# Pass the CREATE TABLE statement held in `route_table` to the db helper.
query_helper.create_table(route_table)

In [None]:
# DDL for the `route_description` table: one free-text `info` field per route,
# keyed by `id` (the scraping loop later stores the route id here).
route_describe_table = """
CREATE TABLE route_description (
    id int NOT NULL,
    info TEXT,
    PRIMARY KEY (id)
    );
"""

In [None]:
# Pass the CREATE TABLE statement held in `route_describe_table` to the db helper.
query_helper.create_table(route_describe_table)

## Use the Mountain Project API

### Get GPS Coordinates

In [63]:
# Parse the waypoint XML file (path is relative to the notebook's working
# directory) and keep its top-level element in `root`.
# NOTE(review): this import lives here rather than in the imports cell at the
# top of the notebook.
import xml.etree.ElementTree as ET
tree = ET.parse('waypoints (4).xml')
root = tree.getroot()

In [64]:
# One attribute dict per child element of the XML root, in document order.
coordinates = [waypoint.attrib for waypoint in root]

In [65]:
coordinates

[{'lat': '31.47494', 'lon': '-116.34096'},
 {'lat': '30.64721', 'lon': '-115.81346'},
 {'lat': '29.73609', 'lon': '-115.11013'},
 {'lat': '28.73963', 'lon': '-114.31888'},
 {'lat': '27.34411', 'lon': '-113.08805'},
 {'lat': '26.32513', 'lon': '-112.03305'},
 {'lat': '25.45584', 'lon': '-111.68139'},
 {'lat': '24.50032', 'lon': '-110.97805'},
 {'lat': '23.37632', 'lon': '-110.01097'},
 {'lat': '30.04073', 'lon': '-109.30764'},
 {'lat': '28.89356', 'lon': '-106.75806'},
 {'lat': '27.42211', 'lon': '-101.6589'},
 {'lat': '22.89163', 'lon': '-99.98849'},
 {'lat': '24.97901', 'lon': '-104.47223'},
 {'lat': '21.99856', 'lon': '-102.09849'},
 {'lat': '20.27804', 'lon': '-98.05433'},
 {'lat': '18.62149', 'lon': '-100.6039'},
 {'lat': '18.12129', 'lon': '-96.03225'},
 {'lat': '16.27522', 'lon': '-91.37267'},
 {'lat': '14.15624', 'lon': '-86.36143'},
 {'lat': '11.15678', 'lon': '-85.04268'},
 {'lat': '36.13252', 'lon': '-87.1411'},
 {'lat': '36.09704', 'lon': '-86.74548'},
 {'lat': '36.27429', '

In [66]:
len(coordinates)

181

### Using GPS Coordinates to get routes and store to db

In [67]:
def mp_gps_call(url_params, api_key, timeout=30):
    """Query Mountain Project's get-routes-for-lat-lon endpoint.

    Parameters
    ----------
    url_params : dict
        Query-string parameters for the request (the caller in this notebook
        sends 'lat', 'lon', 'maxResults' and 'key'). The dict is not modified.
    api_key : str
        API key. It is sent as the 'key' parameter unless `url_params`
        already supplies one.
    timeout : float, optional
        Seconds to wait on the server before `requests` raises a timeout
        error, so a stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    The decoded JSON body of the response.
    """
    url = 'https://www.mountainproject.com/data/get-routes-for-lat-lon'

    # Work on a copy so the caller's dict is left untouched, and make sure the
    # key is actually sent even when the caller only passed it as `api_key`.
    params = dict(url_params)
    params.setdefault('key', api_key)

    response = requests.get(url, params=params, timeout=timeout)

    return response.json()

#### Loop over list of GPS coordinates, get routes and insert into db

In [68]:
# Settings that are identical for every coordinate; assigned once up front.
api_key = config.api_key
maxResults = 500
maxDistance = 10
minDiff = '5.0'
maxDiff = '5.16'

for i, coord in enumerate(coordinates):
    # coordinates of the current waypoint
    lat = coord['lat']   # e.g. 32.904600
    lon = coord['lon']   # e.g. -116.818400

    url_params = {'lat': str(lat),
                  'lon': str(lon),
                  'maxResults': maxResults,
                  # optional filters, currently disabled:
                  # 'maxDistance': str(maxDistance),
                  # 'minDiff': str(minDiff),
                  # 'maxDiff': str(maxDiff),
                  'key': api_key,
                  }

    # request up to `maxResults` routes for this coordinate
    data = mp_gps_call(url_params, api_key)

    # .get() so a payload without a 'success' field is handled as a failure
    # instead of raising KeyError
    if data.get('success'):
        routes = data['routes']

        # store route data in mysql
        query_helper.route_to_tup(routes)

        # track our progress, one line per processed coordinate
        with open("so_far_we_got.txt", "a") as f:
            f.write(f'{i} iteration {lat},{lon}\n')

    # report the failure on screen and on disk, then stop
    else:
        message = f'Error!! stopped at {i} iteration {lat},{lon}'
        print(message)
        with open("stopped_at.txt", "a") as f:
            f.write(message + '\n')
        break

    # sleep for a bit to avoid being blocked
    time.sleep(.1)


## Web-scrape Mountain Project for Route Descriptions and Reviews

### Get route ids from the MySQL db

In [69]:
# Load every id stored in climbs.routes through the query helper and preview
# the first rows.
id_df = query_helper.query_to_df("SELECT id FROM climbs.routes;")
id_df.head()

Unnamed: 0,id
0,105714722
1,105714728
2,105714731
3,105714734
4,105714737


In [70]:
# import config
# import json
# import requests
# api_key = config.api_key
# import time 
# import datetime
# import mysql.connector
# from mysql.connector import errorcode
# import pandas as pd


# #creates connection, all functions will start by calling this
# def connect():
#     global cnx
#     cnx = mysql.connector.connect(
#     host = config.host,
#     user = config.user,
#     passwd = config.password,
#     database = 'climbs')
#     global cursor
#     cursor = cnx.cursor()

In [84]:
def get_route_info(id_, timeout=30):
    """Scrape the text sections of a Mountain Project route page.

    Parameters
    ----------
    id_ : int or str
        Route id; it is interpolated into the route page URL.
    timeout : float, optional
        Seconds to wait on the server before `requests` raises a timeout
        error, so one stalled page cannot hang a long scraping run.

    Returns
    -------
    list of str
        The text of every element on the page whose CSS class is 'fr-view',
        in document order. Empty when the page has no such element.
    """
    # get page content
    web_address = f'https://www.mountainproject.com/route/{id_}'
    page = requests.get(web_address, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')

    # grab the sections of interest and keep only their text
    sections = soup.find_all(class_='fr-view')
    return [section.get_text() for section in sections]

In [85]:
# NOTE: `remaining` is built in the "Use below to get remaining descriptions"
# cell further down, so that cell has to be run before this one.
for climb_id in remaining:
    # fetch the list of text sections scraped from this route's page

    # (one web request per route id)
    description = get_route_info(climb_id)
    
    # join the sections with '-' and hand the (id, text) tuple to the db helper
    query_helper.route_info_to_db((climb_id, '-'.join(description)))

    # throttling between requests is currently disabled (sleep commented out)
#     time.sleep(.1)

### Use the cell below to find the routes that still need descriptions

In [86]:
# Split the route ids into those that already have a stored description (`x`)
# and those that still need one (`remaining`).
df_describe = query_helper.query_to_df("SELECT id FROM climbs.route_description")

# A set gives constant-time membership tests; checking each id against the
# raw array re-scans the whole array once per route.
described_ids = set(df_describe.id.tolist())

remaining = []
x = []
for i in id_df.id:
    if i in described_ids:
        x.append(i)
    else:
        remaining.append(i)
len(remaining)

0

In [87]:
len(x)

65962

In [80]:
id_df.shape

(65962, 1)

In [81]:
# Load the ids that currently have a row in climbs.route_description.
df_describe = query_helper.query_to_df("SELECT id FROM climbs.route_description")

In [82]:
df_describe.shape

(58461, 1)