# Gather Data

In [1]:
#import libraries
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

#for api calls
from bs4 import BeautifulSoup
import requests
import json
import time

#personal api and aws keys
import config

import query_helper
# import websrcape_wiki_table

## Create climbs db

In [None]:
# Ask the project-local query_helper module to create the database named 'climbs'
# (create_db is defined outside this notebook).
query_helper.create_db('climbs')

## Create route tables

In [None]:
# DDL for the `routes` table: one row per route, keyed by the route id.
# - IF NOT EXISTS keeps this cell re-runnable once the table has been created.
# - longitude/latitude are DOUBLE: MySQL FLOAT is single precision (roughly 7
#   significant digits), which is too coarse for waypoints such as -116.97993.
route_table = """
CREATE TABLE IF NOT EXISTS routes (
    id int NOT NULL,
    name varchar(100),
    type varchar(30),
    rating varchar(20),
    stars float,
    starVotes int,
    pitches int,
    location varchar(255),
    url varchar(255),
    longitude double,
    latitude double,
    PRIMARY KEY (id)
    );
"""

In [None]:
# Hand the `routes` DDL string to query_helper.create_table
# (helper defined outside this notebook).
query_helper.create_table(route_table)

In [None]:
# DDL for the `route_description` table: the scraped page text (`info`) for a
# route, keyed by the same id used in `routes`.
# IF NOT EXISTS keeps this cell re-runnable once the table has been created.
route_describe_table = """
CREATE TABLE IF NOT EXISTS route_description (
    id int NOT NULL,
    info TEXT,
    PRIMARY KEY (id)
    );
"""

In [None]:
# Hand the `route_description` DDL string to query_helper.create_table
# (helper defined outside this notebook).
query_helper.create_table(route_describe_table)

## Use Mountain Project API

### Get GPS Coordinates

In [2]:
import xml.etree.ElementTree as ET
# Parse the waypoint XML file and keep its root element; the next cell reads the
# attributes of each child of `root`.
# NOTE(review): the path is relative to the notebook's working directory and the
# "(1)" suffix looks like a download rename - confirm this is the intended file.
tree = ET.parse('waypoints (1).xml')
root = tree.getroot()

In [4]:
# One attribute dict per direct child of the XML root, in file order.
coordinates = [waypoint.attrib for waypoint in root]

In [5]:
# Display the collected attribute dicts ('lat' / 'lon' values are strings).
coordinates

[{'lat': '32.72415', 'lon': '-116.97993'},
 {'lat': '32.90872', 'lon': '-117.00192'},
 {'lat': '33.12049', 'lon': '-117.07061'},
 {'lat': '33.00776', 'lon': '-116.48267'},
 {'lat': '33.52393', 'lon': '-117.59544'},
 {'lat': '34.13403', 'lon': '-116.3363'},
 {'lat': '34.08403', 'lon': '-116.28684'},
 {'lat': '34.06129', 'lon': '-116.24288'},
 {'lat': '34.02831', 'lon': '-116.21678'},
 {'lat': '34.02148', 'lon': '-116.17969'},
 {'lat': '34.03058', 'lon': '-116.13024'},
 {'lat': '34.08289', 'lon': '-116.14535'},
 {'lat': '34.08517', 'lon': '-116.18794'},
 {'lat': '34.06925', 'lon': '-116.16046'},
 {'lat': '34.02831', 'lon': '-116.09452'},
 {'lat': '33.99987', 'lon': '-116.13574'},
 {'lat': '34.22034', 'lon': '-117.08074'},
 {'lat': '33.98394', 'lon': '-117.4956'},
 {'lat': '33.968', 'lon': '-118.3418'},
 {'lat': '34.38135', 'lon': '-117.92145'},
 {'lat': '34.44581', 'lon': '-119.20785'},
 {'lat': '34.58152', 'lon': '-119.97712'},
 {'lat': '34.64478', 'lon': '-119.69139'},
 {'lat': '35.426

In [None]:
# Number of waypoints collected; the API loop below makes one call per entry.
len(coordinates)

### Using GPS Coordinates to get routes and store to db

In [None]:
def mp_gps_call(url_params, api_key, timeout=30):
    """Call the Mountain Project ``get-routes-for-lat-lon`` endpoint.

    Parameters
    ----------
    url_params : dict
        Query-string parameters for the request (the notebook passes ``lat``,
        ``lon``, ``maxResults`` and ``key``). The dict is not modified.
    api_key : str or None
        API key. It is sent as the ``key`` parameter only when ``url_params``
        does not already contain one, so existing callers that put the key in
        ``url_params`` produce exactly the same request as before.
    timeout : float or tuple, optional
        Timeout in seconds forwarded to ``requests.get``. Without it a stalled
        connection would block the calling loop indefinitely.

    Returns
    -------
    object
        The decoded JSON body of the response (``response.json()``).

    Raises
    ------
    Whatever ``requests.get`` or ``response.json()`` raise (connection errors,
    timeouts, a body that is not valid JSON).
    """
    url = 'https://www.mountainproject.com/data/get-routes-for-lat-lon'

    # Work on a copy so the caller's dict is never mutated.
    params = dict(url_params)
    if api_key is not None:
        params.setdefault('key', api_key)

    response = requests.get(url, params=params, timeout=timeout)

    return response.json()

#### Loop over list of GPS coordinates, get routes and insert into db

In [None]:
# Request settings do not change between waypoints, so they are set once here.
api_key = config.api_key
maxResults = 500   # sent as the 'maxResults' query parameter
# Not sent at the moment; kept for the optional filters commented out below.
maxDistance = 10
minDiff = '5.0'
maxDiff = '5.16'

for i, coord in enumerate(coordinates):
    # Waypoint to query, e.g. lat 32.904600 / lon -116.818400
    lat = coord['lat']
    lon = coord['lon']

    url_params = {
        'lat': str(lat),
        'lon': str(lon),
        'maxResults': maxResults,
        # Optional filters, currently disabled:
        # 'maxDistance': str(maxDistance),
        # 'minDiff': str(minDiff),
        # 'maxDiff': str(maxDiff),
        'key': api_key,
    }

    # Ask the API for up to `maxResults` routes around this waypoint
    data = mp_gps_call(url_params, api_key)

    # A missing or falsy 'success' flag is treated as a failed call: report it,
    # record where we stopped, and abort the run.
    if not data.get('success'):
        message = f'Error!! stopped at {i} iteration {lat},{lon}'
        print(message)
        with open("stopped_at.txt", "a") as error_log:
            error_log.write(message + '\n')
        break

    routes = data['routes']

    # store route data in mysql (done by the project helper route_to_tup)
    query_helper.route_to_tup(routes)

    # Track progress, one line per completed waypoint
    with open("so_far_we_got.txt", "a") as progress_log:
        progress_log.write(f'{i} iteration {lat},{lon}\n')

    # sleep for a bit to avoid being blocked
    time.sleep(3)


## Webscrape Mountain Project for Route Descriptions and Reviews

### Get route ids from MySQL db

In [2]:
# Load the `id` column of climbs.routes through query_helper.query_to_df
# (helper defined outside this notebook) and preview the first rows.
id_df = query_helper.query_to_df("SELECT id FROM climbs.routes;")
id_df.head()

Unnamed: 0,id
0,105714722
1,105714728
2,105714731
3,105714734
4,105714737


In [None]:
# import config
# import json
# import requests
# api_key = config.api_key
# import time 
# import datetime
# import mysql.connector
# from mysql.connector import errorcode
# import pandas as pd


# #creates connection, all functions will start by calling this
# def connect():
#     global cnx
#     cnx = mysql.connector.connect(
#     host = config.host,
#     user = config.user,
#     passwd = config.password,
#     database = 'climbs')
#     global cursor
#     cursor = cnx.cursor()

In [10]:
def get_route_info(id_, timeout=30):
    """Scrape the text sections of a Mountain Project route page.

    Parameters
    ----------
    id_ : int or str
        Route id, interpolated into
        ``https://www.mountainproject.com/route/{id_}``.
    timeout : float or tuple, optional
        Timeout in seconds forwarded to ``requests.get``. Without it a single
        stalled connection would hang the whole scraping loop.

    Returns
    -------
    list of str
        ``get_text()`` of every element with CSS class ``fr-view``, in the
        order ``find_all`` returns them; an empty list when there are none.

    Notes
    -----
    The HTTP status code is not checked: whatever body comes back is parsed.
    """
    # get page content
    web_address = f'https://www.mountainproject.com/route/{id_}'
    page = requests.get(web_address, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')

    # grab the sections of interest and keep only their text
    return [section.get_text() for section in soup.find_all(class_='fr-view')]

In [57]:
# Work list: ids present in climbs.routes (id_df) with no row yet in
# climbs.route_description. It is rebuilt here so the cell runs on a fresh
# kernel instead of relying on a `remaining` left over from an earlier session.
# NOTE(review): re-running after an interruption resumes where it stopped only
# if route_info_to_db persists each row as it is called - confirm in query_helper.
df_describe = query_helper.query_to_df("SELECT id FROM climbs.route_description")
described_ids = set(df_describe.id)
remaining = [route_id for route_id in id_df.id if route_id not in described_ids]

for climb_id in remaining:
    # Scrape the text sections of this route's page
    description = get_route_info(climb_id)

    # Store one (id, text) pair per route; the sections are joined with '-'
    query_helper.route_info_to_db((climb_id, '-'.join(description)))

    # sleep for a bit to avoid being blocked
    time.sleep(1)

### Use below to get remaining descriptions

In [55]:
# df_describe = query_helper.query_to_df("SELECT id FROM climbs.route_description")
# remaining=[]
# x=[]
# for i in id_df.id:
#     if i in df_describe.id.values:
#         x.append(i)
#     else:
#         remaining.append(i)
# len(remaining)