In [22]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo
from splinter import Browser

In [23]:
# Print where the chromedriver binary lives on PATH; that path is what gets
# handed to splinter when the Chrome browser is created.
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

/usr/local/bin/chromedriver


In [24]:
# Open a PyMongo client against the MongoDB server on localhost
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [25]:
# Pick the database and the collection that receive the scraped documents
# (subscript access is equivalent to attribute access in PyMongo)
db = client['surf_db']
collection = db['surf_summary']

In [26]:
# Start a visible (non-headless) Chrome session driven by the chromedriver
# binary at the path below
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', headless=False, **executable_path)

In [27]:
# URL of the page to be scraped (the regional listing of surf spots)
url = 'https://www.surfline.com/surf-reports-forecasts-cams/costa-rica/3624060'

# Load the page through the splinter browser because the page relies on
# JavaScript
browser.visit(url)

In [28]:
# Take the HTML currently held by the browser and parse it with
# BeautifulSoup using the built-in 'html.parser' backend
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [29]:
# Each 'sl-spot-list__ref' div describes one surf spot: its anchor carries the
# spot name (title attribute) and the relative report link (href attribute).
mainInfo = soup.find_all('div', class_='sl-spot-list__ref')

# Absolute report URLs gathered here are visited one by one later on.
link_list = []

for main in mainInfo:
    # Some entries contain no anchor at all. Check for that explicitly instead
    # of relying on a blanket except to swallow
    # "'NoneType' object is not subscriptable".
    anchor = main.a
    if anchor is None:
        print('Skipping spot entry without a link')
        continue

    # .get() yields None for a missing attribute instead of raising KeyError
    location = anchor.get('title')
    href = anchor.get('href')

    # Run only if both the title and the relative link are available.
    # The raw href is tested (not the joined URL) because the joined string
    # always starts with the site prefix and therefore is never empty.
    if not (location and href):
        print('Skipping spot entry with a missing title or href')
        continue

    link = "https://www.surfline.com" + href

    # Print results
    print('-------------')
    print(location)
    print(link)

    # Remember the link so the per-spot pages can be scraped afterwards
    link_list.append(link)

    # Dictionary to be inserted as a MongoDB document
    post = {
        'location': location,
        'url': link
    }

    # Keep the best-effort behaviour: a failed insert is reported and the
    # loop moves on, but only database errors are caught here.
    try:
        collection.insert_one(post)
    except pymongo.errors.PyMongoError as e:
        print('Could not store', location, '-', e)

-------------
Camaronal Surf Report & Forecast
https://www.surfline.com/surf-report/camaronal/584204204e65fad6a7709140
-------------
Puerto Sandino Surf Report & Forecast
https://www.surfline.com/surf-report/puerto-sandino/5842041f4e65fad6a7708f1c
'NoneType' object is not subscriptable
-------------
Shack's/Shifty's Surf Report & Forecast
https://www.surfline.com/surf-report/shack-s-shifty-s/584204204e65fad6a7709983
-------------
Miramar Surf Report & Forecast
https://www.surfline.com/surf-report/miramar/584204204e65fad6a7709147
-------------
El Transito Surf Report & Forecast
https://www.surfline.com/surf-report/el-transito/584204204e65fad6a7709078
-------------
Popoyo Area Rovercam Surf Report & Forecast
https://www.surfline.com/surf-report/popoyo-area-rovercam/5842041f4e65fad6a7708db9
-------------
Santana Surf Report & Forecast
https://www.surfline.com/surf-report/santana/5842041f4e65fad6a7708b44
-------------
Panga Drops Surf Report & Forecast
https://www.surfline.com/surf-report/

In [30]:
# Show every report URL gathered by the loop above
print(link_list)

['https://www.surfline.com/surf-report/camaronal/584204204e65fad6a7709140', 'https://www.surfline.com/surf-report/puerto-sandino/5842041f4e65fad6a7708f1c', 'https://www.surfline.com/surf-report/shack-s-shifty-s/584204204e65fad6a7709983', 'https://www.surfline.com/surf-report/miramar/584204204e65fad6a7709147', 'https://www.surfline.com/surf-report/el-transito/584204204e65fad6a7709078', 'https://www.surfline.com/surf-report/popoyo-area-rovercam/5842041f4e65fad6a7708db9', 'https://www.surfline.com/surf-report/santana/5842041f4e65fad6a7708b44', 'https://www.surfline.com/surf-report/panga-drops/584204204e65fad6a7709146', 'https://www.surfline.com/surf-report/playa-maderas/5842041f4e65fad6a7708dc0', 'https://www.surfline.com/surf-report/tamarindo/5842041f4e65fad6a7708b9b', 'https://www.surfline.com/surf-report/samara/584204204e65fad6a770913d', 'https://www.surfline.com/surf-report/playa-esterillos/5842041f4e65fad6a7708b4f', 'https://www.surfline.com/surf-report/las-pe-itas/5a1dfd8b1d8788001b

In [31]:
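# TODO: navigate through these links with a separate headless browser instead
# of the visible one. The dictionary key must stay 'executable_path', because
# it is unpacked into Browser() as a keyword argument.
#executable_path_2 = {'executable_path': '/usr/local/bin/chromedriver'}
#browser2 = Browser('chrome', **executable_path_2, headless=True)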

In [None]:
# Visit every collected report page, pull out the surf height and the water /
# air temperatures, print them, and store one MongoDB document per spot.
for link in link_list:

    browser.visit(link)

    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    # Look each element up once; soup.find() returns None when it is absent
    title_tag = soup.find('h1', class_='sl-forecast-header__main__title')
    height_tag = soup.find('span', class_='quiver-surf-height')
    weather_tag = soup.find('div', class_='sl-wetsuit-recommender__weather')
    # Inside the weather container the first div holds the water temperature
    # and the second the air temperature
    weather_divs = weather_tag.find_all("div") if weather_tag is not None else []

    # A page missing any expected element is skipped, so one odd page does
    # not abort the whole run with AttributeError / IndexError.
    if title_tag is None or height_tag is None or len(weather_divs) < 2:
        print('Link:', link)
        print('Skipping: expected report elements not found')
        print('-------------')
        continue

    resultsLocation = title_tag.get_text()
    resultsWaves = height_tag.get_text()
    resultsWaterTemp = weather_divs[0].get_text()
    resultsAirTemp = weather_divs[1].get_text()

    print('Link:', link)
    print('Location: ', resultsLocation)
    print('Surf height: ', resultsWaves)
    print('Water Temp: ', resultsWaterTemp)
    print('Air Temp: ', resultsAirTemp)
    print('-------------')

    # Dictionary to be inserted as a MongoDB document. The location and url
    # are stored too; without them the readings cannot be tied to a spot.
    post = {
        'location': resultsLocation,
        'url': link,
        'surf height': resultsWaves,
        'water temp': resultsWaterTemp,
        'air temp': resultsAirTemp
    }

    collection.insert_one(post)

    



Link: https://www.surfline.com/surf-report/camaronal/584204204e65fad6a7709140 -------------
Location:  Camaronal Surf Report & Forecast
Surf height:  4-6FT
Water Temp:  84 - 86 ºF
Air Temp:  80 ºF
Link: https://www.surfline.com/surf-report/puerto-sandino/5842041f4e65fad6a7708f1c -------------
Location:  Puerto Sandino Surf Report & Forecast
Surf height:  6-8FT
Water Temp:  84 - 86 ºF
Air Temp:  82 ºF
Link: https://www.surfline.com/surf-report/shack-s-shifty-s/584204204e65fad6a7709983 -------------
Location:  Shack's/Shifty's Surf Report & Forecast
Surf height:  6-8FT
Water Temp:  84 - 86 ºF
Air Temp:  82 ºF
Link: https://www.surfline.com/surf-report/miramar/584204204e65fad6a7709147 -------------
Location:  Miramar Surf Report & Forecast
Surf height:  6-8FT
Water Temp:  84 - 86 ºF
Air Temp:  82 ºF
Link: https://www.surfline.com/surf-report/el-transito/584204204e65fad6a7709078 -------------
Location:  El Transito Surf Report & Forecast
Surf height:  6-8FT
Water Temp:  84 - 86 ºF
Air Temp

Link: https://www.surfline.com/surf-report/bocas-del-toro/5842041f4e65fad6a7708dc1 -------------
Location:  Bocas del Toro Surf Report & Forecast
Surf height:  4-5FT
Water Temp:  84 - 86 ºF
Air Temp:  82 ºF
Link: https://www.surfline.com/surf-report/westfalia/584204204e65fad6a7709143 -------------
Location:  Westfalia Surf Report & Forecast
Surf height:  3-4FT
Water Temp:  84 - 86 ºF
Air Temp:  80 ºF
Link: https://www.surfline.com/surf-report/cahuita/5842041f4e65fad6a7708c33 -------------
Location:  Cahuita Surf Report & Forecast
Surf height:  3-4FT
Water Temp:  84 - 86 ºF
Air Temp:  78 ºF
Link: https://www.surfline.com/surf-report/salsa-brava/5842041f4e65fad6a7708ba7 -------------
Location:  Salsa Brava Surf Report & Forecast
Surf height:  3-4FT
Water Temp:  84 - 86 ºF
Air Temp:  79 ºF
Link: https://www.surfline.com/surf-report/playa-cocles/584204204e65fad6a7709142 -------------
Location:  Playa Cocles Surf Report & Forecast
Surf height:  3-4FT
Water Temp:  84 - 86 ºF
Air Temp:  79 ºF