-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add initial helpers for scraping Sugarloaf report pages
- Loading branch information
Showing
4 changed files
with
167 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
import requests | ||
from bs4 import BeautifulSoup | ||
import dateparser | ||
|
||
URL = 'http://sugarloaf.com/the-mountain/trails-and-lifts' | ||
|
||
|
||
def trail_name(trail):
    """Return the trail's name, taken from the element's first child node.

    Raises:
        AttributeError: if ``trail`` lacks the expected attributes; the
            offending object is passed as the exception argument.
    """
    try:
        first_child = trail.contents[0]
    except AttributeError:
        raise AttributeError(trail)
    return first_child
|
||
|
||
def trail_status(trail):
    """Return True when the trail is open, False when it is closed.

    A trail is considered closed when its CSS class list contains either
    ``closed`` or ``snowmaking-closed``.
    """
    css_classes = trail.attrs['class']
    is_closed = 'closed' in css_classes or 'snowmaking-closed' in css_classes
    return not is_closed
|
||
def trail_snowmaking(trail):
    """Return True when snowmaking is in progress on the trail.

    Snowmaking is signalled by either the ``snowmaking-closed`` or the
    ``snowmaking-open`` CSS class on the trail element.
    """
    css_classes = trail.attrs['class']
    snowmaking_flags = ('snowmaking-closed', 'snowmaking-open')
    return any(flag in css_classes for flag in snowmaking_flags)
|
||
# Recognised difficulty classes in order of preference. When an element
# carries more than one of them (for example a rating plus 'terrain-park'),
# the earliest entry wins, so the result never depends on set ordering.
_DIFFICULTY_PRIORITY = (
    'beginner',
    'intermediate',
    'double-black',
    'black',
    'terrain-park',
)

# Kept under its historical (misspelled) name for backward compatibility.
dificulty = set(_DIFFICULTY_PRIORITY)


def trail_difficulty(trail):
    """Return the difficulty class of the given trail element.

    The element's CSS classes are checked against the known difficulty
    classes in a fixed priority order, so the answer is the same on every
    run even when several difficulty classes are present.

    Raises:
        IndexError: if none of the known difficulty classes is present; the
            offending element is passed as the exception argument.
    """
    css_classes = trail.attrs['class']
    for level in _DIFFICULTY_PRIORITY:
        if level in css_classes:
            return level
    raise IndexError(trail)
|
||
|
||
def trail_groomed(trail):
    """Return True when the trail element carries the ``groomed`` CSS class."""
    css_classes = trail.attrs['class']
    return 'groomed' in css_classes
|
||
|
||
def trail_terrain_park(trail):
    """Return True when the trail element carries the ``terrain-park`` CSS class."""
    css_classes = trail.attrs['class']
    return 'terrain-park' in css_classes
|
||
|
||
def trail_area(trail):
    """Return the mountain area the trail is listed under.

    The area is the first child node of the closest preceding ``h3``
    sibling of the trail element.

    Raises:
        AttributeError: if no such heading is found (or ``trail`` lacks the
            expected attributes); the offending object is passed as the
            exception argument.
    """
    try:
        heading = trail.find_previous_sibling('h3')
        area = heading.contents[0]
    except AttributeError:
        raise AttributeError(trail)
    return area
|
||
|
||
def update_trails(soup):
    """Yield one dict per Sugarloaf trail found in the parsed report page.

    Args:
        soup: parsed page exposing Beautiful Soup's ``find`` / ``find_all``
            search API.

    Yields:
        dict with the keys ``name``, ``open``, ``difficulty``, ``groomed``,
        ``terrain-park``, ``area`` and ``snowmaking``, each filled in by the
        matching ``trail_*`` helper.
    """
    trail_status_div = soup.find('div', {'class': 'trail-status'})

    # ``attrs`` has to be a dict mapping attribute name to value. A set such
    # as {'class', 'trail'} would be interpreted as "class is 'class' OR
    # 'trail'" rather than "class is 'trail'".
    all_trail_divs = trail_status_div.find_all('div', {'class': 'trail'})

    for trail_div in all_trail_divs:
        yield {
            'name': trail_name(trail_div),
            'open': trail_status(trail_div),
            'difficulty': trail_difficulty(trail_div),
            'groomed': trail_groomed(trail_div),
            'terrain-park': trail_terrain_park(trail_div),
            'area': trail_area(trail_div),
            'snowmaking': trail_snowmaking(trail_div),
        }
|
||
|
||
def lift_name(lift):
    """Return the lift's name, taken from the element's first child node."""
    children = lift.contents
    return children[0]
|
||
|
||
statuses = {'open', 'closed'}


def lift_status(lift):
    """Return which of the known status classes the lift element carries.

    Raises:
        IndexError: if the element has none of the classes in ``statuses``;
            the offending element is passed as the exception argument.
    """
    matching = set(lift.attrs['class']) & statuses
    if not matching:
        raise IndexError(lift)
    return list(matching)[0]
|
||
def update_lifts(soup):
    """Yield one dict per Sugarloaf lift found in the parsed report page.

    Args:
        soup: parsed page exposing Beautiful Soup's ``find`` / ``find_all``
            search API.

    Yields:
        dict with the keys ``name`` (from ``lift_name``) and ``status``
        (from ``lift_status``).
    """
    div_lift_status = soup.find('div', {'class': 'lift-status'})
    lifts_divs = div_lift_status.find_all('div', {'class': 'lift'})

    for lift_div in lifts_divs:
        yield {
            'name': lift_name(lift_div),
            'status': lift_status(lift_div),
        }
|
||
|
||
def update_time(soup):
    """Return when the lift and trail report was last updated.

    The timestamp text is read from the first ``<small>`` element inside the
    ``content--right`` div; everything after the first "of" in that text is
    handed to ``dateparser.parse``.

    Returns:
        Whatever ``dateparser.parse`` returns for the extracted text.

    Raises:
        IndexError: if the text contains no "of".
    """
    right_content = soup.find('div', {'class': 'content--right'})
    condition_update_string = right_content.find('small').contents[0]
    # maxsplit=1 keeps the whole remainder, so a later "of" inside the date
    # itself (e.g. "5th of January") does not truncate the timestamp.
    condition_time_string = condition_update_string.strip().split('of', 1)[1]
    return dateparser.parse(condition_time_string)
|
||
|
||
if __name__ == '__main__':
    import json

    # Bound the wait so an unresponsive server cannot hang the script, and
    # fail loudly on HTTP errors instead of parsing an error page.
    r = requests.get(URL, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, 'lxml')

    trails = list(update_trails(soup))

    lifts = list(update_lifts(soup))

    all_statuses = {
        'trails': trails,
        'lifts': lifts,
        'update datetime': update_time(soup).isoformat(),
    }

    # Dump the scraped snapshot next to the script as JSON.
    with open('sugarloaf.json', 'w') as f:
        json.dump(all_statuses, f)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import requests | ||
from bs4 import BeautifulSoup | ||
import dateparser | ||
|
||
URL = 'http://sugarloaf.com/the-mountain/daily-report' | ||
|
||
def update_time(soup):
    """Return when the daily report was last updated.

    The timestamp text is read from the first ``<small>`` element inside the
    ``content--right`` div; everything after the first "of" in that text is
    handed to ``dateparser.parse``.

    Returns:
        Whatever ``dateparser.parse`` returns for the extracted text.

    Raises:
        IndexError: if the text contains no "of".
    """
    right_content = soup.find('div', {'class': 'content--right'})
    condition_update_string = right_content.find('small').contents[0]
    # maxsplit=1 keeps the whole remainder, so a later "of" inside the date
    # itself (e.g. "5th of January") does not truncate the timestamp.
    condition_time_string = condition_update_string.strip().split('of', 1)[1]
    return dateparser.parse(condition_time_string)
|
||
|
||
def report_text(soup):
    """Return the daily report's paragraphs as one HTML string.

    Every ``<p>`` element inside the ``daily-report`` div is rendered with
    its ``decode()`` method and the results are concatenated in document
    order. An empty string is returned when there are no paragraphs.
    """
    report_div = soup.find('div', {'class': 'daily-report'})
    # join builds the result in one pass rather than re-copying the
    # accumulated string for every paragraph.
    return ''.join(p.decode() for p in report_div.find_all('p'))
|
||
|
||
def report_reporter(soup):
    """Return the current Snow Reporter's name.

    The name is the first child node of the ``<strong>`` element inside the
    ``signature`` div of the ``daily-report`` div.
    """
    report_div = soup.find('div', {'class': 'daily-report'})
    signature_div = report_div.find('div', {'class': 'signature'})
    name_tag = signature_div.find('strong')
    return name_tag.contents[0]