Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Initial commit
  • Loading branch information
amotl committed Apr 12, 2018
0 parents commit 4e28712
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
/.idea
/.venv*
74 changes: 74 additions & 0 deletions apicast.py
@@ -0,0 +1,74 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
apicast acquires bee flight forecast information published by Deutscher Wetterdienst (DWD)
Parse information about "Gartenwetter » Bienenflug"
from https://www.dwd.de/DE/fachnutzer/freizeitgaertner/1_gartenwetter/_node.html ff.
See also https://community.hiveeyes.org/t/dwd-prognose-bienenflug/787
Prerequisites::
pip install MechanicalSoup==0.10.0 html-table-extractor==1.3.0 tabulate==0.8.2
Synopsis::
python apicast.py https://www.dwd.de/DE/fachnutzer/freizeitgaertner/1_gartenwetter/berlin_brandenburg/potsdam/_node.html
"""
import sys
import mechanicalsoup
from html_table_extractor.extractor import Extractor
from tabulate import tabulate


def grok_beeflight_forecast(url):
    """
    Fetch a DWD "Gartenwetter" station page and extract the
    "Prognose des Bienenfluges" (bee flight forecast) section.

    :param url: URL of a DWD "Gartenwetter" station page, e.g.
                https://www.dwd.de/DE/fachnutzer/freizeitgaertner/1_gartenwetter/berlin_brandenburg/potsdam/_node.html
    :return: dict with keys 'station' (station label text) and
             'data' (forecast table as a list of row lists)
    :raises ValueError: if the page does not contain the expected
                        "Prognose des Bienenfluges" section
    """

    # Navigate to HTTP resource
    user_agent = u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36'
    browser = mechanicalsoup.StatefulBrowser(user_agent=user_agent)
    page = browser.open(url)

    # Find content section and extract elements
    subject = page.soup.find(string=u'Prognose des Bienenfluges')
    if subject is None:
        # Fail with a clear message instead of an opaque AttributeError
        # when the URL is not a station page or the page layout changed.
        raise ValueError(u'No "Prognose des Bienenfluges" section found at {}'.format(url))
    # The station name follows the first <br> after the section heading.
    station = subject.find_next('br').next.strip()
    # The forecast table is the heading's next sibling <table> element.
    table = subject.parent.find_next_sibling('table')
    # TODO: Read table footer "© Deutscher Wetterdienst, erstellt 12.04.2018 04:00 UTC"

    # Read HTML table
    data = parse_html_table(unicode(table))

    # Ready.
    result = {
        'station': station,
        'data': data,
    }
    return result


def parse_html_table(html):
    """Parse an HTML ``<table>`` snippet into a list of row lists."""
    table_extractor = Extractor(html)
    table_extractor.parse()
    return table_extractor.return_list()


if __name__ == '__main__':

# Sanity checks
if len(sys.argv) < 2:
raise KeyError('Please specify url as single positional argument')
url = sys.argv[1]

# Fetch and extract forecast information
result = grok_beeflight_forecast(url)

# Report about weather station / observation location
print
print u'Prognose des Bienenfluges am Standort {}'.format(result['station'])
print

# Output forecast data
data = result['data']
print tabulate(data[1:], headers=data[0], showindex=False, tablefmt='psql').encode('utf-8')

0 comments on commit 4e28712

Please sign in to comment.