diff --git a/backfill_function.py b/backfill_function.py
deleted file mode 100644
index a41771c..0000000
--- a/backfill_function.py
+++ /dev/null
@@ -1,91 +0,0 @@
-from scraper import *
-from main import main
-from slack import *
-from lib import yaml
-from lib import requests
-from lib import pymysql
-from lib.bs4 import BeautifulSoup, SoupStrainer
-import datetime
-import functools
-import json
-import logging
-import os
-import re
-import sys
-
-from collections import defaultdict
-
-sys.path.insert(1, './lib')
-sys.path.insert(1, './src')
-
-
-logging.basicConfig(
-    filename=LOG_PATH, format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
-logging.info("=== Run started ===")
-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
-
-
-def lambda_handler(event, context):
-    try:
-        response = get_page(os.environ['untappd_url'])
-        checkins = gather_checkins(response)
-
-        logger.info("gathereed " + str(len(checkins)) + " checkins")
-
-        latest_checkin_id = main(logger, checkins, event['latest_checkin_id'])
-
-        if latest_checkin_id is None:
-            latest_checkin_id = event['latest_checkin_id']
-
-        return {
-            'statusCode': 200,
-            'last_checkin_id': latest_checkin_id
-        }
-    except Exception as e:
-        return {
-            'statusCode': 500,
-            'error': e
-        }
-
-
-def main(logger, checkins, last_checkin_id):
-    slackblock_stack = []
-    # the checkins are fetched latest -> oldest
-    # > clean checkin data
-    # > store the first checkin's id as 'latest' -- this is the latest checkin reported by untappd
-    # > if the current checkin's id is the same as the one from last run, then we've run out of new checkins
-    # > if the previous is true, AND the current checkin's id is the same as the 'latest' checkin's id, then there are no new checkins at all
-    # > just terminate, no work left to do
-    # > else, store the ID and finish sending the remaining new checkins
-    # > store the latest checkin ID and move on
-
-    # in order to display them in the right order, the list needs to be reversed
-    # also need to store the latest checkin's id for next run, which is easier when it's at index [0]
-
-    # fetch checkins in default untappd order (latest -> oldest)
-    latest_checkin_id = None
-    for i, c in enumerate(checkins):
-        clean_checkin = scrape_checkin(c)
-
-        if i == 0:
-            latest_checkin_id = clean_checkin['checkin_id']
-
-        if clean_checkin['checkin_id'] == last_checkin_id:
-            if clean_checkin['checkin_id'] == latest_checkin_id:
-                logger.info("no new checkins")
-                return
-            # write_latest_checkin_id(latest_checkin_id)
-            break
-
-        # convert to slack message blocks
-        slackblock_stack.append(build_slackblock(clean_checkin))
-
-    # reverse and continue
-    slackblock_stack.reverse()
-
-    for c in slackblock_stack:
-        post_to_webhook(os.environ['webhook_url'], json.dumps(c))
-
-    return latest_checkin_id
-    # write_latest_checkin_id(latest_checkin_id, config)
diff --git a/lambda_function.py b/lambda-daily_doug.py
similarity index 99%
rename from lambda_function.py
rename to lambda-daily_doug.py
index d60898e..411fc0b 100755
--- a/lambda_function.py
+++ b/lambda-daily_doug.py
@@ -33,7 +33,6 @@ def lambda_handler(event, context):
         response = get_page(os.environ['untappd_url'])
         checkins = gather_checkins(response)
 
-        logger.info("gathereed " + str(len(checkins)) + " checkins" )
 
         latest_checkin_id = main(logger, checkins, event['latest_checkin_id'])
 
diff --git a/deploy_to_lambda.sh b/lambda-deploy-daily_doug.sh
similarity index 75%
rename from deploy_to_lambda.sh
rename to lambda-deploy-daily_doug.sh
index 6e8572c..e296441 100644
--- a/deploy_to_lambda.sh
+++ b/lambda-deploy-daily_doug.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-zip -r package.zip lambda_function.py lib/ src/ && \
+zip -r package.zip lambda-daily_doug.py lib/ src/ && \
 aws lambda update-function-code \
     --function-name dailyDoug \
     --zip-file fileb://./package.zip \
diff --git a/lambda-deploy-untappd_api.sh b/lambda-deploy-untappd_api.sh
new file mode 100644
index 0000000..dae6f16
--- /dev/null
+++ b/lambda-deploy-untappd_api.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+zip -r package.zip lambda-untappd_api.py lib/ src/ && \
+aws lambda update-function-code \
+    --function-name untappdApi \
+    --zip-file fileb://./package.zip \
+    --profile optate \
+    --region us-east-2
diff --git a/lambda-untappd_api.py b/lambda-untappd_api.py
new file mode 100755
index 0000000..dfaca89
--- /dev/null
+++ b/lambda-untappd_api.py
@@ -0,0 +1,54 @@
+import datetime
+import functools
+import json
+import logging
+import os
+import re
+import sys
+
+from collections import defaultdict
+
+sys.path.insert(1, './lib')
+sys.path.insert(1, './src')
+
+from lib.bs4 import BeautifulSoup, SoupStrainer
+from lib import pymysql
+from lib import requests
+from lib import yaml
+
+
+from scraper import *
+from slack import *
+from untappd_api import *
+
+
+
+logging.basicConfig(
+    filename=LOG_PATH, format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
+logging.info("=== Run started ===")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+
+def lambda_handler(event, context):
+
+    if not event['queryStringParameters'] or not event['queryStringParameters']['beercode']:
+        return {
+            'statusCode': 400,
+            'msg': 'Did not get beercode',
+            'event': event
+        }
+
+    beercode = event['queryStringParameters']['beercode']
+    beer = fetchBeerInfo(beercode)
+    beer_info = fetchDougBeerInfo(beer['beer_slug'])
+    checkin_review = fetchCheckinReview(beer_info['recent_checkin_id'])
+
+    return {
+        'statusCode': 200,
+        'body': json.dumps({
+            'beer': beer,
+            'beer_info': beer_info,
+            'review': checkin_review
+        })
+    }
diff --git a/src/scraper.py b/src/scraper.py
index 3fef49a..dbe0b7f 100755
--- a/src/scraper.py
+++ b/src/scraper.py
@@ -18,6 +18,10 @@
     filename=LOG_PATH, format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
 logging.info("=== Run started ===")
 
+class checkin_info:
+    def __init__(self):
+        self.review = ''
+
 def prepend_hostname(path: str) -> str:
     return "https://untappd.com" + path
 
@@ -35,7 +39,6 @@ def find_rating_in_class_list(classes: list) -> str:
     raise Exception(
         "could not find rating in classlist: [{}]".format(', '.join(classes)))
 
-
 def scrape_checkin_date(checkin_container) -> dict:
     feedback = checkin_container.find(
         'div', class_='checkin').find('div', class_='feedback')
@@ -43,6 +46,19 @@ def scrape_checkin_date(checkin_container) -> dict:
     return date
 
 
+def scrape_checkin_review(http_response_obj: requests.Response):
+    checkin_info = []
+    soup = BeautifulSoup(http_response_obj.content, 'html.parser')
+    find_all_result = soup.find_all(id=re.compile(r"^translate_\d+$"))
+    if (len(find_all_result) != 1):
+        print("found {} comments on page.".format(len(find_all_result)))
+        return None
+    node = find_all_result[0]
+
+    return node.text.strip()
+
+
+
 def scrape_checkin(checkin_container) -> dict:
     checkin_id = checkin_container['data-checkin-id']
     logging.info("processing checkin {}".format(checkin_id))
diff --git a/src/untapped_api.py b/src/untappd_api.py
similarity index 62%
rename from src/untapped_api.py
rename to src/untappd_api.py
index 2a449d7..491b652 100644
--- a/src/untapped_api.py
+++ b/src/untappd_api.py
@@ -1,6 +1,9 @@
 import requests
 import os
 
+import scraper
+
+
 def fetchBeerInfo(barcode):
     url = "https://api.untappd.com/v4/beer/checkbarcodemultiple?&access_token={}&upc={}".format(
         os.environ['untappd_access_token'],
@@ -8,6 +11,7 @@ def fetchBeerInfo(barcode):
     )
     resp = requests.get(url)
     resp.raise_for_status()
+
     beer = list.pop(resp.json()['response']['items'])['beer']
 
     return beer
@@ -19,20 +23,14 @@ def fetchDougBeerInfo(beer_slug):
     )
     resp = requests.get(url)
     resp.raise_for_status()
+
     beer = list.pop(resp.json()['response']['beers']['items'])
-    return beer['rating_score']
-
-'''
-    fetchDetailedCheckin = async (checkinId) => {
-        return fetch('https://untappd.com/user/doug1516/checkin/' + checkinId)
-            .then((response) => {
-                return response.text()
-            }).then((html) => {
-                var doc = this.domParser.parse(html)
-                doc.getElementsByClassName('caps')
-            })
-    }
-
-}
-
-'''
+    return beer
+
+def fetchCheckinReview(checkinId):
+    url = "https://untappd.com/user/doug1516/checkin/{}".format(checkinId)
+    resp = requests.get(url, headers={'User-agent': 'catch me if you can, dirtbags'})
+    resp.raise_for_status()
+
+    return scraper.scrape_checkin_review(resp)
+
diff --git a/test.py b/test.py
index c691527..9529272 100755
--- a/test.py
+++ b/test.py
@@ -1,5 +1,35 @@
-from scraper import logtest
+import datetime
+import functools
+import json
 import logging
-import requests
+import os
+import pprint
+import re
+import sys
 
-logtest('l')
+from collections import defaultdict
+
+sys.path.insert(1, './lib')
+sys.path.insert(1, './src')
+
+
+from lib import yaml
+from lib import requests
+from lib import pymysql
+from lib.bs4 import BeautifulSoup, SoupStrainer
+
+from scraper import *
+from slack import *
+
+from untappd_api import *
+
+
+pp = pprint.PrettyPrinter(indent=3)
+
+beer = fetchBeerInfo('0062067313356')
+# pp.pprint(beer)
+# print("===")
+beer_info = fetchDougBeerInfo(beer['beer_slug'])
+pp.pprint(beer_info)
+checkin_review = fetchCheckinReview(beer_info['recent_checkin_id'])
+print(checkin_review)
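
Review sketch, not part of the patch above: in lambda-untappd_api.py the guard "not event['queryStringParameters']['beercode']" raises a KeyError when the query string is present but has no beercode key, so the 400 branch is skipped in that case. A minimal .get()-based guard, assuming the same API Gateway proxy event shape; the helper name extract_beercode is hypothetical:

# Hypothetical helper sketch -- not part of the patch above.
# Assumes the API Gateway proxy event shape used by lambda-untappd_api.py.
def extract_beercode(event: dict):
    """Return the beercode query parameter, or None when it is absent."""
    params = event.get('queryStringParameters') or {}
    return params.get('beercode')  # .get() avoids the KeyError raised by ['beercode']


if __name__ == '__main__':
    # Query string present but no beercode: the patched handler would raise KeyError here.
    print(extract_beercode({'queryStringParameters': {'foo': 'bar'}}))  # None
    # Normal case, barcode taken from test.py.
    print(extract_beercode({'queryStringParameters': {'beercode': '0062067313356'}}))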
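
Also a sketch only: the deployed untappdApi function can be smoke-tested without API Gateway by invoking it directly with boto3. The profile, region, and function name are taken from lambda-deploy-untappd_api.sh, the barcode from test.py; everything else here is an assumption, not a committed script.

# Sketch of a manual smoke test for the untappdApi Lambda.
# Profile, region, and function name come from lambda-deploy-untappd_api.sh;
# the event mirrors what lambda_handler in lambda-untappd_api.py expects.
import json

import boto3

session = boto3.Session(profile_name='optate', region_name='us-east-2')
client = session.client('lambda')

event = {'queryStringParameters': {'beercode': '0062067313356'}}
resp = client.invoke(FunctionName='untappdApi', Payload=json.dumps(event))

print(json.loads(resp['Payload'].read()))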