Проект представляет собой расширение к веб-версии приложения для биргиков (любителей крафтового пива) Untappd, позволяющее пользоваться рядом функций, доступных пользователю только при оформлении подписки и/или получении ключа API, выдача которых почти полностью остановилась. Для исполнения всех ячеек требуется наличие браузера Firefox и драйвера geckodriver. 

In [1]:
import os
import time
import functools
from collections import Counter
from transliterate import translit
from bs4 import BeautifulSoup 
from urllib import request
from selenium import webdriver
from selenium.common.exceptions import ElementNotInteractableException

Класс BeerDataExport запускает браузер через selenium (для этого нужен второй профиль браузера и geckodriver – без них не удастся обойти капчу при логине), прокручивает до низа выбранный url и экспортирует полученную html-страницу в файл.

In [2]:
class BeerDataExport:
    
    def __init__(self, ffdir, geckodriver_dir, url, source_type='bar'):
        
        # source_type options: 'bar', 'brewery', 'acc' (personal account export), 'wishlist'
        
        self.ffdir = ffdir
        self.geckodriver_dir = geckodriver_dir
        self.url = url
        self.source_type = source_type
        self.run_the_browser(ffdir, geckodriver_dir, url, source_type)
    
    def run_the_browser(self, ffdir, geckodriver_dir, url, source_type):
        
        ffprofile = webdriver.FirefoxProfile(ffdir)
        driver = webdriver.Firefox(executable_path = geckodriver_dir, firefox_profile=ffprofile) 
        driver.get(url)
        
        if self.source_type == 'bar':
            filename = 'bar'
            btn = driver.find_element_by_css_selector("a.yellow.button.more.show-more-section.track-click")
        elif self.source_type == 'brewery':
            filename = 'brewery'
            btn = driver.find_element_by_css_selector("a.button.yellow")
        elif self.source_type == 'acc':
            filename = 'acc'
            btn = driver.find_element_by_css_selector("a.button.yellow.more.more-list-items.track-click")
        elif self.source_type == 'wishlist':
            filename = 'wishlist'
            btn = driver.find_element_by_css_selector("a.yellow.button.more.more-list-items")
        else:
            print('no such source type.')
        
        old_position = 0
        new_position = None

        while True:
            try:
                while new_position != old_position:
                    old_position = driver.execute_script(
                            ("return (window.pageYOffset !== undefined) ?"
                             " window.pageYOffset : (document.documentElement ||"
                             " document.body.parentNode || document.body);"))
                    time.sleep(1)
                    driver.execute_script((
                            "var scrollingElement = (document.scrollingElement ||"
                            " document.body);scrollingElement.scrollTop ="
                            " scrollingElement.scrollHeight;"))
                    btn.click()
                    new_position = driver.execute_script(
                            ("return (window.pageYOffset !== undefined) ?"
                             " window.pageYOffset : (document.documentElement ||"
                             " document.body.parentNode || document.body);"))
            except ElementNotInteractableException:
                with open('{}.html'.format(filename), 'w') as f:
                    f.write(driver.page_source)
                break 

Тесты 

In [3]:
BeerDataExport('/Users/macbookpro/Library/Application Support/Firefox/Profiles/p4mnjkfe.untappd',
               '/Users/macbookpro/Downloads/geckodriver',
               'https://untappd.com/v/share-house/7728362?ng_menu_id=2af7b1fd-7ca0-4ed7-9ecf-4db4920877c9',
               source_type='bar')

<__main__.BeerDataExport at 0x7fc17ea73110>

In [4]:
BeerDataExport('/Users/macbookpro/Library/Application Support/Firefox/Profiles/p4mnjkfe.untappd',
               '/Users/macbookpro/Downloads/geckodriver',
               'https://untappd.com/AntiFactory/beer',
               source_type='brewery')

<__main__.BeerDataExport at 0x7fc17eaba550>

In [5]:
BeerDataExport('/Users/macbookpro/Library/Application Support/Firefox/Profiles/p4mnjkfe.untappd',
               '/Users/macbookpro/Downloads/geckodriver',
               'https://untappd.com/user/sheamrock/beers',
               source_type='acc')

<__main__.BeerDataExport at 0x7fc184601150>

In [6]:
BeerDataExport('/Users/macbookpro/Library/Application Support/Firefox/Profiles/p4mnjkfe.untappd',
               '/Users/macbookpro/Downloads/geckodriver',
               'https://untappd.com/user/mettathrone/wishlist',
               source_type='wishlist')

<__main__.BeerDataExport at 0x7fc184606750>

Класс BeerDataParse содержит парсер полученной ранее html-страницы, складывающий данные о позициях в список словарей. Также класс содержит декоратор, позволяющий привести выдачу парсера к более комфортно читаемому виду.

In [7]:
class BeerDataParse:
    
    def __init__(self):
        pass
    
    def BeautifyOutput(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            bar_beauty = func(*args, **kwargs)
            result = []
            for beer in bar_beauty:
                result.append(beer['beer'] + '\n' + beer['style'] + '\n' + str(beer['rating']) + '\n' 
                      + beer['brewery'] + '\n' + beer['link'] + '\n\n')
            return result
        return wrapper
               
    def parse_the_list(self, filename, source_type):
        
        beers = []
        
        with open(filename) as f:
            contents = f.read()
        soup = BeautifulSoup(contents)
        
        if source_type == 'bar':
            
            beers_raw = soup.find_all("div", class_="beer-details")
            for beer in beers_raw:
                h5 = beer.find('h5')
                h5_a = h5.find('a')    
                beer_link = f"https://untappd.com{h5_a['href']}"
                beer_name = h5_a.get_text()
                brewery = beer.find("a", attrs={"data-href": ":brewery"}).get_text()
                style = h5.find("em").get_text()
                try:
                    rating = float(beer.find("div", class_="caps")["data-rating"])
                except:
                    rating = None
                beers.append({"beer": beer_name, "style": style, "rating": rating, "brewery": brewery, "link": beer_link})
    
        elif source_type == 'brewery':
            
            ratings_raw = soup.find_all('span', class_='num')
            ratings = []
            for rt in ratings_raw:
                rating_raw = rt.get_text()
                rating_line = str(rating_raw)
                rating = rating_line[1:-1]
                ratings.append(rating)
            beers_raw = soup.find_all('div', class_='beer-details')
            i = 0
            for beer in beers_raw:
                a = beer.find('a')
                beer_name = beer.find('p').get_text()
                beer_link = f"https://untappd.com{a['href']}"
                brewery = 'similar'
                style = beer.find('p', class_='style').get_text()
                try:
                    rating = ratings[i]
                except:
                    rating = None
                i += 1
                beers.append({"beer": beer_name, "style": style, "rating": rating, "brewery": brewery, "link": beer_link}) 
                
        elif source_type == 'acc':
            
            beers_raw = soup.find_all("div", class_="beer-details")
            for beer in beers_raw:
                p = beer.find('p')
                a = p.find('a')
                beer_name = p.get_text()
                beer_link = f"https://untappd.com{a['href']}"
                brewery = beer.find('p', class_='brewery').get_text()
                style = beer.find('p', class_='style').get_text()
                try:
                    rating = float(beer.find("div", class_="caps")["data-rating"])
                except:
                    rating = None
                beers.append({"beer": beer_name, "style": style, "rating": rating, "brewery": brewery, "link": beer_link}) 

        elif source_type == 'wishlist':
            
            beers_raw = soup.find_all('div', class_='item-info')
            for beer in beers_raw:
                beer_name = beer.find('h2').get_text()
                h2 = beer.find('h2')
                a = h2.find('a')
                beer_link = f"https://untappd.com{a['href']}"
                brewery = beer.find('h3').get_text()
                desc = beer.find('h4').get_text()
                style = desc.split('•')[0]
                try:
                    rating = float(beer.find("div", class_="caps")["data-rating"])
                except:
                    rating = None
                beers.append({"beer": beer_name, "style": style, "rating": rating, "brewery": brewery, "link": beer_link}) 

        else:
            print('no such source type.')
        
        with open('beers.txt', 'w') as f:
            f.write(str(beers))
        
        return beers
    
    @BeautifyOutput 
    def parse_beautifully(self, filename, source_type):
        result = self.parse_the_list(filename,source_type)
        return result

Тесты

In [8]:
bdp = BeerDataParse()

In [9]:
bar = bdp.parse_the_list('bar.html', 'bar')
print(bar[0])

{'beer': 'СИДР / CIDER ', 'style': 'Cider - Traditional', 'rating': 3.632, 'brewery': 'Vasileostrovskaya Brewery (Василеостровская Пивоварня)', 'link': 'https://untappd.com/b/vasileostrovskaya-brewery-vasileostrovskaya-pivovarnya-sidr-cider/570158'}


In [10]:
bar_b = bdp.parse_beautifully('bar.html', 'bar')
print(bar_b[0])

СИДР / CIDER 
Cider - Traditional
3.632
Vasileostrovskaya Brewery (Василеостровская Пивоварня)
https://untappd.com/b/vasileostrovskaya-brewery-vasileostrovskaya-pivovarnya-sidr-cider/570158




In [11]:
brewery = bdp.parse_the_list('brewery.html', 'brewery')
print(brewery[0])

{'beer': 'Redrum IPA Special Edition', 'style': 'IPA - Imperial / Double', 'rating': '3.96', 'brewery': 'similar', 'link': 'https://untappd.com/b/af-brew-redrum-ipa-special-edition/915280'}


In [12]:
acc = bdp.parse_the_list('acc.html', 'acc')
print(acc[0])

{'beer': '(REMIX) Tiki Jazz', 'style': 'Sour - Fruited', 'rating': 4.0, 'brewery': 'Jaws Brewery', 'link': 'https://untappd.com/b/jaws-brewery-remix-tiki-jazz/3724443'}


In [13]:
wish = bdp.parse_the_list('wishlist.html', 'wishlist')
print(wish[0])

{'beer': 'Якудза (Yakuza)', 'style': 'Spiced / Herbed Beer ', 'rating': 3.56, 'brewery': 'Hophead Brewery', 'link': 'https://untappd.com/b/hophead-brewery-yakudza-yakuza/1309384'}


Класс FiddlingWithBeers содержит, собственно, основные значимые функции: вывод топа по оценке или по стилю, рекомендация напитка из ассортимента бара или пивоварни на основании данных собственного аккаунта, проверка наличия позиций из вишлиста в ассортименте бара и бонус — функцию birthday_gift, которая позволяет собрать подарочный пак из позиций бара, начинающихся на буквы имени именинника. 

In [14]:
class FiddlingWithBeers(BeerDataParse):

    def top_beers(self, beers, n):
        for beer in beers:
            if beer['rating'] == None:
                beer['rating'] = 0
        beers_sorted = sorted(beers, key=lambda x: x['rating']) 
        return beers_sorted[-n:]
    
    def top_beers_by_style(self, beers, n):
        for beer in beers:
            if beer['rating'] == None:
                beer['rating'] = 0
        styles = []
        for beer in beers:
            if beer['style'] not in styles:
                styles.append(beer['style'])
        styles = sorted(styles)
        print(styles)
        selected_style = input()
        selected_top = []
        for beer in beers:
            if beer['style'] == selected_style:
                selected_top.append(beer)
        beers_sorted = sorted(selected_top, key=lambda x: x['rating'])
        return beers_sorted[-3:]
    
    def recommend_beer(self, acc_beers, bar_beers, n):
        top = self.top_beers(acc_beers, 10)
        top_acc_styles = []
        for t in top:
            if t['style'] not in top_acc_styles:
                top_acc_styles.append(t['style'])
        selected_style = []
        for beer in bar_beers:
            if beer['style'] in top_acc_styles:
                selected_style.append(beer)
        selected_style_sorted = sorted(selected_style, key=lambda x: x['rating']) 
        return selected_style_sorted[-n:]
    
    def check_wishlist(self, acc_beers, bar_beers):
        result = []
        for beer in acc_beers:
            if beer in bar_beers:
                result.append(beer)
        return result
    
    #bonus function
    
    def birthday_gift(self, name, acc, beers):
        
        beers_not_drunk = [x for x in beers if x["link"] not in {l["link"] for l in acc}]
        ru = Counter(name)
        en = Counter([translit(k, "ru", reversed=True) for k in list(name)])
        
        letters = set(ru.keys()) | set(en.keys()) | {"z"}
        beers_by_letter = {}
        
        for letter in letters:
            beers_by_letter[letter] = [x for x in beers_not_drunk if x["beer"].lower().startswith(letter)]
            
        rest_beers = [x for x in beers if not any(x["beer"].lower().startswith(letter) for letter in letters)]
        take_from_rest = 0
        
        result = []
        used_letters = set()
        for l_ru in name:
            if l_ru in used_letters:
                continue
            used_letters.add(l_ru)
            needed = ru[l_ru]
            l_en = translit(l_ru, "ru", reversed=True)
            beers_ = sorted(beers_by_letter[l_en] + beers_by_letter[l_ru], key=lambda x: x.get("rating") or 0, reverse=True)
            print("letter {}".format(l_ru))
            if needed > len(beers_):
                diff = needed - len(beers_)
                take_from_rest += diff
                print("adding {} beers to take from rest".format(diff))
            beers_to_take = beers_[:needed]
            result.extend(beers_to_take)
            print("\n".join(x["beer"] for x in beers_to_take))
        print("taking {} beers from rest".format(take_from_rest))
        rest_ = sorted(rest_beers, key=lambda x: x.get("rating") or 0, reverse=True)
        beers_to_take = rest_[:take_from_rest]
        result.extend(beers_to_take)
        print("\n".join(x["beer"] for x in beers_to_take))

Тесты

In [15]:
fwb = FiddlingWithBeers()

In [16]:
fwb.birthday_gift('дартаньян', acc, bar)

letter д
Достоевский. Том 5
letter а
All Sails Ripped
Альтер Эго
letter р
Rosé de Gambrinus (2019)
letter т
TERRE - ANNATA 2010
letter н
Noble Apple (2019)
Natrium Gose
letter ь
adding 1 beers to take from rest

letter я
Jack Hammer
taking 1 beers from rest
XYAUYU' KENTUCKY - RISERVA 2015


In [18]:
fwb.top_beers(bar, 4)

[{'beer': "XYAUYU' FUME - RISERVA 2014",
  'style': 'Barleywine - Other',
  'rating': 4.419,
  'brewery': 'BIRRIFICIO AGRICOLO BALADIN - Baladin Indipendente Italian Farm Brewery',
  'link': 'https://untappd.com/b/birrificio-agricolo-baladin-baladin-indipendente-italian-farm-brewery-xyauyu-fume-riserva-2014/2915524'},
 {'beer': '3 Fonteinen Oude Geuze Cuvée Armand & Gaston (season 17|18) Blend No. 25',
  'style': 'Lambic - Gueuze',
  'rating': 4.441,
  'brewery': 'Brouwerij 3 Fonteinen',
  'link': 'https://untappd.com/b/brouwerij-3-fonteinen-3-fonteinen-oude-geuze-cuvee-armand-and-gaston-season-17-18-blend-no-25/3028849'},
 {'beer': 'XYAUYÙ BARREL - RISERVA 2014',
  'style': 'Barleywine - Other',
  'rating': 4.494,
  'brewery': 'BIRRIFICIO AGRICOLO BALADIN - Baladin Indipendente Italian Farm Brewery',
  'link': 'https://untappd.com/b/birrificio-agricolo-baladin-baladin-indipendente-italian-farm-brewery-xyauyu-barrel-riserva-2014/2963304'},
 {'beer': "XYAUYU' KENTUCKY - RISERVA 2015",
 

In [19]:
fwb.top_beers_by_style(bar, 3)

['American Wild Ale', 'Australian Sparkling Ale', 'Barleywine - American', 'Barleywine - English', 'Barleywine - Other', 'Belgian Dubbel', 'Belgian Quadrupel', 'Belgian Strong Dark Ale', 'Belgian Strong Golden Ale', 'Belgian Tripel', 'Bière de Champagne / Bière Brut', 'Bock - Doppelbock', 'Bock - Hell / Maibock / Lentebock', 'Bock - Single / Traditional', 'Bock - Weizenbock', 'Brown Ale - American', 'Brown Ale - English', 'Brown Ale - Imperial / Double', 'Burton Ale', 'Chilli / Chile Beer', 'Cider - Dry', 'Cider - Herbed / Spiced / Hopped', 'Cider - Other Fruit', 'Cider - Perry', 'Cider - Traditional', 'Dark Ale', 'English Bitter', 'Farmhouse Ale - Other', 'Farmhouse Ale - Sahti', 'Farmhouse Ale - Saison', 'Fruit Beer', 'Gluten-Free', 'Hefeweizen', 'IPA - American', 'IPA - Belgian', 'IPA - English', 'IPA - Imperial / Double', 'IPA - Imperial / Double New England', 'IPA - International', 'IPA - Milkshake', 'IPA - New England', 'IPA - Session / India Session Ale', 'IPA - Sour', 'IPA - Tr

[{'beer': 'MAD PRINTER',
  'style': 'IPA - Imperial / Double',
  'rating': 3.917,
  'brewery': 'Ostrovica',
  'link': 'https://untappd.com/b/ostrovica-mad-printer/3808840'},
 {'beer': 'Dandy Rover',
  'style': 'IPA - Imperial / Double',
  'rating': 3.95,
  'brewery': 'GAS Brew',
  'link': 'https://untappd.com/b/gas-brew-dandy-rover/3403044'},
 {'beer': 'Minor 6th',
  'style': 'IPA - Imperial / Double',
  'rating': 3.997,
  'brewery': 'Ostrovica',
  'link': 'https://untappd.com/b/ostrovica-minor-6th/3809672'}]

In [20]:
fwb.check_wishlist(wish, wish)[0]

{'beer': 'Якудза (Yakuza)',
 'style': 'Spiced / Herbed Beer ',
 'rating': 3.56,
 'brewery': 'Hophead Brewery',
 'link': 'https://untappd.com/b/hophead-brewery-yakudza-yakuza/1309384'}