In [1]:
import os
import time
import functools
from bs4 import BeautifulSoup 
from urllib import request
from selenium import webdriver
from selenium.common.exceptions import ElementNotInteractableException

In [2]:
class BeerDataExport:
    """Open an Untappd page in Firefox, expand it fully and save its HTML.

    The page is scrolled to the bottom and its "show more" button is clicked
    repeatedly until the button stops being interactable or the scroll
    position stops changing. The resulting page source is then written to
    ``<source_type>.html`` in the current working directory.

    Supported ``source_type`` values: 'bar', 'brewery',
    'acc' (personal account export) and 'wishlist'.

    Note: the export runs as a side effect of constructing the object.
    """

    # CSS selector of the "show more" button for every supported source type.
    _MORE_BUTTON_SELECTORS = {
        'bar': "a.yellow.button.more.show-more-section.track-click",
        'brewery': "a.button.yellow",
        'acc': "a.button.yellow.more.more-list-items.track-click",
        'wishlist': "a.yellow.button.more.more-list-items",
    }

    # Returns the current vertical scroll offset. The fallback branch reads
    # ``.scrollTop`` so a number is returned even without ``pageYOffset``.
    _GET_SCROLL_Y_JS = (
        "return (window.pageYOffset !== undefined) ?"
        " window.pageYOffset : (document.documentElement ||"
        " document.body.parentNode || document.body).scrollTop;")

    # Scrolls the page to the very bottom.
    _SCROLL_TO_BOTTOM_JS = (
        "var scrollingElement = (document.scrollingElement ||"
        " document.body);scrollingElement.scrollTop ="
        " scrollingElement.scrollHeight;")

    def __init__(self, ffdir, geckodriver_dir, url, source_type='bar'):
        """Store the settings and immediately run the export.

        Args:
            ffdir: path to the Firefox profile directory to use.
            geckodriver_dir: path to the geckodriver executable.
            url: page to open.
            source_type: one of 'bar', 'brewery', 'acc', 'wishlist'.

        Raises:
            ValueError: if ``source_type`` is not supported.
        """
        self.ffdir = ffdir
        self.geckodriver_dir = geckodriver_dir
        self.url = url
        self.source_type = source_type
        self.run_the_browser(ffdir, geckodriver_dir, url, source_type)

    def run_the_browser(self, ffdir, geckodriver_dir, url, source_type):
        """Load ``url``, expand the list and write ``<source_type>.html``.

        Raises:
            ValueError: if ``source_type`` is not supported (checked before
                the browser is started).
        """
        # Validate first so a typo does not launch a browser and then fail
        # later on an unbound button/filename.
        if source_type not in self._MORE_BUTTON_SELECTORS:
            raise ValueError(
                'no such source type: {!r} (expected one of {})'.format(
                    source_type, sorted(self._MORE_BUTTON_SELECTORS)))

        filename = source_type
        ffprofile = webdriver.FirefoxProfile(ffdir)
        driver = webdriver.Firefox(executable_path=geckodriver_dir, firefox_profile=ffprofile)
        try:
            driver.get(url)
            btn = driver.find_element_by_css_selector(
                self._MORE_BUTTON_SELECTORS[source_type])

            while True:
                old_position = driver.execute_script(self._GET_SCROLL_Y_JS)
                time.sleep(1)  # give the previously requested items time to load
                driver.execute_script(self._SCROLL_TO_BOTTOM_JS)
                try:
                    btn.click()
                except ElementNotInteractableException:
                    # The button is no longer clickable: everything is loaded.
                    break
                new_position = driver.execute_script(self._GET_SCROLL_Y_JS)
                if new_position == old_position:
                    # The page did not move; stop instead of looping forever.
                    break

            # Explicit encoding: the page may contain non-ASCII beer names.
            with open('{}.html'.format(filename), 'w', encoding='utf-8') as f:
                f.write(driver.page_source)
        finally:
            # Always release the browser, including on errors.
            driver.quit()

Тесты 

In [93]:
# Run the export for the 'bar' source type; the page is saved as bar.html.
# NOTE(review): the profile and geckodriver paths are machine-specific.
BeerDataExport(
    ffdir='/Users/macbookpro/Library/Application Support/Firefox/Profiles/p4mnjkfe.untappd',
    geckodriver_dir='/Users/macbookpro/Downloads/geckodriver',
    url='https://untappd.com/v/share-house/7728362?ng_menu_id=2af7b1fd-7ca0-4ed7-9ecf-4db4920877c9',
    source_type='bar',
)

<__main__.BeerDataExport at 0x7f81d13acdd0>

In [88]:
# Run the export for the 'brewery' source type; the page is saved as brewery.html.
# NOTE(review): the profile and geckodriver paths are machine-specific.
BeerDataExport(
    ffdir='/Users/macbookpro/Library/Application Support/Firefox/Profiles/p4mnjkfe.untappd',
    geckodriver_dir='/Users/macbookpro/Downloads/geckodriver',
    url='https://untappd.com/AntiFactory/beer',
    source_type='brewery',
)

<__main__.BeerDataExport at 0x7f81d13ae150>

In [95]:
# Run the export for the 'acc' source type; the page is saved as acc.html.
# NOTE(review): the profile and geckodriver paths are machine-specific.
BeerDataExport(
    ffdir='/Users/macbookpro/Library/Application Support/Firefox/Profiles/p4mnjkfe.untappd',
    geckodriver_dir='/Users/macbookpro/Downloads/geckodriver',
    url='https://untappd.com/user/sheamrock/beers',
    source_type='acc',
)

<__main__.BeerDataExport at 0x7f81d139ed90>

In [91]:
# Run the export for the 'wishlist' source type; the page is saved as wishlist.html.
# NOTE(review): the profile and geckodriver paths are machine-specific.
BeerDataExport(
    ffdir='/Users/macbookpro/Library/Application Support/Firefox/Profiles/p4mnjkfe.untappd',
    geckodriver_dir='/Users/macbookpro/Downloads/geckodriver',
    url='https://untappd.com/user/mettathrone/wishlist',
    source_type='wishlist',
)

<__main__.BeerDataExport at 0x7f81d13abd50>

In [43]:
class BeerDataParse:
    """Parse saved HTML pages into lists of beer dictionaries.

    Every parsed beer is a dict with the keys ``beer``, ``style``,
    ``rating`` (float or None), ``brewery`` and ``link``.
    """

    def __init__(self):
        pass

    def BeautifyOutput(func):
        """Decorator: print every beer returned by ``func``, then return the list.

        The wrapped function's result is passed through unchanged so callers
        can keep working with the parsed beers.
        """
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            bar_beauty = func(*args, **kwargs)
            for beer in bar_beauty:
                print(beer['beer'] + '\n' + beer['style'] + '\n' + str(beer['rating']) + '\n'
                      + beer['brewery'] + '\n' + beer['link'] + '\n\n')
            # Without this return the decorated method would always yield None.
            return bar_beauty
        return wrapper

    @staticmethod
    def _to_rating(text):
        """Convert rating text to float; return None when it is not a number."""
        try:
            return float(text)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _data_rating(beer):
        """Read the ``data-rating`` attribute of the ``div.caps`` inside ``beer``.

        Returns None when the element or attribute is missing or not numeric.
        """
        caps = beer.find("div", class_="caps")
        try:
            return float(caps["data-rating"])
        except (TypeError, KeyError, ValueError):
            # TypeError: no div.caps found (caps is None).
            return None

    def _parse_bar(self, soup):
        """Extract beers from a page saved with source type 'bar'."""
        beers = []
        for beer in soup.find_all("div", class_="beer-details"):
            h5 = beer.find('h5')
            h5_a = h5.find('a')
            beers.append({
                "beer": h5_a.get_text(),
                "style": h5.find("em").get_text(),
                "rating": self._data_rating(beer),
                "brewery": beer.find("a", attrs={"data-href": ":brewery"}).get_text(),
                "link": f"https://untappd.com{h5_a['href']}",
            })
        return beers

    def _parse_brewery(self, soup):
        """Extract beers from a page saved with source type 'brewery'.

        Ratings are taken from the ``span.num`` elements and matched to the
        beers by position; the first and last character of each rating text
        are dropped before conversion to float.
        """
        ratings = [self._to_rating(str(rt.get_text())[1:-1])
                   for rt in soup.find_all('span', class_='num')]
        beers = []
        for i, beer in enumerate(soup.find_all('div', class_='beer-details')):
            a = beer.find('a')
            beers.append({
                "beer": beer.find('p').get_text(),
                "style": beer.find('p', class_='style').get_text(),
                # Fewer ratings than beers: leave the rest unrated.
                "rating": ratings[i] if i < len(ratings) else None,
                "brewery": 'similar',
                "link": f"https://untappd.com{a['href']}",
            })
        return beers

    def _parse_acc(self, soup):
        """Extract beers from a page saved with source type 'acc'."""
        beers = []
        for beer in soup.find_all("div", class_="beer-details"):
            p = beer.find('p')
            a = p.find('a')
            beers.append({
                "beer": p.get_text(),
                "style": beer.find('p', class_='style').get_text(),
                "rating": self._data_rating(beer),
                "brewery": beer.find('p', class_='brewery').get_text(),
                "link": f"https://untappd.com{a['href']}",
            })
        return beers

    def _parse_wishlist(self, soup):
        """Extract beers from a page saved with source type 'wishlist'."""
        beers = []
        for beer in soup.find_all('div', class_='item-info'):
            h2 = beer.find('h2')
            a = h2.find('a')
            desc = beer.find('h4').get_text()
            beers.append({
                "beer": h2.get_text(),
                # The style is the part of the description before the bullet.
                "style": desc.split('•')[0],
                "rating": self._data_rating(beer),
                "brewery": beer.find('h3').get_text(),
                "link": f"https://untappd.com{a['href']}",
            })
        return beers

    @BeautifyOutput
    def parse_the_list(self, filename, source_type):
        """Parse ``filename`` according to ``source_type``.

        The parsed list is also written (as its ``str()``) to ``beers.txt``
        and, through the decorator, printed beer by beer.

        Args:
            filename: path of the saved HTML page.
            source_type: one of 'bar', 'brewery', 'acc', 'wishlist'.

        Returns:
            list of beer dicts.

        Raises:
            ValueError: if ``source_type`` is not supported (raised before any
                file is read or written).
        """
        parsers = {
            'bar': self._parse_bar,
            'brewery': self._parse_brewery,
            'acc': self._parse_acc,
            'wishlist': self._parse_wishlist,
        }
        if source_type not in parsers:
            # Fail early instead of overwriting beers.txt with an empty list.
            raise ValueError(
                'no such source type: {!r} (expected one of {})'.format(
                    source_type, sorted(parsers)))

        with open(filename, encoding='utf-8') as f:
            contents = f.read()
        # Name the parser explicitly so results do not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(contents, 'html.parser')

        beers = parsers[source_type](soup)

        with open('beers.txt', 'w', encoding='utf-8') as f:
            f.write(str(beers))

        return beers

Тесты

In [44]:
# Parser instance shared by the parse_the_list cells that follow.
bdp = BeerDataParse()

In [45]:
# Parse the saved bar page; the decorator prints every parsed beer.
bdp.parse_the_list(filename='bar.html', source_type='bar')

haha
Gueuze 100% Lambic Bio
Lambic - Gueuze
4.208
Brasserie Cantillon
https://untappd.com/b/brasserie-cantillon-gueuze-100-lambic-bio/56829


Rosé de Gambrinus (2019)
Lambic - Framboise
4.319
Brasserie Cantillon
https://untappd.com/b/brasserie-cantillon-rose-de-gambrinus-2019/3079645


Black Radiation OAK Chivas Edition
Stout - Russian Imperial
4.095
RMBrew
https://untappd.com/b/rmbrew-black-radiation-oak-chivas-edition/2784315


Mjød Липа
Mead - Metheglin
4.001
Gurin Craft
https://untappd.com/b/gurin-craft-mjod-lipa/3637173


Oppa! Apa
Pale Ale - American
3.768
RMBrew
https://untappd.com/b/rmbrew-oppa-apa/3527233


Окрошка
Sour - Gose
3.579
Шляпа Brewery
https://untappd.com/b/shlyapa-brewery-okroshka/3470265


PARTY DIPA
IPA - Imperial / Double
3.687
RMBrew
https://untappd.com/b/rmbrew-party-dipa/2063419


MouseTrap
Sour - Fruited
3.506
RMBrew
https://untappd.com/b/rmbrew-mousetrap/3749983


Too Many Options
Stout - Milk / Sweet
3.817
RMBrew
https://untappd.com/b/rmbrew-too-many-optio

In [46]:
# Parse the saved brewery page; the decorator prints every parsed beer.
bdp.parse_the_list(filename='brewery.html', source_type='brewery')

haha
Redrum IPA Special Edition
IPA - Imperial / Double
3.96
similar
https://untappd.com/b/af-brew-redrum-ipa-special-edition/915280


Mosaic IPA
IPA - American
3.96
similar
https://untappd.com/b/af-brew-mosaic-ipa/309806


Big Black Mash
Stout - Imperial / Double Pastry
3.96
similar
https://untappd.com/b/af-brew-big-black-mash/2588712


Eat the Dust! DDH Mosaic
IPA - Imperial / Double New England
4.16
similar
https://untappd.com/b/af-brew-eat-the-dust-ddh-mosaic/2065074


Ingria Pale Ale
IPA - American
4.08
similar
https://untappd.com/b/af-brew-ingria-pale-ale/222101


Passion Is My Confession
Sour - Fruited Berliner Weisse
3.87
similar
https://untappd.com/b/af-brew-passion-is-my-confession/1722897


Зимняя Меланхолия (Winter Melancholy)
Porter - Imperial / Double Baltic
3.98
similar
https://untappd.com/b/af-brew-zimnyaya-melanholiya-winter-melancholy/1361616


Eat the Dust! DDH Citra
IPA - Imperial / Double New England
4.07
similar
https://untappd.com/b/af-brew-eat-the-dust-ddh-citra

In [36]:
# Parse the saved account page; the decorator prints every parsed beer.
bdp.parse_the_list(filename='acc.html', source_type='acc')

haha
(REMIX) Tiki Jazz
Sour - Fruited
4.0
Jaws Brewery
https://untappd.com/b/jaws-brewery-remix-tiki-jazz/3724443


Baltika #0 Grapefruit / Балтика #0 Грейпфрут
Non-Alcoholic Beer
4.0
Baltika Breweries (Балтика)
https://untappd.com/b/baltika-breweries-baltika-baltika-0-grapefruit-baltika-0-grejpfrut/2908872


Symbiotica Apple
Cider - Traditional
4.0
Symbiotica by AF Brew × BSG
https://untappd.com/b/symbiotica-by-af-brew-bsg-symbiotica-apple/3707608


Pomme De Sodome
Cider - Graff
4.5
AF Brew
https://untappd.com/b/af-brew-pomme-de-sodome/3588530


All Together
IPA - New England
4.0
AF Brew
https://untappd.com/b/af-brew-all-together/3759886


Нимфоманка (Nymphomaniac)
Barleywine - English
4.0
Hophead Brewery
https://untappd.com/b/hophead-brewery-nimfomanka-nymphomaniac/1712644


Bye Bye Take Your Pie
Stout - Imperial / Double Milk
3.5
Bottle Share
https://untappd.com/b/bottle-share-bye-bye-take-your-pie/3635282


Cherry Merry Princess
Sour - Other
4.0
HumptyDumpty
https://untappd.com/b/h

In [39]:
# Parse the saved wishlist page; the decorator prints every parsed beer.
bdp.parse_the_list(filename='wishlist.html', source_type='wishlist')

haha
Якудза (Yakuza)
Spiced / Herbed Beer 
3.56
Hophead Brewery
https://untappd.com/b/hophead-brewery-yakudza-yakuza/1309384


BANANA BANG
Sour - Fruited 
3.81
FFF BREW
https://untappd.com/b/fff-brew-banana-bang/3716473


SPLURGE
Sour - Fruited 
4.46
Red Button Brewery
https://untappd.com/b/red-button-brewery-splurge/3690113


Tactical Nuclear Penguin
Freeze-Distilled Beer 
3.87
BrewDog
https://untappd.com/b/brewdog-tactical-nuclear-penguin/5713


ARE YOU F**KING NUTS?
Stout - Russian Imperial 
4.0
Browar Minister 
https://untappd.com/b/browar-minister-are-you-f-king-nuts/3167372


Duck Diving
Sour - Fruited Gose 
3.75
Brewski
https://untappd.com/b/brewski-duck-diving/3299131


Антропоцентризм / Anthropocentrism
Stout - Russian Imperial 
3.75
Пивоварня Чаща / Thicket Brewery
https://untappd.com/b/pivovarnya-chasha-thicket-brewery-antropocentrizm-anthropocentrism/3571633


The Lord of Bounty
Stout - Imperial / Double Milk 
4.02
Plan B Brewery
https://untappd.com/b/plan-b-brewery-the-lor

In [126]:
class FiddlingWithBeers:
    """Small ranking/filtering helpers over lists of beer dictionaries.

    Each beer is expected to be a dict with at least the keys ``style`` and
    ``rating`` (a number or None). Input lists and dicts are never modified.
    Ranked results are returned in ascending rating order, i.e. the best
    beer is the last element.
    """

    def __init__(self):
        pass

    def _rating_key(self, beer):
        """Sort key: the beer's rating, with a missing rating (None) counted as 0."""
        rating = beer['rating']
        return 0 if rating is None else rating

    def _best(self, beers, n):
        """Return the ``n`` highest-rated beers in ascending rating order."""
        if n <= 0:
            # ``[-0:]`` would return the whole list, so handle this explicitly.
            return []
        return sorted(beers, key=self._rating_key)[-n:]

    def top_beers(self, beers, n):
        """Return the ``n`` highest-rated beers of ``beers``.

        Unrated beers (rating None) rank as 0. Returns an empty list for
        ``n <= 0``.
        """
        return self._best(beers, n)

    def top_beers_by_style(self, beers, n, style=None):
        """Return the ``n`` highest-rated beers of one style.

        Args:
            beers: list of beer dicts.
            n: how many beers to return.
            style: style to filter by. When omitted, the sorted list of
                available styles is printed and the style is read with
                ``input()``.
        """
        if style is None:
            styles = sorted({beer['style'] for beer in beers})
            print(styles)
            style = input()
        matching = [beer for beer in beers if beer['style'] == style]
        return self._best(matching, n)

    def recommend_beer(self, acc_beers, bar_beers, n):
        """Recommend up to ``n`` beers from ``bar_beers``.

        Candidates are the bar beers whose style appears among the 10
        highest-rated beers of ``acc_beers``; the ``n`` best-rated candidates
        are returned.
        """
        favourite_styles = {beer['style'] for beer in self.top_beers(acc_beers, 10)}
        candidates = [beer for beer in bar_beers if beer['style'] in favourite_styles]
        return self._best(candidates, n)

    def check_wishlist(self, acc_beers, bar_beers):
        """Return the beers of ``acc_beers`` that also occur in ``bar_beers``.

        Beers are compared as whole dicts, so every field must match.
        """
        return [beer for beer in acc_beers if beer in bar_beers]

Тесты

In [127]:
# Helper instance shared by the analysis cells that follow.
fwb = FiddlingWithBeers()

In [124]:
# NOTE(review): `bar` is not assigned in any visible cell - presumably the
# list parsed from bar.html; confirm before a Restart & Run All.
fwb.top_beers(beers=bar, n=2)

[{'beer': 'XYAUYÙ BARREL - RISERVA 2014',
  'style': 'Barleywine - Other',
  'rating': 4.494,
  'brewery': 'BIRRIFICIO AGRICOLO BALADIN - Baladin Indipendente Italian Farm Brewery',
  'link': 'https://untappd.com/b/birrificio-agricolo-baladin-baladin-indipendente-italian-farm-brewery-xyauyu-barrel-riserva-2014/2963304'},
 {'beer': "XYAUYU' KENTUCKY - RISERVA 2015",
  'style': 'Barleywine - Other',
  'rating': 4.542,
  'brewery': 'BIRRIFICIO AGRICOLO BALADIN - Baladin Indipendente Italian Farm Brewery',
  'link': 'https://untappd.com/b/birrificio-agricolo-baladin-baladin-indipendente-italian-farm-brewery-xyauyu-kentucky-riserva-2015/2915525'}]

In [129]:
# Prints the available styles, then waits for a style typed via input().
# NOTE(review): `bar` is not assigned in any visible cell - confirm its origin.
fwb.top_beers_by_style(beers=bar, n=3)

['American Wild Ale', 'Barleywine - American', 'Barleywine - English', 'Barleywine - Other', 'Belgian Dubbel', 'Belgian Quadrupel', 'Belgian Strong Dark Ale', 'Belgian Strong Golden Ale', 'Belgian Tripel', 'Bière de Champagne / Bière Brut', 'Bock - Doppelbock', 'Bock - Hell / Maibock / Lentebock', 'Bock - Single / Traditional', 'Bock - Weizenbock', 'Brown Ale - American', 'Brown Ale - Imperial / Double', 'Burton Ale', 'Chilli / Chile Beer', 'Cider - Dry', 'Cider - Herbed / Spiced / Hopped', 'Cider - Other Fruit', 'Cider - Traditional', 'Dark Ale', 'English Bitter', 'Farmhouse Ale - Other', 'Farmhouse Ale - Sahti', 'Farmhouse Ale - Saison', 'Fruit Beer', 'Gluten-Free', 'Hefeweizen', 'IPA - American', 'IPA - Belgian', 'IPA - English', 'IPA - Imperial / Double', 'IPA - Imperial / Double New England', 'IPA - International', 'IPA - Milkshake', 'IPA - New England', 'IPA - Session / India Session Ale', 'IPA - Sour', 'IPA - Triple', 'Lager - Helles', 'Lager - Munich Dunkel', 'Lambic - Frambois

[{'beer': 'Соло',
  'style': 'IPA - American',
  'rating': 3.876,
  'brewery': 'Samovar Brew',
  'link': 'https://untappd.com/b/samovar-brew-solo/3671217'},
 {'beer': 'BUMPY',
  'style': 'IPA - American',
  'rating': 3.896,
  'brewery': 'Red Button Brewery',
  'link': 'https://untappd.com/b/red-button-brewery-bumpy/3629982'},
 {'beer': 'Electrocat',
  'style': 'IPA - American',
  'rating': 3.926,
  'brewery': 'Sabotage',
  'link': 'https://untappd.com/b/sabotage-electrocat/3202918'}]

In [130]:
# NOTE(review): `acc` and `bar` are not assigned in any visible cell -
# presumably the lists parsed from acc.html and bar.html; confirm.
fwb.recommend_beer(acc_beers=acc, bar_beers=bar, n=3)

[{'beer': 'Choco Mammuth',
  'style': 'Stout - Imperial / Double Milk',
  'rating': 4.085,
  'brewery': 'One Ton Brewery (Пивоварня Одна Тонна)',
  'link': 'https://untappd.com/b/one-ton-brewery-pivovarnya-odna-tonna-choco-mammuth/1591952'},
 {'beer': 'Schoolyard',
  'style': 'IPA - Triple',
  'rating': 4.091,
  'brewery': 'Time Bomb Brewery',
  'link': 'https://untappd.com/b/time-bomb-brewery-schoolyard/3668058'},
 {'beer': 'Альтер Эго',
  'style': 'Stout - Imperial / Double Milk',
  'rating': 4.221,
  'brewery': 'Big Village Brewery',
  'link': 'https://untappd.com/b/big-village-brewery-alter-ego/3801547'}]

In [133]:
# Smoke check: the wishlist is compared against itself, first match shown.
# NOTE(review): `wishlist` is not assigned in any visible cell - confirm its origin.
fwb.check_wishlist(acc_beers=wishlist, bar_beers=wishlist)[0]

{'beer': 'Якудза (Yakuza)',
 'style': 'Spiced / Herbed Beer ',
 'rating': 3.56,
 'brewery': 'Hophead Brewery',
 'link': 'https://untappd.com/b/hophead-brewery-yakudza-yakuza/1309384'}