In [8]:
import os
import queue

import requests
from bs4 import BeautifulSoup

In [1]:
class User:
    """Profile summary of one user: identity plus four counters.

    Attributes are stored exactly as passed in; no validation or conversion
    is performed.
    """

    def __init__(self, user_id, name, checkins, beers, friends, badges):
        """Store the user's id, display name and the four profile counters."""
        self.user_id, self.name = user_id, name
        self.checkins, self.beers = checkins, beers
        self.friends, self.badges = friends, badges

    def __repr__(self):
        """Return a one-sentence, human-readable summary of the profile."""
        who = f"{self.name} ({self.user_id})"
        drinking = f"has rated {self.beers} beers through {self.checkins} checkins"
        social = f"has {self.friends} friends and got {self.badges} badges"
        return f"{who} {drinking}, {social}"

In [10]:
class CheckIn:
    """One check-in: who checked in which beer, plus optional details.

    Attributes are stored exactly as passed in; no validation or conversion
    is performed.
    """

    def __init__(self, checkin_id, user_id, beer_id, rating=None, location_id=None, comment='', tagged_friends=None, cheers=0):
        """Store the check-in fields.

        Args:
            checkin_id: Identifier of the check-in.
            user_id: Identifier of the user who checked in.
            beer_id: Identifier of the beer.
            rating: Rating given, or None when there is none.
            location_id: Identifier of the location, or None when there is none.
            comment: Free-text comment; defaults to an empty string.
            tagged_friends: List of tagged friends. When omitted, every
                instance gets its own new empty list.
            cheers: Number of cheers; defaults to 0.
        """
        self.checkin_id = checkin_id
        self.user_id = user_id
        self.beer_id = beer_id
        self.rating = rating
        self.location_id = location_id
        self.comment = comment
        # A literal `[]` default would be created once and shared by every
        # instance built without this argument, so mutating one check-in's
        # list would silently change all the others.
        self.tagged_friends = [] if tagged_friends is None else tagged_friends
        self.cheers = cheers

    def __repr__(self):
        """Return all fields as a parenthesised, comma-separated tuple-like string."""
        return f"({self.checkin_id}, {self.user_id}, {self.beer_id}, {self.rating}, {self.location_id}, {self.comment}, {self.tagged_friends}, {self.cheers})"

In [20]:
class Beer:
    """Description and aggregate statistics of one beer.

    Attributes are stored exactly as passed in; no validation or conversion
    is performed.
    """

    def __init__(self, beer_id, name, brewery_id, style, abv, ibu, avg_rating, total_ratings, total_checkins, unique_users):
        """Store the beer's identity, its ABV/IBU figures and its rating counters."""
        # Identity
        self.beer_id, self.name = beer_id, name
        self.brewery_id, self.style = brewery_id, style
        # Figures shown with the "ABV" / "IBU" labels in __repr__
        self.abv, self.ibu = abv, ibu
        # Aggregates
        self.avg_rating, self.total_ratings = avg_rating, total_ratings
        self.total_checkins, self.unique_users = total_checkins, unique_users

    def __repr__(self):
        """Return all fields as a parenthesised, comma-separated tuple-like string."""
        identity = f"{self.beer_id}, {self.name}, {self.brewery_id}, {self.style}"
        strength = f"{self.abv} ABV, {self.ibu} IBU"
        aggregates = f"{self.avg_rating}, {self.total_ratings}, {self.total_checkins}, {self.unique_users}"
        return f"({identity}, {strength}, {aggregates})"

In [32]:
# HTTP settings shared by every request made in this notebook.
headers = {'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:80.0) Gecko/20100101 Firefox/80.0'}

# SECURITY: the session cookie is a login credential and must not be stored in
# the notebook. It is read from the UNTAPPD_USER_V3_E environment variable;
# when that variable is unset or empty, requests are sent without a session
# cookie.
# NOTE(review): the value that used to be hard-coded here should be treated as
# leaked - invalidate that session before sharing or committing this file.
_session_cookie = os.environ.get('UNTAPPD_USER_V3_E')
cookies = {'untappd_user_v3_e': _session_cookie} if _session_cookie else {}

In [22]:
# Friends page of a user; the single placeholder is filled with a username.
user_friends_url_template = (
    "https://untappd.com"
    "/user/{}/friends"
)
# Endpoint with two placeholders; this notebook fills them with a username
# and the number 25.
# NOTE(review): the second value is presumably a paging offset - confirm.
more_friends_url_template = (
    "https://untappd.com"
    "/friend/more_friends/{}/{}?sort="
)

def get_user_friends(username, timeout=10):
    """Return the usernames listed on a user's friends page.

    Args:
        username: Value inserted into ``user_friends_url_template``.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list with the text of every ``<span class="username">`` found in the
        page body. An empty list is returned (after printing an ``ERROR``
        line) when the response status is not 200, and also when the document
        has no ``<body>``.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the timeout expires.

    NOTE(review): only the page returned by this one request is parsed;
    ``more_friends_url_template`` is not used here, so long friend lists are
    presumably truncated - confirm.
    """
    response = requests.get(
        user_friends_url_template.format(username),
        headers=headers,
        cookies=cookies,
        # Without a timeout a stalled connection would block the caller forever.
        timeout=timeout,
    )
    if response.status_code != 200:
        print("ERROR: {} for {}".format(response.status_code, username))
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    # html.parser leaves `body` as None for an empty or body-less document.
    if soup.body is None:
        return []

    return [span.get_text() for span in soup.body.find_all("span", {"class": "username"})]

In [25]:
def scrap_user_stats(user_id, timeout=10):
    """Scrape a user's profile page and return its headline statistics.

    Args:
        user_id: Username as it appears in the profile URL.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A ``User`` holding the display name and the check-in, beer, friend and
        badge counters read from the page.

    Raises:
        requests.exceptions.HTTPError: When the server answers with a 4xx/5xx
            status.
        requests.exceptions.RequestException: On connection failures or when
            the timeout expires.
        AttributeError: When the page lacks the expected ``div.info``,
            ``div.stats`` or counter elements.
    """
    user_url = f"https://untappd.com/user/{user_id}"
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(user_url, headers=headers, cookies=cookies, timeout=timeout)
    # Fail with the HTTP status instead of an opaque AttributeError raised
    # while parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    name = soup.find("div", {"class": "info"}).find("h1").text.strip()

    stats_div = soup.find("div", {"class": "stats"})

    def find_stat_with_href(href):
        """Return the counter shown inside the stats link that points to `href`."""
        stat_text = stats_div.find("a", {"href": href}).find("span", {"class": "stat"}).text
        # Counters are rendered with thousands separators, e.g. "2,353".
        return int(stat_text.replace(",", ""))

    checkins = find_stat_with_href(f"/user/{user_id}")
    beers = find_stat_with_href(f"/user/{user_id}/beers")
    friends = find_stat_with_href(f"/user/{user_id}/friends")
    badges = find_stat_with_href(f"/user/{user_id}/badges")
    return User(user_id, name, checkins, beers, friends, badges)

In [26]:
def scrap_user_checkins(user_id, timeout=10):
    """Scrape the check-ins shown on a user's profile page.

    Args:
        user_id: Username as it appears in the profile URL.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list of ``CheckIn`` objects, one per element matching
        ``div[id*="checkin_"]``. An empty list is returned (after printing an
        ``ERROR`` line) when the response status is not 200.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the timeout expires.
    """
    request_url = f"https://untappd.com/user/{user_id}"
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(request_url, headers=headers, cookies=cookies, timeout=timeout)
    if response.status_code != 200:
        print("ERROR: {} for {}".format(response.status_code, user_id))
        return []
    checkins = BeautifulSoup(response.text, 'html.parser').select('div[id*="checkin_"]')

    # NOTE(review): the sample output recorded in this notebook shows a
    # check-in whose beer id came back as None; presumably its sentence read
    # "is drinking an ...", so both articles are accepted - confirm against
    # the live markup.
    beer_markers = (' is drinking a ', ' is drinking an ')
    location_markers = (' at ',)

    def linked_id_after(nodes, markers):
        """Return the numeric id ending the href of the node that follows a marker text.

        Returns None when no marker is present or the marker is the last node.
        """
        for position, node in enumerate(nodes[:-1]):
            # Only text nodes can equal a marker; tags are skipped.
            if isinstance(node, str) and node in markers:
                return int(nodes[position + 1]['href'].split("/")[-1])
        return None

    def parse_checkin_from_html(checkin):
        """Build a CheckIn from one check-in element."""
        checkin_id = int(checkin["data-checkin-id"])
        # The description is a mix of text nodes and links, e.g.
        # [<a user>, ' is drinking a ', <a beer>, ..., ' at ', <a location>].
        checkin_description = checkin.find("p", {"class": "text"}).contents
        beer_id = linked_id_after(checkin_description, beer_markers)
        location_id = linked_id_after(checkin_description, location_markers)

        rating_div = checkin.find("div", {"class": "caps"})
        rating = None if rating_div is None else float(rating_div['data-rating'])

        comment_div = checkin.find("p", {"class": "comment-text"})
        comment = None if comment_div is None else comment_div.text.strip()

        tagged_friends_div = checkin.find("div", {"class": "tagged-friends"})
        if tagged_friends_div is None:
            tagged_friends = []
        else:
            # The last path segment of each link is kept as the friend's id.
            tagged_friends = [a["href"].split("/")[-1] for a in tagged_friends_div.find_all("a")]

        cheers_div = checkin.find("div", {"class": "cheers"})
        cheers = 0 if cheers_div is None else int(cheers_div.find("span", {"class": "count"}).find("span").text)

        return CheckIn(checkin_id, user_id, beer_id, rating, location_id, comment, tagged_friends, cheers)

    return [parse_checkin_from_html(checkin) for checkin in checkins]

In [56]:
# Download and parse the page of a single beer; the cell below reads `soup`.
beer_id = 96597
beer_url = f"https://untappd.com/b/a/{beer_id}"
request = requests.get(
    url=beer_url,
    headers=headers,
    cookies=cookies,
)
soup = BeautifulSoup(request.text, features='html.parser')

In [57]:
# Header block: beer name, brewery link and style.
name_div = soup.find("div", class_="name")
name = name_div.find("h1").text
# The brewery id is the last path segment of the first link in the header.
brewery_id = name_div.find("a")['href'].rpartition("/")[2]
style = name_div.find("p", class_="style").text

# Details block: ABV, IBU, average rating and number of ratings.
details_div = soup.find("div", class_="details")
abv = float(details_div.find("p", class_="abv").text.partition("%")[0])
ibu_text = details_div.find("p", class_="ibu").text.strip()
# The page shows the literal text "No IBU" when no value is available.
ibu = int(ibu_text.partition(" ")[0]) if ibu_text != "No IBU" else None
avg_rating = float(details_div.find("div", class_="caps")["data-rating"])
# The ratings count is rendered with thousands separators.
total_ratings = int(details_div.find("p", class_="raters").text.partition(" ")[0].replace(",", ""))


(name, brewery_id, style, abv, ibu, avg_rating, total_ratings)

('Gulden Draak 9000 Quadruple',
 'BrouwerijVanSteenberge',
 'Belgian Quadrupel',
 10.5,
 25,
 3.88253,
 105508)

In [15]:
# Try both scrapers on one profile: print the stats, then the first check-in
# returned for that user.
user_id = "Sheehan"
sample_user = scrap_user_stats(user_id)
print(sample_user)
sample_checkin = scrap_user_checkins(user_id)[0]
print(sample_checkin)

Michael S. (Sheehan) has rated 2192 beers through 2353 checkins, has 52 friends and got 1736 badges
(941126317, Sheehan, None, 3.5, 306973, None, ['mgleason6122'], 1)


In [None]:
# Fetch one user's friends page and read the friend count from it: the last
# whitespace-separated token of the first <p> inside div.top, with its
# parentheses removed.
request = requests.get(
    url=user_friends_url_template.format("timm3h"),
    headers=headers,
    cookies=cookies,
)
soup = BeautifulSoup(request.text, features='html.parser')
number_of_friends = int(
    soup.body.find("div", class_="top").find("p")
    .text.split()[-1]
    .replace('(', '').replace(')', '')
)

In [None]:
# Call the "more friends" endpoint for the same user with 25 as the second
# URL value, and parse the raw response bytes.
more_friends_request = requests.get(
    url=more_friends_url_template.format("timm3h", 25),
    headers=headers,
    cookies=cookies,
)
soup = BeautifulSoup(more_friends_request.content, features='html.parser')

In [None]:
# Display the headers of the request that produced `more_friends_request`
# (inspection aid; the value is not used by any other cell).
more_friends_request.request.headers

In [None]:
# Crawl the friendship graph starting from one user.
# `set.pop()` removes an arbitrary element, so users are expanded in no
# particular order.
# NOTE(review): the loop has no size limit and no delay between requests; it
# only stops once no unvisited username is left - consider bounding it.
start_user = "Jonnyhead"
unvisited_nodes = {start_user}

visited_nodes = set()
# Number of known users (visited + still queued) after each expansion.
total_nodes = [0]

i = 0
while unvisited_nodes:
    username = unvisited_nodes.pop()
    # Mark the user as visited before queuing their friends; otherwise a
    # friends list that contains the user themselves would re-queue them,
    # fetch them a second time and overstate the running total.
    visited_nodes.add(username)
    unvisited_nodes.update(
        friend for friend in get_user_friends(username) if friend not in visited_nodes
    )
    total_nodes.append(len(unvisited_nodes) + len(visited_nodes))
    # '\r' rewrites the same output line so the counter updates in place.
    print("{:03d}: {} found users            ".format(i+1, total_nodes[-1]), end='\r')
    i += 1