In [1]:
import requests
import queue
from bs4 import BeautifulSoup

In [2]:
class VisitedUser:
    def __init__(self, user_id, name, checkins, beers, friends, badges):
        self.user_id = user_id
        self.name = name
        self.checkins = checkins
        self.beers = beers
        self.friends = friends
        self.badges = badges
    
    def __repr__(self):
        return f"{self.name} ({self.user_id}) has rated {self.beers} beers through {self.checkins} checkins, has {self.friends} friends and got {self.badges} badges"

In [3]:
class CheckIn:
    def __init__(self, checkin_id, user_id, beer_id, rating = None, location_id = None, comment = '', tagged_friends = [], cheers = 0):
        self.checkin_id = checkin_id
        self.user_id = user_id
        self.beer_id = beer_id
        self.rating = rating
        self.location_id = location_id
        self.comment = comment
        self.tagged_friends = tagged_friends
        self.cheers = cheers

In [4]:
headers = {'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:80.0) Gecko/20100101 Firefox/80.0'}
cookies = {'untappd_user_v3_e': '2d3777bd1b7400dad5984e058296c4a40f706e511c88fb81e3d076bb966c637db308642b067f6d8e785ab6a6b9ab18469688860d31156b26bf166a580a1a2de77efrl%2FvnfpEqSsdCYRCRZteMw4gouJAbPdaxmrmOTmO9JO78aqRSN6pVBbpUAPqK6mBkmFCc3CAykAx9%2B1lQOA%3D%3D'}

In [5]:
user_friends_url_template = "https://untappd.com/user/{}/friends"
more_friends_url_template = "https://untappd.com/friend/more_friends/{}/{}?sort="

def get_user_friends(username):
    request = requests.get(user_friends_url_template.format(username), headers=headers, cookies=cookies) 
    if request.status_code != 200:
        print("ERROR: {} for {}".format(request.status_code, username))
        return []
    
    soup = BeautifulSoup(request.text, 'html.parser')
    
    return [span.getText() for span in soup.body.findAll("span", {"class": "username"})]

In [6]:
def scrap_user_stats(user_id):
    request_url = f"https://untappd.com/user/{user_id}"
    request = requests.get(request_url, headers=headers, cookies=cookies)
    soup = BeautifulSoup(request.text, 'html.parser')
    
    name = soup.find("div", {"class": "info"}).find("h1").text.strip()

    stats_div = soup.find("div", {"class": "stats"})
    def find_stat_with_href(href):
        return int(stats_div.find("a", {"href": href}).find("span", {"class": "stat"}).text.replace(",", ""))

    checkins = find_stat_with_href(f"/user/{user_id}")
    beers = find_stat_with_href(f"/user/{user_id}/beers")
    friends = find_stat_with_href(f"/user/{user_id}/friends")
    badges = find_stat_with_href(f"/user/{user_id}/badges")
    return VisitedUser(user_id, name, checkins, beers, friends, badges)

In [7]:
def scrap_user_checkins(user_id):
    request_url = f"https://untappd.com/user/{user_id}"
    request = requests.get(request_url, headers=headers, cookies=cookies)
    checkins = BeautifulSoup(request.text, 'html.parser').select('div[id*="checkin_"]')
    
    def parse_checkin_from_html(checkin):
        checkin_id = int(checkin["data-checkin-id"])
        checkin_description = checkin.find("p", {"class": "text"}).contents
        beer_id = None
        if ' is drinking a ' in checkin_description:
            beer_id = int(checkin_description[checkin_description.index(' is drinking a ') + 1]['href'].split("/")[-1])

        location_id = None
        if ' at ' in checkin_description:
            location_id = int(checkin_description[checkin_description.index(' at ') + 1]['href'].split("/")[-1])

        rating_div = checkin.find("div", {"class": "caps"})
        rating = None if rating_div is None else float(rating_div['data-rating'])

        comment_div = checkin.find("p", {"class": "comment-text"})
        comment = None if comment_div is None else comment_div.text.strip()

        tagged_friends_div = checkin.find("div", {"class": "tagged-friends"})
        tagged_friends = [] if tagged_friends_div is None else [a["href"].split("/")[-1] for a in tagged_friends_div.findAll("a")]

        cheers_div = checkin.find("div", {"class": "cheers"})
        cheers = 0 if cheers_div is None else int(cheers_div.find("span", {"class": "count"}).find("span").text)

        return CheckIn(checkin_id, user_id, beer_id, rating, location_id, comment, tagged_friends, cheers)
    
    return [parse_checkin_from_html(checkin) for checkin in checkins]

In [8]:
user_id = "Sheehan"
print(scrap_user_stats(user_id))
print(scrap_user_checkins(user_id)[0])

Michael S. (Sheehan) has rated 2175 beers through 2336 checkins, has 52 friends and got 1726 badges
<__main__.CheckIn object at 0x0000019F536BC370>


In [None]:
request = requests.get(user_friends_url_template.format("timm3h"), headers=headers, cookies=cookies)
soup = BeautifulSoup(request.text, 'html.parser')
number_of_friends = int(soup.body.find("div", {"class": "top"}).find("p")\
    .text.split()[-1]\
    .replace('(', '').replace(')', ''))

In [None]:
more_friends_request = requests.get(more_friends_url_template.format("timm3h", 25), headers=headers, cookies=cookies)
soup = BeautifulSoup(more_friends_request.content, 'html.parser')

In [None]:
more_friends_request.request.headers

In [None]:
start_user = "Jonnyhead"
unvisited_nodes = set()
unvisited_nodes.add(start_user)

visited_nodes = set()
total_nodes = [0]

i = 0
while len(unvisited_nodes) > 0:
    username = unvisited_nodes.pop()
    for friend in get_user_friends(username):
        if friend not in unvisited_nodes and friend not in visited_nodes:
            unvisited_nodes.add(friend)
    visited_nodes.add(username)
    total_nodes.append(len(unvisited_nodes) + len(visited_nodes))
    print("{:03d}: {} found users            ".format(i+1, len(unvisited_nodes) + len(visited_nodes)), end='\r')
    i += 1