In [2]:
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob
import mysql.connector

base_url = 'https://www.yelp.com/biz/'

# Listing page used by the exploratory cells that follow.
url = "https://www.yelp.com/biz/ryptic-room-escape-san-mateo-3?osq=escape+room"
# `html` is the requests Response object; its .text holds the page markup.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (e.g. a blocked request) right here
# instead of letting an error page be parsed as if it were the listing.
html = requests.get(url, timeout=30)
html.raise_for_status()
soup = BeautifulSoup(html.text, 'html.parser')

In [50]:
# Review elements on the fetched page: matched by the review class name and lang="en".
span_elements = soup.find_all(attrs={'class': 'raw__09f24__T4Ezm', 'lang': 'en'})

In [49]:
def get_reviews_loop_upper_bound(soup_obj):
    """Return the exclusive upper bound for looping through the review pages.

    Review pages are reached by appending ``?start=<offset>`` to the URL, with
    the offset advancing by 10 per page. If a listing has 20 review pages, the
    last one is ``?start=190`` and this function returns 191, the bound needed
    for ``range(0, bound, 10)`` to visit every page.

    Args:
        soup_obj: Parsed listing page. Must provide ``find_all(class_=...)``
            returning elements that expose a ``.text`` attribute.

    Returns:
        ``(last_page - 1) * 10 + 1`` for the first "# of #" label found, or 0
        when no such label exists.
    """
    # 'css-chan6m' elements include a "# of #" label, with the former being the
    # current review page and the latter being the last review page.
    # Search the object that was passed in, not a notebook-level global.
    for label in soup_obj.find_all(class_='css-chan6m'):
        if 'of' not in label.text:
            continue
        try:
            last_page = int(label.text.split()[-1])
        except ValueError:
            # Text that merely contains "of" but does not end in a page number.
            continue
        return (last_page - 1) * 10 + 1
    return 0

# Try the helper on the page parsed into `soup` earlier in the notebook.
get_reviews_loop_upper_bound(soup)

171

In [36]:
def get_room_info(soup_obj):
    """Return the escape room's name and the city the room is in.

    Both values are parsed from the page title, which is expected to look like
    ``"<name> - <photos/reviews> - <street>, <city>, <state> - ..."``.

    Args:
        soup_obj: Parsed listing page exposing ``title.text``.

    Returns:
        A ``(room_name, room_city)`` tuple of whitespace-stripped strings.

    Raises:
        IndexError: If the title has fewer than three segments, or the address
            segment has fewer than two comma-separated parts.
    """
    # Split on the spaced separator ' - ' rather than a bare '-' so hyphens
    # inside the room name or street address do not shift the segment indexes.
    room_title_info = soup_obj.title.text.split(' - ')

    # The name is the 0th segment.
    room_name = room_title_info[0].strip()

    # The address is segment 2; the city is its second-to-last comma-separated part.
    room_city = room_title_info[2].split(',')[-2].strip()
    return room_name, room_city

In [37]:
# Print a "<name> - <city>" header, then every review element on the page
# prefixed with its TextBlob polarity score.
room_name, room_city = get_room_info(soup)
header = "{} - {}\n-----------".format(room_name, room_city)
print(header)
for review_span in span_elements:
    review_text = review_span.text
    polarity = TextBlob(review_text).sentiment.polarity
    print("{:.4f} : {}".format(polarity, review_text))


RYPTIC ROOM ESCAPE - San Mateo
-----------
0.2750 : We had a great game master, but that's about all I can say that was good about this escape room. I've done dozens of escape rooms and this is one of the jankiest rooms I've done. Low tech, you can see all four rooms when you walk in, and the walls are made of legos. And no, I'm not joking.
-0.0701 : We booked our event for a birthday party on 8/1/2022 and the day before the event, Ryptic sent an automated cancellation. When I emailed for a manager and called to find out what happened, there was no response.  Later, I get an email saying the host couldn't make it and so they had to cancel.  Poorly handled.  Extremely disappointing Ryptic.  This is not how you run a business!  Now we are left last minute with not birthday plans...wow...so inconsiderate and thoughtless.  I get that things happen, but a courtesy call and, hey, we're sorry, how can we make things right with you would have been a better option.  This is why you're getting a

In [56]:
# URL pieces for the scraper defined below, which joins them as base_url + room_url.
base_url = 'https://www.yelp.com/biz/'
room_url = 'ryptic-room-escape-san-mateo-3'
# NOTE(review): appears to mirror the 171 shown for get_reviews_loop_upper_bound(soup);
# the trial call at the end of this cell passes 21 instead, so this looks unused here.
upper_bound = 171

def get_all_page_reviews(base_url, room_url, upper_bound):
    """Get every review of an escape room and return them in a list.

    Review pages are requested at offsets ``0, 10, 20, ...`` below
    ``upper_bound``. The first page uses the bare listing URL; later pages
    append ``?start=<offset>``.

    Args:
        base_url: URL prefix shared by all listings.
        room_url: Listing slug appended to ``base_url``.
        upper_bound: Exclusive bound on the ``start`` offset.

    Returns:
        A flat list of review texts in page order.

    Raises:
        requests.HTTPError: If any page request returns an error status.
    """
    def get_page_reviews(soup_obj):
        """Return the review texts found on one parsed review page."""
        # Class='raw__09f24__T4Ezm' and lang='en' tags specify reviews.
        # Parse the page that was passed in; reading a notebook-level `soup`
        # here would return the same page's reviews for every request.
        review_elements = soup_obj.find_all(class_='raw__09f24__T4Ezm', lang='en')
        return [review_element.text for review_element in review_elements]

    all_reviews = []
    for i in range(0, upper_bound, 10):  # loop through all review pages
        full_url = base_url + room_url
        if i != 0:  # append appropriate search query for review page
            full_url += f'?start={i}'

        # Fetch the page. The timeout avoids hanging on a stalled connection,
        # and raise_for_status() stops the scrape on an HTTP error instead of
        # silently contributing no reviews for that page.
        html = requests.get(full_url, timeout=30)
        html.raise_for_status()
        soup_obj = BeautifulSoup(html.text, 'html.parser')

        # .extend instead of .append because get_page_reviews returns a list
        all_reviews.extend(get_page_reviews(soup_obj))
    return all_reviews

# Trial run: an upper bound of 21 requests the pages at start offsets 0, 10 and 20.
test_reviews = get_all_page_reviews(base_url, room_url, 21) 

In [57]:
# Number of reviews gathered by the trial run.
len(test_reviews)

30

In [59]:
def create_review_sentiment_list(reviews):
    """Pair each review with its TextBlob polarity score.

    Args:
        reviews: Iterable of review strings.

    Returns:
        A list of ``(score, review)`` tuples in input order, where ``score``
        is the review's polarity rounded to 4 decimal places.
    """
    return [
        (round(TextBlob(review_text).sentiment.polarity, 4), review_text)
        for review_text in reviews
    ]

# Score the trial-run reviews; the bare name on the last line displays the list.
test_review_sentiment_list = create_review_sentiment_list(test_reviews)
test_review_sentiment_list

[(0.275,
  "We had a great game master, but that's about all I can say that was good about this escape room. I've done dozens of escape rooms and this is one of the jankiest rooms I've done. Low tech, you can see all four rooms when you walk in, and the walls are made of legos. And no, I'm not joking."),
 (-0.0701,
  "We booked our event for a birthday party on 8/1/2022 and the day before the event, Ryptic sent an automated cancellation. When I emailed for a manager and called to find out what happened, there was no response.  Later, I get an email saying the host couldn't make it and so they had to cancel.  Poorly handled.  Extremely disappointing Ryptic.  This is not how you run a business!  Now we are left last minute with not birthday plans...wow...so inconsiderate and thoughtless.  I get that things happen, but a courtesy call and, hey, we're sorry, how can we make things right with you would have been a better option.  This is why you're getting a one-star review.  So for all fut

In [60]:
# One "<score> : <review>" line per scored review.
for scored_review in test_review_sentiment_list:
    print("{} : {}".format(*scored_review))

0.275 : We had a great game master, but that's about all I can say that was good about this escape room. I've done dozens of escape rooms and this is one of the jankiest rooms I've done. Low tech, you can see all four rooms when you walk in, and the walls are made of legos. And no, I'm not joking.
-0.0701 : We booked our event for a birthday party on 8/1/2022 and the day before the event, Ryptic sent an automated cancellation. When I emailed for a manager and called to find out what happened, there was no response.  Later, I get an email saying the host couldn't make it and so they had to cancel.  Poorly handled.  Extremely disappointing Ryptic.  This is not how you run a business!  Now we are left last minute with not birthday plans...wow...so inconsiderate and thoughtless.  I get that things happen, but a courtesy call and, hey, we're sorry, how can we make things right with you would have been a better option.  This is why you're getting a one-star review.  So for all future custome

In [25]:
# Header plus per-review polarity report. Relies on room_name, room_city and
# span_elements already being defined in the kernel.
print("{} - {}\n-----------".format(room_name, room_city))
for span in span_elements:
    body = span.text
    print("{:.4f} : {}".format(TextBlob(body).sentiment.polarity, body))

['RYPTIC ROOM ESCAPE ',
 ' 94 Photos & 172 Reviews ',
 ' 205 E 3rd Ave, San Mateo, California ',
 ' Updated March 2024 ',
 ' Escape Games ',
 ' Phone Number ',
 ' Yelp']

In [67]:
### TESTING MYSQL CONNECTION

import getpass
import os

import mysql.connector

# Connection settings are read from the environment so that credentials are
# never stored in the notebook. If MYSQL_PASSWORD is unset, prompt for it.
# NOTE: the password that used to be hard-coded in this cell should be treated
# as leaked and rotated.
cnx = mysql.connector.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    password=os.environ.get("MYSQL_PASSWORD") or getpass.getpass("MySQL password: "),
    database=os.environ.get("MYSQL_DATABASE", "shop"),
)

# try/finally guarantees the cursor and connection are released even when the
# query or the row unpacking below raises.
try:
    cursor = cnx.cursor()
    try:
        sql_query = "SELECT * FROM customers"
        cursor.execute(sql_query)

        # Each row is unpacked into four values; named customer_id rather than
        # id so the built-in id() is not shadowed for the rest of the kernel.
        for (customer_id, first_name, last_name, email) in cursor:
            print(f'{customer_id}, {first_name}, {last_name}, {email}')
    finally:
        cursor.close()
finally:
    cnx.close()


1, Boy, George, george@gmail.com
2, George, Michael, gm@gmail.com
3, David, Bowie, david@gmail.com
4, Blue, Steele, blue@gmail.com
5, Bette, Davis, bette@aol.com
