Skip to content

Commit

Permalink
change div class based on gmaps in june 2022
Browse files Browse the repository at this point in the history
  • Loading branch information
ryuuzake committed Jun 27, 2022
1 parent 33d0f3c commit c03f6c2
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 12 deletions.
46 changes: 36 additions & 10 deletions googlemaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ def get_reviews(self, offset):

# parse reviews
response = BeautifulSoup(self.driver.page_source, 'html.parser')
rblock = response.find_all('div', class_='ODSEW-ShBeI NIyLF-haAclf gm2-body-2')
# TODO: Subject to changes
rblock = response.find_all('div', class_='jftiEf fontBodyMedium')
parsed_reviews = []
for index, review in enumerate(rblock):
if index >= offset:
Expand Down Expand Up @@ -203,16 +204,36 @@ def __parse(self, review):

item = {}

id_review = review.find('button', class_='ODSEW-ShBeI-JIbuQc-menu ODSEW-ShBeI-JIbuQc-menu-SfQLQb-title')['data-review-id']
username = review.find('div', class_='ODSEW-ShBeI-title').find('span').text

try:
review_text = self.__filter_string(review.find('span', class_='ODSEW-ShBeI-text').text)
# TODO: Subject to changes
id_review = review['data-review-id']
except Exception as e:
id_review = None

try:
# TODO: Subject to changes
username = review['aria-label']
except Exception as e:
username = None

try:
# TODO: Subject to changes
review_text = self.__filter_string(review.find('span', class_='wiI7pd').text)
except Exception as e:
review_text = None

rating = float(review.find('span', class_='ODSEW-ShBeI-H1e3jb')['aria-label'].split(' ')[1])
relative_date = review.find('span', class_='ODSEW-ShBeI-RgZmSc-date').text
try:
# TODO: Subject to changes
rating = float(review.find('span', class_='kvMYJc')['aria-label'].split(' ')[1])
except Exception as e:
rating = None

try:
# TODO: Subject to changes
relative_date = review.find('span', class_='rsqaWe').text
except Exception as e:
relative_date = None

try:
n_reviews_photos = review.find('div', class_='section-review-subtitle').find_all('span')[1].text
Expand All @@ -229,13 +250,16 @@ def __parse(self, review):
n_reviews = 0
n_photos = 0

user_url = review.find('a')['href']
try:
user_url = review.find('a')['href']
except Exception as e:
user_url = None

item['id_review'] = id_review
item['caption'] = review_text

# depends on language, which depends on geolocation defined by Google Maps
# custom mapping to transform into date shuold be implemented
# custom mapping to transform into date should be implemented
item['relative_date'] = relative_date

# store datetime of scraping and apply further processing to calculate
Expand Down Expand Up @@ -268,14 +292,16 @@ def __parse_place(self, response):
# expand review description
def __expand_reviews(self):
    """Click every 'More' button in the reviews pane so truncated review
    texts are fully expanded before the page source is parsed.

    Side effects: clicks buttons in the live browser session and sleeps
    2 s after each click to let the DOM update. Returns None.
    """
    # use XPath to load complete reviews
    # TODO: Subject to changes -- targeting the jsaction attribute instead of
    # Google's obfuscated class names, which rotate between deployments
    links = self.driver.find_elements_by_xpath('//button[@jsaction="pane.review.expandReview"]')
    for link in links:  # renamed from `l` (ambiguous single-letter name, E741)
        link.click()
        time.sleep(2)


def __scroll(self):
    """Scroll the reviews side panel to its bottom so Google Maps
    lazy-loads the next batch of reviews.

    Side effects: executes JavaScript in the live browser session.
    Returns None.
    """
    # TODO: Subject to changes -- obfuscated class names rotate between
    # Google Maps deployments; selector valid as of June 2022
    scrollable_div = self.driver.find_element_by_css_selector('div.m6QErb.DxyBCb.kA9KIf.dS8AEf')
    self.driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scrollable_div)
    #self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

Expand Down
17 changes: 15 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
beautifulsoup4==4.6.0
certifi==2022.6.15
charset-normalizer==2.0.12
colorama==0.4.5
configparser==5.2.0
crayons==0.4.0
idna==3.3
numpy==1.23.0
pandas==1.4.3
pymongo==3.9.0
python-dateutil==2.8.2
pytz==2022.1
requests==2.28.0
selenium==3.14.0
six==1.16.0
termcolor==1.1.0
urllib3==1.26.9
webdriver-manager==3.5.2

0 comments on commit c03f6c2

Please sign in to comment.