In [1]:
import json
import os
import time
import requests
import numpy as np
import pandas as pd
from fake_useragent import UserAgent
from tqdm.notebook import tqdm

In [2]:
def get_comment_url(main_page_url, start_idx_value):
    s1, s2 = main_page_url.split('1s0x')[1].split('!')[0].split(':0x')
    comment_url = 'https://www.google.com/maps/preview/review/listentitiesreviews?authuser=0&hl=zh-TW&gl=tw&pb=!1m2!1y{}!2y{}!2m2!1i{}!2i10!3e1!4m5!3b1!4b1!5b1!6b1!7b1!5m2!1s!7e81' \
                    .format(int(s1, 16), int(s2, 16), start_idx_value)
    return comment_url

In [3]:
user_agent = UserAgent()
os.makedirs('comment_data', exist_ok=True)

def get_comment_data(type_name):
    df = pd.read_csv(f'main_page_data/{type_name}_after.csv', encoding='utf-16')
    display(df)

    # 這邊算為了算 total comment number 而已, 為了做 tqdm bar
    comment_num_list = []
    for i in range(len(df)):
        title, comment_num, main_page_url = df.iloc[i]

        if str(comment_num) == 'nan':
            continue

        comment_num = int(comment_num.split(' ')[0].replace(',', ''))
        comment_num_list.append(comment_num // 10)

    progress = tqdm(total = sum(comment_num_list))
    os.makedirs(f'comment_data/{type_name}', exist_ok=True)
    for i in range(len(df)):
        title, comment_num, main_page_url = df.iloc[i]

        if str(comment_num) == 'nan':
            continue

        title = title[1:-2] # remove blank
        for invalid_sign in ' /／\%()（）｜|':
            title = title.replace(invalid_sign, '_')
        comment_num = int(comment_num.split(' ')[0].replace(',', ''))

        authors = list()
        stars = list() 
        comments = list()

        try:
            for i in range(0, comment_num // 10):
                start_index_value = i * 10
                comment_url = get_comment_url(main_page_url, start_index_value)

                response = requests.get(comment_url, headers = {'user-agent': user_agent.random})
                text = response.text.replace(')]}\'', '')
                comments_list = json.loads(text)[2]

                for content in comments_list:
                    author, comment, star = content[0][1], content[3], content[4]

                    if comment == None: 
                        continue
                    elif comment.find("由 Google 提供翻譯") == 1:
                        google_len, origin_len = len("由 Google 提供翻譯"), len("(原始評論)")
                        start_index, end_index = comment.find("由 Google 提供翻譯") + google_len + 1, comment.find("(原始評論)") - 2
                        comment = comment[start_index:end_index]

                    authors.append(str(author))
                    stars.append(str(star))
                    comments.append(str(comment))
                progress.update(1)

            google_comment_df = pd.DataFrame({
                "Author": authors,
                "Star": stars,
                "Comment": comments,
            })

            google_comment_df.to_excel(f"comment_data/{type_name}/{title}.xlsx", encoding = 'utf-16', index = False)
            print(f"{title}.xlsx has been saved.")
        except:
            print(f"fail to save {title}.xlsx")


In [9]:
get_comment_data('restaurant')

Unnamed: 0,Title,Comments,Url
0,LALA Kitchen 新美式餐廳 交大店,994 則評論,https://www.google.com/maps/place/LALA+Kitchen...
1,和選旅-硯宴軒,350 則評論,https://www.google.com/maps/place/%E5%92%8C%E9...
2,漢堡王Burger King 交大店,315 則評論,https://www.google.com/maps/place/%E6%BC%A2%E5...
3,RINGOAL車庫餐廳,383 則評論,https://www.google.com/maps/place/RINGOAL%E8%B...
4,橄欖樹廚房,669 則評論,https://www.google.com/maps/place/%E6%A9%84%E6...
...,...,...,...
265,夠麻吉股份有限公司,409 則評論,https://www.google.com/maps/place/%E5%A4%A0%E9...
266,紓咖,20 則評論,https://www.google.com/maps/place/%E7%B4%93%E5...
267,太魯閣晶英酒店梅園中餐廳,174 則評論,https://www.google.com/maps/place/%E5%A4%AA%E9...
268,CITYLINK 內湖店,"1,953 則評論",https://www.google.com/maps/place/CITYLINK+%E5...


  0%|          | 0/33448 [00:00<?, ?it/s]

fail to save LALA_Kitchen_新美式餐廳_交大店.xlsx
和選旅-硯宴軒.xlsx has been saved.
漢堡王Burger_King_交大店.xlsx has been saved.
RINGOAL車庫餐廳.xlsx has been saved.
橄欖樹廚房.xlsx has been saved.
和選旅-凡爾賽_僅供早餐_.xlsx has been saved.
托斯卡尼尼義大利餐廳-竹科店.xlsx has been saved.
儷舍坊.xlsx has been saved.
清華水漾餐廳.xlsx has been saved.
麥當勞-新竹交大店.xlsx has been saved.
貳貳柒_lab227.xlsx has been saved.
新天地美食館.xlsx has been saved.
英美食坊.xlsx has been saved.
步藏私.xlsx has been saved.
國立交通大學第一餐廳.xlsx has been saved.
YATS葉子.xlsx has been saved.
貝恩義大利料理.xlsx has been saved.
伊尹小築.xlsx has been saved.
小方桌__原_小餐桌__La_Petite_Table.xlsx has been saved.
漢神鐵板燒_新竹鐵板燒_新竹美食_新竹餐廳_新竹必吃_光復路必吃_光復路美食_光復路餐廳_園區美食_.xlsx has been saved.
佬法室早午餐_Old_Brunch.xlsx has been saved.
松江屋-慈雲店.xlsx has been saved.
FIZZ_費茲餐廳___異國料理、新竹美食__早餐四樓_午晚餐二樓_.xlsx has been saved.
禧樂_丼飯_刺身_炭燒.xlsx has been saved.
La_stella義式人文餐廳.xlsx has been saved.
茉莉小鎮.xlsx has been saved.
Mumu餐桌.xlsx has been saved.
fail to save Uncles’_義國料理.xlsx
Garden_Party.xlsx has been saved.
傳家生活小館.xlsx has

欣葉鐘菜_中城廣場店.xlsx has been saved.
雍翠庭_Chinese_Cuisine_-_北投麗禧溫泉酒店_Grand_View_Resort_Beitou.xlsx has been saved.
圓安宴餐廳.xlsx has been saved.
敘日_全日餐廳-台北六福萬怡酒店.xlsx has been saved.
22:02火鍋南港店.xlsx has been saved.
飛花落院.xlsx has been saved.
君品酒店.xlsx has been saved.
木盒子柴燒窯烤披薩_假日無訂位、路邊可停車_.xlsx has been saved.
遠東SOGO_台北敦化館.xlsx has been saved.
啖飯_19101_cuisine.xlsx has been saved.
和苑三井花園飯店_台北忠孝.xlsx has been saved.
國立清華大學.xlsx has been saved.
fail to save CITYLINK南港店.xlsx
北投麗禧溫泉酒店_Grand_View_Resort_Beitou.xlsx has been saved.
客家文化館好客餐廳.xlsx has been saved.
梅子活海鮮餐廳.xlsx has been saved.
行尞餐廳.xlsx has been saved.
紅樓中餐廳.xlsx has been saved.
圓山大飯店.xlsx has been saved.
集英樓餐廳.xlsx has been saved.
Reboot_Cafe.xlsx has been saved.
映景觀餐廳_裕元花園酒店_.xlsx has been saved.
菇菇熊gugubear.xlsx has been saved.
時刻動漫餐廳SKR.xlsx has been saved.
德龍商店_新竹水潤餅_.xlsx has been saved.
ZEBRA義式餐廳.xlsx has been saved.
forchetta_餐廳.xlsx has been saved.
屋馬燒肉園邸店.xlsx has been saved.
饗賓餐旅事業.xlsx has been saved.
与玥樓頂級粵菜餐廳.xlsx has been 

In [10]:
get_comment_data('attraction')

Unnamed: 0,Title,Comments,Url
0,竹湖,31 則評論,https://www.google.com/maps/place/%E7%AB%B9%E6...
1,寄梅亭,10 則評論,https://www.google.com/maps/place/%E5%AF%84%E6...
2,清華大學成功湖,379 則評論,https://www.google.com/maps/place/%E6%B8%85%E8...
3,靜心湖,"2,107 則評論",https://www.google.com/maps/place/%E9%9D%9C%E5...
4,沃夫帕克WolfPark 親子館(新竹親子餐廳/親子景點/室內親子),529 則評論,https://www.google.com/maps/place/%E6%B2%83%E5...
...,...,...,...
285,好好聚落,395 則評論,https://www.google.com/maps/place/%E5%A5%BD%E5...
286,宮原眼科,"24,400 則評論",https://www.google.com/maps/place/%E5%AE%AE%E5...
287,鳳鼻頭文化遺址,200 則評論,https://www.google.com/maps/place/%E9%B3%B3%E9...
288,高雄市忠烈祠,"1,896 則評論",https://www.google.com/maps/place/%E9%AB%98%E9...


  0%|          | 0/153138 [00:00<?, ?it/s]

竹湖.xlsx has been saved.
寄梅亭.xlsx has been saved.
清華大學成功湖.xlsx has been saved.
靜心湖.xlsx has been saved.
沃夫帕克WolfPark_親子館_新竹親子餐廳_親子景點_室內親子_.xlsx has been saved.
青草湖.xlsx has been saved.
新竹之心.xlsx has been saved.
幾米廣場.xlsx has been saved.
新竹科學園區探索館.xlsx has been saved.
fail to save 新竹公園.xlsx
豆腐岩.xlsx has been saved.
竹東稻田迷宮.xlsx has been saved.
新竹市立動物園.xlsx has been saved.
關新公園.xlsx has been saved.
新城風糖休閒園區.xlsx has been saved.
寶山水庫.xlsx has been saved.
交清小徑_清交小徑_.xlsx has been saved.
fail to save 青青草原.xlsx
明湖公園.xlsx has been saved.
新寶觀光果園.xlsx has been saved.
赤土崎公園.xlsx has been saved.
新竹市消防博物館.xlsx has been saved.
高峰植物園.xlsx has been saved.
親子公園.xlsx has been saved.
新竹頭前溪橋左岸櫻花步道.xlsx has been saved.
台積創新館.xlsx has been saved.
fail to save 新竹市眷村博物館.xlsx
清大櫻花林.xlsx has been saved.
水圳森林公園.xlsx has been saved.
中央公園.xlsx has been saved.
下竹樹屋.xlsx has been saved.
Eighteen_Peaks_Mountain_Park.xlsx has been saved.
香山綠色隧道.xlsx has been saved.
濟生Beauty新竹觀光工廠_預約制_.xlsx has been saved.
fail to save 

國立臺灣史前文化博物館南科考古館.xlsx has been saved.
好好聚落.xlsx has been saved.
宮原眼科.xlsx has been saved.
鳳鼻頭文化遺址.xlsx has been saved.
高雄市忠烈祠.xlsx has been saved.
德元埤旅遊服務中心.xlsx has been saved.


In [11]:
get_comment_data('bookstore')

Unnamed: 0,Title,Comments,Url
0,麗文校園書局(交通大學店),68 則評論,https://www.google.com/maps/place/%E9%BA%97%E6...
1,水木書苑,763 則評論,https://www.google.com/maps/place/%E6%B0%B4%E6...
2,大學書局,"1,231 則評論",https://www.google.com/maps/place/%E5%A4%A7%E5...
3,筆耕小書店,86 則評論,https://www.google.com/maps/place/%E7%AD%86%E8...
4,校園書房(新竹勝利店),110 則評論,https://www.google.com/maps/place/%E6%A0%A1%E5...
...,...,...,...
197,書屋花甲Ｘ而立書店（台大店）,84 則評論,https://www.google.com/maps/place/%E6%9B%B8%E5...
198,太平青鳥,520 則評論,https://www.google.com/maps/place/%E5%A4%AA%E5...
199,書屋花甲,284 則評論,https://www.google.com/maps/place/%E6%9B%B8%E5...
200,練習曲書店,342 則評論,https://www.google.com/maps/place/%E7%B7%B4%E7...


  0%|          | 0/11453 [00:00<?, ?it/s]

麗文校園書局_交通大學店_.xlsx has been saved.
水木書苑.xlsx has been saved.
大學書局.xlsx has been saved.
筆耕小書店.xlsx has been saved.
校園書房_新竹勝利店_.xlsx has been saved.
華通書坊_Huatung_Bookstore.xlsx has been saved.
朵多美語書店.xlsx has been saved.
友善書業供給合作社.xlsx has been saved.
金石堂台積店.xlsx has been saved.
A++參考書文具館.xlsx has been saved.
貓頭鷹人文.xlsx has been saved.
新興書局.xlsx has been saved.
上誼書店.xlsx has been saved.
昇大書局.xlsx has been saved.
321書市,新竹二手書店.xlsx has been saved.
書耕電腦書店.xlsx has been saved.
理科書局.xlsx has been saved.
銓民書店.xlsx has been saved.
東南街書店.xlsx has been saved.
以琳書房新竹門市.xlsx has been saved.
刺蝟二手書店-固定週日公休，有其它調整請見FB公告，謝謝！.xlsx has been saved.
金鼎兒童書店.xlsx has been saved.
墊腳石圖書廣場_新竹店.xlsx has been saved.
信誼書局.xlsx has been saved.
盛豐文化事業有限公司.xlsx has been saved.
金石堂書店_新竹店.xlsx has been saved.
梅竹影音租書城.xlsx has been saved.
花蝶_新竹振興店_.xlsx has been saved.
錦城漫畫影音租售館_錦城東山書坊_.xlsx has been saved.
海邊書坊.xlsx has been saved.
文國書局.xlsx has been saved.
玫瑰色二手書店.xlsx has been saved.
黎明書店.xlsx has been saved.
智忠書局.xls

In [None]:
get_comment_data('shopping')

Unnamed: 0,Title,Comments,Url
0,大聯大百貨生活館(交大門市),56 則評論,https://www.google.com/maps/place/%E5%A4%A7%E8...
1,小雅10元百貨,79 則評論,https://www.google.com/maps/place/%E5%B0%8F%E9...
2,水木百貨,103 則評論,https://www.google.com/maps/place/%E6%B0%B4%E6...
3,大信五金百貨複合館,34 則評論,https://www.google.com/maps/place/%E5%A4%A7%E4...
4,時代生活百貨,26 則評論,https://www.google.com/maps/place/%E6%99%82%E4...
...,...,...,...
291,寶島鐘錶 林口三井店,107 則評論,https://www.google.com/maps/place/%E5%AF%B6%E5...
292,御殿場PREMIUM OUTLETS,"20,135 則評論",https://www.google.com/maps/place/%E5%BE%A1%E6...
293,三井OUTLET PARK 木更津,"12,904 則評論",https://www.google.com/maps/place/%E4%B8%89%E4...
294,三井OUTLET PARK 幕張,"7,749 則評論",https://www.google.com/maps/place/%E4%B8%89%E4...


  0%|          | 0/140831 [00:00<?, ?it/s]

大聯大百貨生活館_交大門市_.xlsx has been saved.
小雅10元百貨.xlsx has been saved.
水木百貨.xlsx has been saved.
大信五金百貨複合館.xlsx has been saved.
fail to save 時代生活百貨.xlsx
旺財牛五金百貨生活館.xlsx has been saved.
fail to save 大遠百_新竹店.xlsx
長春10元商店.xlsx has been saved.
福龍行_進口百貨.xlsx has been saved.


In [4]:
get_comment_data('medical')

Unnamed: 0,Title,Comments,Url
0,康德診所,165 則評論,https://www.google.com/maps/place/%E5%BA%B7%E5...
1,長春診所,462 則評論,https://www.google.com/maps/place/%E9%95%B7%E6...
2,和杏中醫診所,55 則評論,https://www.google.com/maps/place/%E5%92%8C%E6...
3,茱麗婦產科,74 則評論,https://www.google.com/maps/place/%E8%8C%B1%E9...
4,倪耳鼻喉科診所,74 則評論,https://www.google.com/maps/place/%E5%80%AA%E8...
...,...,...,...
228,台灣醫院協會,3 則評論,https://www.google.com/maps/place/%E5%8F%B0%E7...
229,新竹縣衛生局,60 則評論,https://www.google.com/maps/place/%E6%96%B0%E7...
230,花蓮慈濟醫院,439 則評論,https://www.google.com/maps/place/%E8%8A%B1%E8...
231,中國醫藥大學附設醫院急診部,249 則評論,https://www.google.com/maps/place/%E4%B8%AD%E5...


  0%|          | 0/8432 [00:00<?, ?it/s]

康德診所.xlsx has been saved.
長春診所.xlsx has been saved.
和杏中醫診所.xlsx has been saved.
茱麗婦產科.xlsx has been saved.
倪耳鼻喉科診所.xlsx has been saved.
莊峻鏞內科診所.xlsx has been saved.
傅小兒科診所.xlsx has been saved.
深潭耳鼻喉科診所.xlsx has been saved.
黃瑞耳鼻喉科診所.xlsx has been saved.
新竹市家醫診所--欣悅光診所.xlsx has been saved.
吳雨圭診所-新竹家庭醫學科,新竹內科,新竹小兒科,新竹皮膚科,新竹風濕過敏.xlsx has been saved.
實和診所.xlsx has been saved.
林安復耳鼻喉科診所.xlsx has been saved.
柏廷耳鼻喉科.xlsx has been saved.
健群耳鼻喉科診所.xlsx has been saved.
新竹安慎診所_洗腎_糖尿病_新陳代謝科_腎臟病_一般內科_泌尿科_成長發育_心臟科_乳房外科門診推薦診所.xlsx has been saved.
宏仁診所.xlsx has been saved.
艾美時尚診所.xlsx has been saved.
阮皮膚科診所.xlsx has been saved.
江美麗婦產科診所Jiangmeili_OBS_Clinic_新竹_婦產科_女醫_推薦_HPV疫苗_健檢_經痛_感染_更年期_漏尿_產後修復_備孕_減重_抹片_雷射_子宮肌瘤.xlsx has been saved.
舜天診所.xlsx has been saved.
祐庭耳鼻喉科診所.xlsx has been saved.
凃富籌復健診所.xlsx has been saved.
宏偉婦產科診所.xlsx has been saved.
送子鳥生殖中心___Stork_Fertility_Center.xlsx has been saved.
詠恆中醫診所.xlsx has been saved.
麗明眼科診所.xlsx has been saved.
星和診所_竹北光明醫美.xlsx has been saved.
新綠牙醫診所.xlsx has 