# In [4]:  (Jupyter cell prompt left over from the notebook export)
"""
-*- coding: utf-8 -*-
========================
Python GoogleMap API
========================
Developed by: Nick Wang
Email: hsnuwindband52@gmail.com
========================
"""
import requests
import re
import json
from bs4 import BeautifulSoup
import time
import pprint

class GoogleMapCrawler():
    """Scraper for Google Maps search results and place reviews.

    Two public entry points are offered:

    * ``load_restaurant_list(searchURL)`` downloads a search-result page and
      returns one list per restaurant found in the embedded page state.
    * ``load_restaurant_reviews(keyList, reviewsNumber)`` downloads the
      reviews of one place, page by page.

    Everything else is a private helper that only transforms data that has
    already been downloaded, so it can be exercised without network access.
    """

    # Seconds to wait on each HTTP request; without a timeout ``requests``
    # waits indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    # The review endpoint is asked for this many reviews per request.
    REVIEWS_PER_PAGE = 10

    # Offset of the first review requested. The original request URL (and the
    # sample URL it was derived from) starts at 8, then 18, 28, ...
    # NOTE(review): this looks like it skips reviews 0-7 -- confirm whether
    # that is intended before changing it.
    _FIRST_REVIEW_OFFSET = 8

    # Position of the <script> tag expected to hold APP_INITIALIZATION_STATE.
    _STATE_SCRIPT_INDEX = 7

    # Guard prefix that precedes the JSON payloads handled here.
    _JSON_GUARD = ")]}'"

    # Textual clean-ups applied, in this order, to the script body before the
    # state assignment is extracted.
    # NOTE(review): removing every space also removes the spaces inside string
    # values (names, addresses); kept as-is because the trailing-comma fix and
    # the extraction pattern below were written against the stripped text.
    _SCRIPT_CLEANUPS = (
        ("\n'", ''),
        ("\n", ''),
        (' ', ''),
        ("')]}'", ''),
        (",]", "]"),
    )

    # Captures the value assigned to APP_INITIALIZATION_STATE up to the next
    # ';'. The original pattern read ``=s*`` (a typo for ``=\s*``).
    _STATE_PATTERN = re.compile(
        r'APP_INITIALIZATION_STATE.*?=\s*(.*?);', re.DOTALL | re.MULTILINE)

    _REVIEWS_URL_HEAD = ("https://www.google.com/maps/preview/review/"
                         "listentitiesreviews?authuser=0&hl=zh-TW&gl=tw&pb=!1m2!1y")
    _REVIEWS_URL_TAIL = "!3e1!4m5!3b1!4b1!5b1!6b1!7b1!5m2!1sGErSXsr0MYqRr7wPzO6GOA!7e81"

    # ------------------------------------------------------------------
    # Search results
    # ------------------------------------------------------------------
    def load_restaurant_list(self, searchURL):
        """Download a Google Maps search page and list the restaurants in it.

        Args:
            searchURL: URL of a Google Maps search-result page.

        Returns:
            A list with one element per restaurant. Each element is a list:

            ``[name, address, starScore, reviewsCount, imageURL,
            starDistribution, restaurantKey]``

            for example ``3.9`` for the star score, ``499`` for the number of
            reviews, ``[25, 28, 90, 209, 147]`` for the star distribution and
            ``['3765764594505960015', '-5897351976551024977']`` for the key.
            ``restaurantKey`` is what ``load_restaurant_reviews`` expects as
            ``keyList``.

        Raises:
            IndexError, TypeError, ValueError: if the page does not have the
                layout this parser expects.
        """
        r = requests.get(searchURL, timeout=self.REQUEST_TIMEOUT)

        # The page state lives in an inline <script>; pick it by position.
        soup = BeautifulSoup(r.text, 'html.parser')
        state_script = soup.find_all("script")[self._STATE_SCRIPT_INDEX]

        state = self._extract_initial_state(state_script.text)
        entries = self._parse_search_results(state)
        return [self._parse_restaurant(entry) for entry in entries]

    @classmethod
    def _extract_initial_state(cls, script_text):
        """Decode the APP_INITIALIZATION_STATE value found in a script body."""
        context = str(script_text)
        for old, new in cls._SCRIPT_CLEANUPS:
            context = context.replace(old, new)
        # Several matches are concatenated before decoding, as before.
        matches = cls._STATE_PATTERN.findall(context)
        return json.loads(''.join(matches))

    @classmethod
    def _parse_search_results(cls, state):
        """Return the per-restaurant entries nested inside the page state."""
        # state[3][2] is itself a JSON document stored as a guarded string.
        payload = state[3][2].replace(cls._JSON_GUARD, "")
        entries = json.loads(payload)[0][1]
        # The first element is not a restaurant entry and is dropped; slicing
        # (unlike pop(0)) also copes with an empty list.
        return entries[1:]

    @staticmethod
    def _parse_restaurant(entry):
        """Pick the fields of interest out of one raw search-result entry."""
        info = entry[14]
        photo = info[37][0][0]
        return [
            info[11],      # restaurant name
            info[2][0],    # address
            info[4][7],    # star score
            info[4][8],    # number of reviews
            photo[6][0],   # photo URL
            info[52][3],   # star distribution
            photo[29],     # key pair used by load_restaurant_reviews()
        ]

    # ------------------------------------------------------------------
    # Reviews
    # ------------------------------------------------------------------
    def load_restaurant_reviews(self, keyList, reviewsNumber):
        """Download reviews of one place from the Google Maps review endpoint.

        Args:
            keyList: sequence of two identifiers, as returned in the last
                field of each ``load_restaurant_list`` element.
            reviewsNumber: how many reviews to ask for; it is rounded up to a
                whole number of pages of ``REVIEWS_PER_PAGE`` reviews.

        Returns:
            A list with one element per review. Each element is a list:

            ``[reviewerPage, name, photo, time, reviewString, starScore,
            guideRank]``

            i.e. the reviewer's profile URL, display name and photo URL, the
            relative date text, the review text with newlines removed (``""``
            when the review has no text), the stars given, and the reviewer's
            rank (``0`` when absent).

        Downloading stops early when a page comes back without reviews.
        """
        # TODO: add user agent, proxy and request headers.
        reviewsList = []
        for page in range(self._count_review_pages(reviewsNumber)):
            url = self._build_reviews_url(keyList, page)
            r = requests.get(url, allow_redirects=True,
                             timeout=self.REQUEST_TIMEOUT)
            reviews = self._decode_reviews_page(r.content)
            if not reviews:
                # Nothing on this page, so stop requesting further pages.
                break
            reviewsList.extend(self._parse_review(person) for person in reviews)
        return reviewsList

    @classmethod
    def _count_review_pages(cls, reviewsNumber):
        """Return how many requests are needed to cover ``reviewsNumber``."""
        pages, remainder = divmod(reviewsNumber, cls.REVIEWS_PER_PAGE)
        return pages + 1 if remainder else pages

    @classmethod
    def _build_reviews_url(cls, keyList, page):
        """Build the review-endpoint URL for the zero-based ``page``."""
        offset = cls._FIRST_REVIEW_OFFSET + page * cls.REVIEWS_PER_PAGE
        return (cls._REVIEWS_URL_HEAD + str(keyList[0])
                + "!2y" + str(keyList[1])
                + "!2m2!1i" + str(offset)
                + "!2i" + str(cls.REVIEWS_PER_PAGE)
                + cls._REVIEWS_URL_TAIL)

    @classmethod
    def _decode_reviews_page(cls, raw):
        """Decode one raw (bytes) endpoint response into a list of reviews.

        Returns an empty list when the response carries no reviews, which the
        endpoint reports as ``null`` in slot 2 of the decoded array.
        """
        context = raw.decode(encoding='UTF-8', errors='strict')
        context = context.replace("\n'", '')
        context = context.replace('\n', '')
        context = context.replace(cls._JSON_GUARD, "")
        reviews = json.loads(context)[2]
        return [] if reviews is None else reviews

    @staticmethod
    def _parse_review(person):
        """Pick the fields of interest out of one raw review entry.

        The raw entry is left unmodified.
        """
        reviewerPage, name, photo = person[0][:3]

        text = person[3]
        reviewString = "" if text is None else text.replace("\n", '')

        rank = person[12][1][0]
        guideRank = 0 if rank is None else rank[0]

        # person[1] is the relative date text, person[4] the stars given.
        return [reviewerPage, name, photo, person[1], reviewString,
                person[4], guideRank]
    
def main():
    """Demo: search Google Maps and print reviews of the first restaurant.

    Runs a fixed keyword search, downloads one page (10) of reviews for the
    first hit, then prints the restaurant name, a summary line and the
    pretty-printed reviews. Prints a notice and returns when the search
    yields no restaurants instead of failing on an empty list.
    """
    gm = GoogleMapCrawler()

    restaurant_list = gm.load_restaurant_list("https://www.google.com.tw/maps/search/蛋餅/data=!3m1!4b1?hl=zh-TW")
    if not restaurant_list:
        print("no restaurants found")
        return

    first = restaurant_list[0]
    # first[6] is the key pair for the review endpoint; pass first[3] (the
    # review count) instead of 10 to download every review.
    reviews_list = gm.load_restaurant_reviews(first[6], 10)
    print(first[0])
    print("the restaurant has " + str(first[3]) + " reviews , output " + str(len(reviews_list)))
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(reviews_list)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

鬆餅bills
the restaurant has 1527 reviews10
[   [   'https://www.google.com/maps/contrib/103411436606337535083?hl=zh-Hant-TW',
        '許子麒',
        'https://lh5.googleusercontent.com/-Bykk8Zv-11I/AAAAAAAAAAI/AAAAAAAAAAA/o14VOvWnz2g/c-rp-mo-br100/photo.jpg',
        '3 個月前',
        '好吃到我把奶油當成香蕉整塊一口吃下去了😂',
        5,
        5],
    [   'https://www.google.com/maps/contrib/114153548685997450945?hl=zh-Hant-TW',
        'Jimmy Luk',
        'https://lh5.googleusercontent.com/-IO8EBq_AVSI/AAAAAAAAAAI/AAAAAAAAAAA/u50AkunMnuU/c-rp-mo-br100/photo.jpg',
        '5 個月前',
        '早餐食Pancake, 好濃的牛油及蛋香味, 喜歡Pancake的朋友一定不能錯過, 早上約10:30去到, '
        '排左10分鐘左右就可以入座！👍👍',
        5,
        5],
    [   'https://www.google.com/maps/contrib/103307723888242471577?hl=zh-Hant-TW',
        '迪迪史',
        'https://lh5.googleusercontent.com/-9OwlxCXf9U4/AAAAAAAAAAI/AAAAAAAAAAA/KBq4emRTo_Y/c-rp-mo-br100/photo.jpg',
        '2 個月前',
        '有點像鬆餅舒芙蕾，口感是很綿密',
        5,
        3],
    [   'https://www.google.com