# In [1]:
# coding: utf-8
import sys
import json
import time
import csv
import codecs
import requests
from bs4 import BeautifulSoup
import crawler_module

# Delay between consecutive HTTP requests, in seconds (use 0.1 for 100 ms).
delay_time = 1

# Input file name: CSV format with two columns, address and keyword.
input_filename = 'google_map_address.txt'

# Output file name.
output_filename = 'google_map_result.csv'

# List of URLs still waiting to be processed.
urls = []

# Initial (empty) values for the module-level search state.
lat = lng = keyword = pagetoken = None

def generate_url(lat, lng, keyword, pagetoken = None):
    """Build a Google Places Nearby Search request URL.

    Without ``pagetoken`` the URL describes a fresh search for restaurants
    matching ``keyword`` within a fixed 7000 m radius of (``lat``, ``lng``).
    With ``pagetoken`` the URL requests a follow-up page and the other
    arguments are ignored.

    Args:
        lat: Latitude of the search centre.
        lng: Longitude of the search centre.
        keyword: Search term; percent-encoded before being placed in the URL.
        pagetoken: Page token taken from a previous response, or ``None``
            for the first page.

    Returns:
        The request URL (including the API key) as a string.
    """
    # Local import so this function does not rely on the file's import block.
    from urllib.parse import quote_plus

    API_KEY = crawler_module.get_google_api_key()
    endpoint = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'

    if pagetoken is not None:
        return '{}?pagetoken={}&key={}'.format(
            endpoint, quote_plus(str(pagetoken)), API_KEY)

    radius = 7000  # metres
    # The filter must be spelled 'type=restaurant'; a 'type-restaurant' token
    # carries no value and is not a recognised parameter. The keyword is
    # encoded so spaces, '&' or '#' inside it cannot break the query string.
    return (
        '{}?location={},{}&radius={}&type=restaurant&keyword={}&key={}'
        .format(endpoint, lat, lng, radius, quote_plus(str(keyword)), API_KEY)
    )
    
def main(address, _keyword):
    """Crawl all result pages for one address/keyword pair.

    The address is converted to coordinates through ``crawler_module``, the
    first search URL is queued in the module-level ``urls`` list, and the
    queue is then drained one URL at a time (``crawler`` may append further
    pages while it runs). The module-level ``keyword`` is set as a side
    effect.

    Args:
        address: Address text to search around.
        _keyword: Search keyword for this address.
    """
    global keyword
    print(address, _keyword)
    keyword = _keyword

    lat, lng = crawler_module.address_to_location(address)

    # Queue the first page of results.
    first_page = generate_url(lat, lng, keyword, None)
    urls.append(first_page)

    # Work through the queue; an entry is removed only after it was crawled.
    while urls:
        crawler(urls[0])
        urls.pop(0)

    print('done')

def crawler(url):
    """Fetch one Nearby Search page and process every place listed on it.

    Each place is passed to ``get_place_detail``. If the response carries a
    ``next_page_token``, the URL of the following page is appended to the
    module-level ``urls`` queue. The function sleeps ``delay_time`` seconds
    after a page that contained results.

    Args:
        url: Nearby Search request URL, as produced by ``generate_url``.
    """
    res = requests.get(url)
    res.encoding = 'utf-8'

    # Parse the JSON payload.
    data = json.loads(res.text)

    # Guard on the result list itself. Checking the size of the top-level
    # dict says nothing about whether any places came back, and .get() also
    # tolerates a payload that has no 'results' key.
    results = data.get('results') or []
    print("總共{}筆資料".format(len(results)))
    if not results:
        # Show the API status so quota or key problems are not mistaken for
        # a genuinely empty search.
        print('查無結果 (status={})'.format(data.get('status')))
        return

    # Fetch the details of each place and write them to the CSV file.
    for row in results:
        print(row['name'], row.get('vicinity', ''))
        get_place_detail(row['place_id'])

    # Queue the next page if there is one.
    if 'next_page_token' in data:
        print('發現有下一頁')
        urls.append(generate_url(None, None, None, data['next_page_token']))

    time.sleep(delay_time)


def _extract_reviews_and_cuisine(html):
    """Pull the review count and cuisine label out of a search result page.

    The page is searched for the literal text `` reviews</span>``; the number
    is expected directly before it (after a ``#777">`` style fragment) and the
    cuisine in the next ``<span>`` element after it.

    Args:
        html: HTML text of the search result page.

    Returns:
        A ``(reviews, cuisine)`` tuple; ``(0, '')`` for parts not found.
    """
    reviews = 0
    cuisine = ''

    marker = html.find(' reviews</span>')
    if marker == -1:
        print('查無評論關鍵字')
        return reviews, cuisine

    # Look at most 12 characters back; clamp at 0 so a marker near the start
    # of the page does not turn into a negative (end-relative) index.
    start = html.find('#777">', max(marker - 12, 0), marker)
    if start > -1:
        review_str = html[start + 6:marker].replace(',', '').strip()
        try:
            reviews = int(review_str)
            print('reviews={}'.format(reviews))
        except ValueError:
            # Unexpected markup between the tag and the marker: keep 0
            # instead of aborting the whole crawl.
            print('無法解析reviews: {}'.format(review_str))
    else:
        print('查無HTML標籤')

    # The cuisine type sits in the first <span> after the review marker.
    span_open = html.find('<span>', marker)
    if span_open > -1:
        text_start = span_open + 6
        text_end = html.find('</span>', text_start)
        if text_end > -1:
            fragment = html[text_start:text_end].replace(',', '').strip()
            # get_text() yields plain text with entities decoded; formatting
            # the BeautifulSoup object itself would write markup back out.
            cuisine = BeautifulSoup(fragment, "html.parser").get_text()

    return reviews, cuisine


def _append_csv_row(cols):
    """Append one row to ``output_filename`` as quoted, comma-terminated fields.

    Every field is wrapped in double quotes and followed by a comma, and the
    row ends with CRLF. Embedded double quotes are doubled so a value that
    contains ``"`` cannot break the row.
    """
    line = ''.join(
        '"{}",'.format(str(col).replace('"', '""')) for col in cols
    )
    with codecs.open(output_filename, 'a+', 'utf-8') as f:
        f.write(line + '\r\n')


def get_place_detail(place_id):
    """Fetch the details of one place and append them to the output CSV.

    Place Details are requested from the Google Places API (language zh-TW).
    A Google web search for the place name and address is then scraped for
    the review count and cuisine label. One row is appended to
    ``output_filename`` and the function sleeps ``delay_time`` seconds.

    Args:
        place_id: Place identifier taken from a Nearby Search result.

    Returns:
        None. Returns early, writing nothing, when the response has no
        usable ``result`` entry.
    """
    # Local import so this function does not rely on the file's import block.
    from urllib.parse import quote_plus

    # Build the Place Details URL.
    API_KEY = crawler_module.get_google_api_key()
    url = 'https://maps.googleapis.com/maps/api/place/details/json?placeid={}&key={}&language=zh-TW'.format(place_id, API_KEY)
    print(url)

    # Send the request and parse the JSON payload.
    res = requests.get(url)
    res.encoding = 'utf-8'
    data = json.loads(res.text)

    # .get() covers both a missing 'result' key and an explicit null.
    result = data.get('result')
    if not result:
        print('查無結果')
        return

    name = result['name']
    phone_number = result.get('formatted_phone_number', '')
    website = result.get('website', '')
    # 'opening_hours' may be present without 'weekday_text'.
    opening_hours = '、'.join(
        result.get('opening_hours', {}).get('weekday_text', [])
    )
    rating = result.get('rating', '')

    # Split the structured address into the columns written to the CSV.
    postal_code = ''
    city = ''
    district = ''
    route = ''
    for address_component in result.get('address_components', []):
        types = address_component['types']
        if 'postal_code' in types:
            postal_code = address_component['long_name']
        elif 'administrative_area_level_1' in types:
            city = address_component['long_name']
        elif 'administrative_area_level_3' in types:
            district = address_component['long_name']
        elif 'route' in types:
            route = address_component['long_name']

    address = result.get('formatted_address', '')

    # Try to scrape the review count and cuisine type from a web search.
    # Name and address are encoded so spaces, '&' or '#' cannot truncate or
    # split the query.
    url = 'https://www.google.com/search?safe=off&q={}+{}&hl=en'.format(
        quote_plus(name), quote_plus(address))
    print(url)
    res = requests.get(url)
    res.encoding = 'utf-8'
    reviews, cuisine = _extract_reviews_and_cuisine(res.text)

    # Columns in output order.
    location = result['geometry']['location']
    cols = [
        name,
        cuisine,
        phone_number,
        opening_hours,
        city,
        postal_code,
        district,
        route,
        address,
        location['lat'],
        location['lng'],
        rating,
        reviews,
        website,
    ]
    _append_csv_row(cols)

    time.sleep(delay_time)

if __name__ == '__main__':
    # Read (address, keyword) pairs from the input CSV and crawl each one.
    # newline='' leaves line-ending handling to the csv module.
    # NOTE(review): the file is read with the platform default encoding;
    # confirm it matches how the input file is saved.
    with open(input_filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader, None)  # skip the header row
        for row in reader:
            # Blank lines come back as [] and malformed lines may lack the
            # keyword column; skip them instead of failing on row[1].
            if len(row) < 2:
                continue
            main(row[0], row[1])

# ModuleNotFoundError: No module named 'crawler_module'  (notebook cell output)