In [2]:
#!/usr/bin/env python  
# encoding: utf-8  



"""爬取淘宝美食信息"""

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.wait import TimeoutException
import re
from pyquery import PyQuery as pq
from config import *
import pymongo

# MongoDB handle used by save_to_mongo(). MONGO_URL and MONGO_DB are not
# defined in this file; presumably they come from `from config import *`.
client = pymongo.MongoClient(MONGO_URL)
db = client[MONGO_DB]

# Use a Chrome browser to load and render the pages.
browser = webdriver.Chrome()

# Alternative (disabled): render the pages with PhantomJS instead.
#path = 'D://phantomjs//bin//phantomjs.exe'
#browser = webdriver.PhantomJS(executable_path=path,service_args=SERVICE_ARGS)
#browser.set_window_size(1400,900)

# Shared explicit wait bound to the browser above; the second argument (10)
# is the timeout passed to WebDriverWait.
wait = WebDriverWait(browser,10)

def search(keyword='美食', max_retries=3):
    """Search Taobao for ``keyword`` and scrape the first result page.

    Loads the Taobao home page, types ``keyword`` into the search box,
    submits the form, waits for the pager's "total" element and then calls
    ``get_products()`` for the first page of results.

    Args:
        keyword: Text typed into the search box.
        max_retries: How many additional attempts are made after a
            ``TimeoutException`` before giving up. Negative values are
            treated as 0.

    Returns:
        The text of the pager's "total pages" element.

    Raises:
        TimeoutException: If every attempt timed out. (The previous
            implementation recursed without bound in that situation.)
    """
    retries = max(max_retries, 0)
    for attempt in range(retries + 1):
        try:
            browser.get('https://www.taobao.com')
            # Explicit waits: each until() blocks until its condition holds
            # or raises TimeoutException once the wait's timeout elapses.
            search_box = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#q")))
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR,
                     "#J_TSearchForm > div.search-button > button")))
            search_box.send_keys(keyword)
            submit.click()
            total = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR,
                     "#mainsrp-pager > div > div > div > div.total")))
            get_products()
            return total.text
        except TimeoutException:
            # Start over from the home page, but only a bounded number of
            # times so a permanently broken page cannot loop forever.
            if attempt == retries:
                raise

def next_page(page_number, max_retries=3):
    """Jump to result page ``page_number`` via the pager form and scrape it.

    Types the page number into the pager's input box, clicks its submit
    button, waits until the highlighted pager item shows ``page_number``
    and then calls ``get_products()``.

    Args:
        page_number: Page number to navigate to.
        max_retries: How many additional attempts are made after a
            ``TimeoutException`` before giving up. Negative values are
            treated as 0.

    Raises:
        TimeoutException: If every attempt timed out. (The previous
            implementation recursed without bound in that situation.)
    """
    page_text = str(page_number)
    retries = max(max_retries, 0)
    for attempt in range(retries + 1):
        try:
            page_box = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR,
                     "#mainsrp-pager > div > div > div > div.form > input")))
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR,
                     "#mainsrp-pager > div > div > div > div.form > span.btn.J_Submit")))
            page_box.clear()
            page_box.send_keys(page_text)
            submit.click()
            # The jump is confirmed once the active pager item displays the
            # requested page number.
            wait.until(
                EC.text_to_be_present_in_element(
                    (By.CSS_SELECTOR,
                     "#mainsrp-pager > div > div > div > ul > li.item.active > span"),
                    page_text))
            get_products()
            return
        except TimeoutException:
            # Retry the same page a bounded number of times, then give up.
            if attempt == retries:
                raise

def get_products():
    """Extract every product on the currently loaded result page.

    Waits until at least one product item is present, parses the browser's
    page source with PyQuery, and for each item builds a dict with the keys
    ``image``, ``price``, ``deal``, ``title``, ``shop`` and ``location``.
    Each dict is printed and then handed to ``save_to_mongo()``.
    """
    item_selector = '#mainsrp-itemlist .items .item'
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, item_selector)))

    page = pq(browser.page_source)
    for card in page(item_selector).items():
        image_url = card.find('.pic .img').attr('data-src')
        price_text = card.find('.price').text()
        # Drop the last three characters of the deal-count text
        # (presumably a fixed unit suffix after the number -- TODO confirm).
        deal_count = card.find('.deal-cnt').text()[:-3]
        title_text = card.find('.title').text()
        shop_name = card.find('.shop').text()
        shop_location = card.find('.location').text()

        product = {
            'image': image_url,
            'price': price_text,
            'deal': deal_count,
            'title': title_text,
            'shop': shop_name,
            'location': shop_location,
        }
        print(product)
        save_to_mongo(product)

def save_to_mongo(result):
    """Insert one product document into the ``MONGO_TABLE`` collection.

    Storage is best-effort: a failure is reported on stdout together with
    its cause and then swallowed so that scraping can continue.

    Args:
        result: The product dict to store. PyMongo adds an ``_id`` key to
            it on insertion.
    """
    try:
        # insert_one() replaces Collection.insert(), which is deprecated
        # since PyMongo 3.0 and removed in PyMongo 4.0.
        outcome = db[MONGO_TABLE].insert_one(result)
    except Exception as exc:
        print('存储到MONGODB失败', result, exc)
    else:
        if outcome.inserted_id is not None:
            print('存储到MONGODB成功', result)


def main():
    """Run the search, walk through every result page, then shut the browser.

    The total page count is parsed from the pager text returned by
    ``search()``; pages 2..total are then visited with ``next_page()``
    (page 1 is scraped by ``search()`` itself).

    Raises:
        ValueError: If the pager text does not contain a page count.
    """
    try:
        total_text = search()
        found = re.search(r'共\s*(\d+)\s*页', total_text)
        if found is None:
            raise ValueError(
                'could not parse the total page count from %r' % (total_text,))
        total = int(found.group(1))
        for i in range(2, total + 1):
            next_page(i)
    finally:
        # Always release the browser, even when scraping fails part-way.
        # quit() ends the whole driver session, whereas close() only closes
        # the current window.
        browser.quit()


if __name__ == '__main__':
    main()


{'image': '//g-search1.alicdn.com/img/bao/uploaded/i4/imgextra/i3/1129506063780078357/TB2HiLEtl8lpuFjSspaXXXJKpXa_!!0-saturn_solar.jpg', 'price': '¥ 9.98', 'deal': '1322', 'title': '买三包邮羊杂汤内蒙古特产羊杂碎羊汤羊肉汤即食羊肉类熟食238g', 'shop': '骄子牧场旗舰店', 'location': '内蒙古 呼和浩特'}




存储到MONGODB成功 {'image': '//g-search1.alicdn.com/img/bao/uploaded/i4/imgextra/i3/1129506063780078357/TB2HiLEtl8lpuFjSspaXXXJKpXa_!!0-saturn_solar.jpg', 'price': '¥ 9.98', 'deal': '1322', 'title': '买三包邮羊杂汤内蒙古特产羊杂碎羊汤羊肉汤即食羊肉类熟食238g', 'shop': '骄子牧场旗舰店', 'location': '内蒙古 呼和浩特', '_id': ObjectId('5a79ddb94124bc23f0227d44')}
{'image': '//g-search2.alicdn.com/img/bao/uploaded/i4/i1/619123122/TB1dc3OfqLN8KJjSZFvXXXW8VXa_!!0-item_pic.jpg', 'price': '¥ 20.90', 'deal': '63423', 'title': '良品铺子肉松饼传统糕点点心零食早餐 美食 特产小吃休闲食品散装', 'shop': '良品铺子旗舰店', 'location': '湖北 武汉'}
存储到MONGODB成功 {'image': '//g-search2.alicdn.com/img/bao/uploaded/i4/i1/619123122/TB1dc3OfqLN8KJjSZFvXXXW8VXa_!!0-item_pic.jpg', 'price': '¥ 20.90', 'deal': '63423', 'title': '良品铺子肉松饼传统糕点点心零食早餐 美食 特产小吃休闲食品散装', 'shop': '良品铺子旗舰店', 'location': '湖北 武汉', '_id': ObjectId('5a79ddce4124bc23f0227d45')}
{'image': '//g-search1.alicdn.com/img/bao/uploaded/i4/i4/619123122/TB1B4moXkCWBuNjy0FaXXXUlXXa_!!0-item_pic.jpg', 'price': '¥ 13.90', 'deal': '48315', 'tit

存储到MONGODB成功 {'image': '//g-search1.alicdn.com/img/bao/uploaded/i4/i4/2891702599/TB19pYAekfb_uJkSnhJXXbdDVXa_!!0-item_pic.jpg', 'price': '¥ 125.00', 'deal': '593', 'title': '老杭邦龙井酥5口味套装 杭州特产 美食 手工糕点点心零食小吃实惠装', 'shop': '老杭邦旗舰店', 'location': '浙江 杭州', '_id': ObjectId('5a79ddce4124bc23f0227d66')}
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i1/TB112OOMpXXXXXKXpXXXXXXXXXX_!!0-item_pic.jpg', 'price': '¥ 28.00', 'deal': '61', 'title': '天空之城 江南特产袜底酥苏州特产苏州时令 美食 袜底酥 两盒优惠', 'shop': 'momicafe天空之城旗舰店', 'location': '江苏 苏州'}
存储到MONGODB成功 {'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i1/TB112OOMpXXXXXKXpXXXXXXXXXX_!!0-item_pic.jpg', 'price': '¥ 28.00', 'deal': '61', 'title': '天空之城 江南特产袜底酥苏州特产苏州时令 美食 袜底酥 两盒优惠', 'shop': 'momicafe天空之城旗舰店', 'location': '江苏 苏州', '_id': ObjectId('5a79ddce4124bc23f0227d67')}
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i4/746658033/TB1fHphfBfM8KJjSZPiXXXdspXa_!!0-item_pic.jpg', 'price': '¥ 35.60', 'deal': '1033', 'title': '丫眯鲜花饼云南特产正宗昆明 美食 糕点零食800g装

WebDriverException: Message: chrome not reachable
  (Session info: chrome=62.0.3202.94)
  (Driver info: chromedriver=2.34.522940 (1a76f96f66e3ca7b8e57d503b4dd3bccfba87af1),platform=Windows NT 6.3.9600 x86_64)
