**操作流程：**

1. 搜索关键词
利用 Selenium 驱动浏览器搜索关键词，得到查询后的商品列表
2. 分析页码并翻页
得到商品页码数，模拟翻页，得到后续页面的商品列表
3. 分析提取商品内容
利用 PyQuery 分析源码，解析得到商品内容
4. 存储至 MongoDB
将商品信息存储至 MongoDB 数据库

In [1]:
# -*- coding: utf-8 -*-
# @Time    : 2018/10/16 19:57
# @Author  : Divinerhjf
# @Software: Jupyter Notebook

import pymongo                                                       # MongoDB 数据库接口
from selenium import webdriver                                       # 引入浏览器驱动模块
from selenium.webdriver.common.by import By                          # 设置等待 https://selenium-python-zh.readthedocs.io/en/latest/waits.html
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException              # 等待超时处理
from pyquery import PyQuery as pq                                    # 解析库引入
from urllib.parse import quote

# --- Crawl settings ---------------------------------------------------------
KEYWORD = 'iPad'   # search keyword
MAX_PAGE = 5       # last result page to crawl

# --- MongoDB settings -------------------------------------------------------
MONGO_URL = 'localhost'
MONGO_DB = 'taobao'
MONGO_COLLECTION = 'products'

# --- Browser driver ---------------------------------------------------------
# Alternatives kept for reference:
#   PhantomJS without image loading, with disk cache:
#     SERVICE_ARGS = ['--load-images=false', '--disk-cache=true']
#     browser = webdriver.PhantomJS(service_args=SERVICE_ARGS)
#   Headless Chrome:
#     chrome_options = webdriver.ChromeOptions()
#     chrome_options.add_argument('--headless')
#     browser = webdriver.Chrome(chrome_options=chrome_options)
browser = webdriver.Chrome()

# Explicit wait: each wait.until(...) call polls for up to 10 seconds.
wait = WebDriverWait(browser, 10)

# --- Database handles -------------------------------------------------------
client = pymongo.MongoClient(MONGO_URL)
db = client[MONGO_DB]


def index_page(page, max_retries=3):
    """
    Crawl one search-result page and pass it to ``get_products``.

    Opens the search URL for ``KEYWORD``; when ``page > 1`` it types the page
    number into the pager's input box and clicks the confirm button. It then
    waits until the pager's active item shows ``page`` and at least one product
    item is present before extracting the products.

    A ``TimeoutException`` triggers a retry of the whole page. Retries are
    bounded so that a page which never loads cannot recurse/loop forever.

    :param page: page number to crawl (1 is the first page)
    :param max_retries: number of additional attempts after the first one
        times out; negative values are treated as 0
    :raises TimeoutException: if the final attempt also times out
    """
    total_attempts = max(max_retries, 0) + 1
    for attempt in range(1, total_attempts + 1):
        print('正在爬取第', page, '页')
        try:
            # Always start from the first result page of the search.
            url = 'https://s.taobao.com/search?q=' + quote(KEYWORD)
            browser.get(url)

            # Jump to the requested page through the pager form.
            if page > 1:
                # Page-number input box
                page_input = wait.until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, '#mainsrp-pager div.form > input')))
                # Confirm button next to the input box
                submit = wait.until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, '#mainsrp-pager div.form > span.btn.J_Submit')))
                page_input.clear()
                page_input.send_keys(str(page))
                submit.click()

            # Make sure the pager highlights the requested page number.
            wait.until(
                EC.text_to_be_present_in_element((By.CSS_SELECTOR, '#mainsrp-pager li.item.active > span'), str(page)))
            # Make sure product items have been rendered.
            wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '.m-itemlist .items .item')))

            # Extract and store the products of the current page.
            get_products()
            return
        except TimeoutException:
            # Out of attempts: let the caller decide what to do.
            if attempt == total_attempts:
                raise


def get_products():
    """
    Parse the browser's current page source and store every product found.

    Each item under ``#mainsrp-itemlist .items .item`` becomes a dict with the
    keys image, price, deal, title, shop and location; the dict is printed and
    then passed to ``save_to_mongo``.
    """
    # (dict key, CSS selector) pairs whose value is the element's text.
    text_fields = (
        ('price', '.price'),
        ('deal', '.deal-cnt'),
        ('title', '.title'),
        ('shop', '.shop'),
        ('location', '.location'),
    )
    page = pq(browser.page_source)
    for node in page('#mainsrp-itemlist .items .item').items():
        # The image URL is read from the data-src attribute, not from text.
        product = {'image': node.find('.pic .img').attr('data-src')}
        for key, selector in text_fields:
            product[key] = node.find(selector).text()
        print(product)
        save_to_mongo(product)


def save_to_mongo(result):
    """
    Insert one product document into the ``MONGO_COLLECTION`` collection.

    Best-effort: a failed insert is reported on stdout (with the error) and
    swallowed, so a single bad record does not abort the crawl.

    :param result: product dict to store
    """
    try:
        # Collection.insert() was deprecated in PyMongo 3 and removed in
        # PyMongo 4; insert_one() is the supported single-document insert.
        db[MONGO_COLLECTION].insert_one(result)
    except Exception as exc:
        print('存储到MongoDB失败', exc)
    else:
        print('存储到MongoDB成功')


def main():
    """
    Crawl pages 1 through ``MAX_PAGE`` in order, then shut the browser down.

    Any exception stops the crawl and is reported on stdout together with its
    message; the browser is shut down in every case.
    """
    try:
        for page in range(1, MAX_PAGE + 1):
            index_page(page)
    except Exception as exc:
        print('出错啦', exc)
    finally:
        # quit() ends the whole driver session; close() would only close the
        # current window and leave the driver process running.
        browser.quit()


# Start the crawl only when this module is executed directly, not when imported.
if __name__ == '__main__':
    main()

正在爬取第 1 页
正在爬取第 1 页
正在爬取第 1 页
{'image': '//g-search1.alicdn.com/img/bao/uploaded/i4/imgextra/i1/13022581/TB2HGTAcnCWBKNjSZFtXXaC3FXa_!!0-saturn_solar.jpg', 'price': '¥\n2348.00', 'deal': '2681人付款', 'title': '赠电动牙刷【2年保修】Apple/苹果\niPad\n2018款 9.7英寸平板电脑 wifi平板电脑 吃鸡玩家', 'shop': '绿森数码官方旗舰店', 'location': '浙江 杭州'}
存储到MongoDB成功
{'image': '//g-search1.alicdn.com/img/bao/uploaded/i4/i4/1669409267/O1CN012IKKoAl6glhcYx2_!!0-item_pic.jpg', 'price': '¥\n2388.00', 'deal': '8733人付款', 'title': '[12期分期][两年保修]Apple/苹果\niPad\n2018款 9.7英寸wifi新款平板电脑32G/128G 正品国行新品授权旗舰店', 'shop': '卓辰数码旗舰店', 'location': '浙江 杭州'}
存储到MongoDB成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i1/2616970884/TB1u5LDczDpK1RjSZFrXXa78VXa_!!0-item_pic.jpg', 'price': '¥\n2565.00', 'deal': '14801人付款', 'title': '2018新款 Apple/苹果 9.7英寸\niPad\n智能平板电脑 正品国行新品', 'shop': '苏宁易购官方旗舰店', 'location': '江苏 南京'}
存储到MongoDB成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i2/2616970884/TB1Yhk0qByWBuNkSmFPXXXguVXa_!!0-item_pic.jpg', 'price':



{'image': '//g-search1.alicdn.com/img/bao/uploaded/i4/imgextra/i4/31623982/TB2jpMGluySBuNjy1zdXXXPxFXa_!!0-saturn_solar.jpg', 'price': '¥\n1788.00', 'deal': '180人付款', 'title': 'iPad\n2018新款 Apple/苹果\niPad\nAir2升级版 9.7寸平板电脑2017款', 'shop': '益饰缘', 'location': '广东 深圳'}
存储到MongoDB成功
{'image': '//g-search1.alicdn.com/img/bao/uploaded/i4/imgextra/i3/17975440/TB28k2QCv1TBuNjy0FjXXajyXXa_!!0-saturn_solar.jpg', 'price': '¥\n2100.00', 'deal': '54人付款', 'title': '正品Apple/苹果\niPad\n2018款 9.7英寸平板电脑32G/128G 新款\niPad', 'shop': '860803华晶', 'location': '浙江 杭州'}
存储到MongoDB成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i1/2726246443/TB23iVShv5TBuNjSspmXXaDRVXa_!!2726246443.jpg', 'price': '¥\n1050.00', 'deal': '36人付款', 'title': 'Apple/苹果\nipad\nmini2平板电脑 迷你2 wifi16G 32G 7.9英寸3G版', 'shop': '童帽直销店', 'location': '广东 深圳'}
存储到MongoDB成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i3/287835447/TB250lIqKuSBuNjSsplXXbe8pXa_!!287835447.jpg', 'price': '¥\n1290.00', 'deal': '260人付款', 'title': '分期App

{'image': '//g-search1.alicdn.com/img/bao/uploaded/i4/i2/81391902/TB2R3jGvDtYBeNjy1XdXXXXyVXa_!!81391902.jpg', 'price': '¥\n1390.00', 'deal': '35人付款', 'title': 'Apple/苹果\niPad\nAir2 WLAN 16GB国行/港版\niPad\nair2 4G\niPad\n6代', 'shop': '亚菲数码', 'location': '上海'}
存储到MongoDB成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i3/413951935/O1CN011QAGr0AtuPjR2lz_!!413951935.jpg', 'price': '¥\n1549.00', 'deal': '25人付款', 'title': 'Apple/苹果\niPad\nmini4 7.9寸平板电脑插卡三网4G版WIFI 2018新款', 'shop': '暮光女', 'location': '广东 深圳'}
存储到MongoDB成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i2/1037322546/O1CN011Ug6q5FijARZaHd_!!1037322546.jpg', 'price': '¥\n1598.00', 'deal': '223人付款', 'title': 'Apple/苹果\niPad\nmini 4平板电脑7.9英寸迷你4 3网4G花呗分期免息', 'shop': '美足8689', 'location': '广东 东莞'}
存储到MongoDB成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i3/1681208854/TB2OLGvfjDpK1RjSZFrXXa78VXa_!!1681208854.jpg', 'price': '¥\n1818.00', 'deal': '2人付款', 'title': 'Apple/苹果\niPad\n2018款 9.7英寸苹果平板电脑国行', 'shop': 't

{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i2/81391902/TB2R3jGvDtYBeNjy1XdXXXXyVXa_!!81391902.jpg', 'price': '¥\n1390.00', 'deal': '35人付款', 'title': 'Apple/苹果\niPad\nAir2 WLAN 16GB国行/港版\niPad\nair2 4G\niPad\n6代', 'shop': '亚菲数码', 'location': '上海'}
存储到MongoDB成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i3/413951935/O1CN011QAGr0AtuPjR2lz_!!413951935.jpg', 'price': '¥\n1549.00', 'deal': '25人付款', 'title': 'Apple/苹果\niPad\nmini4 7.9寸平板电脑插卡三网4G版WIFI 2018新款', 'shop': '暮光女', 'location': '广东 深圳'}
存储到MongoDB成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i2/1037322546/O1CN011Ug6q5FijARZaHd_!!1037322546.jpg', 'price': '¥\n1598.00', 'deal': '223人付款', 'title': 'Apple/苹果\niPad\nmini 4平板电脑7.9英寸迷你4 3网4G花呗分期免息', 'shop': '美足8689', 'location': '广东 东莞'}
存储到MongoDB成功
{'image': '//g-search2.alicdn.com/img/bao/uploaded/i4/i3/1681208854/TB2OLGvfjDpK1RjSZFrXXa78VXa_!!1681208854.jpg', 'price': '¥\n1818.00', 'deal': '2人付款', 'title': 'Apple/苹果\niPad\n2018款 9.7英寸苹果平板电脑国行', 'shop': 't

{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i4/4036567658/O1CN0126RPX89AHX46uHB_!!0-item_pic.jpg', 'price': '¥\n2565.00', 'deal': '231人付款', 'title': '【保修2年顺丰当天发】Apple/苹果\niPad\n2018新款 9.7英寸平板电脑Wifi版 原装国行正品 官方授权旗舰店', 'shop': '迈向数码旗舰店', 'location': '北京'}
存储到MongoDB成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i3/446381428/O1CN011MQ42JHYyR272DS_!!446381428.jpg', 'price': '¥\n1838.00', 'deal': '770人付款', 'title': 'Apple/苹果\niPad\n2018款 平板电脑9.7英寸 air2升级 新款\nipad\n2018', 'shop': '旺隆数码', 'location': '广东 深圳'}
存储到MongoDB成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i4/81391902/TB2WIkSuCtYBeNjSspkXXbU8VXa_!!81391902.jpg', 'price': '¥\n1550.00', 'deal': '58人付款', 'title': 'Apple/苹果 2017新款\niPad\nair3 32G/128G 9.7寸平板电脑2018新\nipad', 'shop': '亚菲数码', 'location': '上海'}
存储到MongoDB成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i4/197232874/TB1V1pouoOWBKNjSZKzXXXfWFXa_!!0-item_pic.jpg', 'price': '¥\n2648.00', 'deal': '319人付款', 'title': '赠电动牙刷【2年保修】Apple/苹果\niPad\nm