In [42]:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from urllib.parse import quote
from pyquery import PyQuery as pq
import pymongo
import time

In [44]:

def index_page(page):
    """
    抓取索引页
    ：param page :页码
    """
    print('正在爬取第{} 页'.format(page))
    try:
        url = "https://s.taobao.com/search?q=" + quote(KEYWORD)
        browser.get(url)
        time.sleep(5)
        if page > 1:
            # 获取跳转页输入框
            input_ = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#mainsrp-pager div.form > input")))
            # 获取跳转页确定框
            submit = wait.until(
                EC.presence_of_element_located(By.CSS_SELECTOR, "#mainsrp-pager div.form > span.btn.J_Submit"))
            # 清空输入框的内容
            input_.clear()
            # 设置页码数
            input_.send_keys(page)
            submit.click()
        # 等待验证高亮页面的标签是否为当前页
        wait.until(
            EC.text_to_be_present_in_element((By.CSS_SELECTOR, '#mainsrp-pager li.item.active > span'),
            str(page)))
        # 获取当前页面的内容
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.m-itemlist .items .item')))
        get_products()
    except TimeoutException as e:
        # 超时重写请求
        index_page(page)

In [45]:
def get_products():
    """获取商品数据"""
    html = browser.page_source
    doc = pq(html)
    items = doc('#mainsrp-itemlist .items .item').items()
    for item in items:
        product = {
            'image':item.find('.pic .img').attr('data-src'),
            'price':item.find('.price').text(),
            'deal': item.find('.deal-cnt').text(),
            'title': item.find('.title').text(),
            'shop':item.find('.shop').text(),
            'location':item.find('.location').text()        
        }
        print(product)
        save_to_mongo(product)

In [46]:
MONGO_DB = 'taobao'
MONGO_COLLECTION = 'product'
client = pymongo.MongoClient(host='localhost', port=27017)
db = client[MONGO_DB]
collection = db[MONGO_COLLECTION]
def save_to_mongo(product):
    try:
        if collection.insert(product):
            print('保存成功')
    except Exception:
        print('保存失败！！')

In [47]:
MAX_PAGE = 100
def main():
    for i in range(1, MAX_PAGE+1):
        index_page(i)

In [51]:
if __name__ == "__main__":
    browser = webdriver.Chrome()
    wait = WebDriverWait(browser, 10)
    KEYWORD = 'iPad'
    main()

正在爬取第1 页
{'image': '//g-search1.alicdn.com/img/bao/uploaded/i4/imgextra/i1/341520105/O1CN01PRpA3N1Ce89EcUmso_!!0-saturn_solar.jpg', 'price': '¥2868.00', 'deal': '2636人付款', 'title': 'Apple/苹果 iPad 2018新款 平板电脑 air2 9.7英寸32/128G wifi 正品全国联保 平板ipad', 'shop': '宏音伟业数码专营', 'location': '广东 深圳'}
保存成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i2/1917047079/O1CN01B57i9v22AEI529xhY_!!2-item_pic.png', 'price': '¥6569.00', 'deal': '', 'title': 'Apple/苹果 10.5 英寸 iPad Air', 'shop': 'applestore官方旗舰店', 'location': '上海'}
保存成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i3/1776456424/O1CN01sQ1Ov01xKEs2CUjzS_!!0-item_pic.jpg', 'price': '¥2838.00', 'deal': '5000+人付款', 'title': '【12期分期 全国联保】Apple/苹果 iPad 2018款中移动苹果平板电脑9.7英寸正品国行平板ipad2018', 'shop': '中国移动官方旗舰店', 'location': '浙江 杭州'}
保存成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i1/1917047079/O1CN01qNZUmy22AEFzBvXlR_!!2-item_pic.png', 'price': '¥4870.00', 'deal': '', 'title': 'Apple/苹果 10.5 英寸 iPad Air', 'shop': 'applestore官方旗

  


{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i2/1917047079/O1CN01orvMCo22AEFyGqSrB_!!0-item_pic.jpg', 'price': '¥2921.00', 'deal': '', 'title': 'Apple/苹果 iPad mini', 'shop': 'applestore官方旗舰店', 'location': '上海'}
保存成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i4/1776456424/O1CN01pmX5dq1xKEsvgEgHl_!!0-item_pic.jpg', 'price': '¥3258.00', 'deal': '1589人付款', 'title': '【中国移动官方旗舰店】苹果平板2019新款ipad air Apple/苹果10.5英寸中移动iPad air新款平板电脑ipad', 'shop': '中国移动官方旗舰店', 'location': '浙江 杭州'}
保存成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i3/2616970884/O1CN01ecsyWI1IOugAvAXvV_!!0-item_pic.jpg', 'price': '¥3896.00', 'deal': '4256人付款', 'title': '【下单至高减527】2019新款 Apple/苹果iPad Air 3平板电脑 10.5英寸智能A12处理器WiFi支持pencil', 'shop': '苏宁易购官方旗舰店', 'location': '江苏 南京'}
保存成功
{'image': '//g-search1.alicdn.com/img/bao/uploaded/i4/i2/2200724907121/O1CN01UU4ZIG22TSwjN36uh_!!0-item_pic.jpg', 'price': '¥2868.00', 'deal': '2636人付款', 'title': 'Apple/苹果 iPad 2018新款 平板电脑 air2 9.7英寸32/128G wifi 正品全国联保 平板

{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i3/1062583331/O1CN01fY84xw1aTdrBrr6dR_!!0-item_pic.jpg', 'price': '¥1849.00', 'deal': '506人付款', 'title': '【30天试用】Jumper/中柏EZpad Go 平板电脑二合一windows系统PC11.6英寸2019新款超薄办公手写win10平板ipad', 'shop': 'jumper中柏旗舰店', 'location': '广东 深圳'}
保存成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i2/898225/O1CN01LyARPd2Ad69rEyEYl_!!898225.jpg', 'price': '¥5018.00', 'deal': '220人付款', 'title': '现货速发 2018新款Apple/苹果 iPad Pro 11寸12.9全面屏平板三代', 'shop': 'ygsd', 'location': '北京'}
保存成功
{'image': '//g-search1.alicdn.com/img/bao/uploaded/i4/i4/268451883/O1CN01GgjeBB1PmSLTFHM7a_!!0-item_pic.jpg', 'price': '¥2188.00', 'deal': '127人付款', 'title': '【12期分期】送1年店铺延保/12期分期 Apple/苹果 iPad 2018新款 9.7英寸 wifi版 苹果平板电脑 air 2 3', 'shop': '三际数码官方旗舰店', 'location': '山东 济南'}
保存成功
{'image': '//g-search3.alicdn.com/img/bao/uploaded/i4/i4/25483223/O1CN011ZgAzeVgoCRt2Kq_!!25483223.jpg', 'price': '¥4888.00', 'deal': '130人付款', 'title': 'Apple/苹果 11 英寸 iPad Pro 64G/256G 4G版ipadpro12.9平

TypeError: __init__() takes 2 positional arguments but 3 were given