## 使用python獲取京東商品數據以及好評度爬蟲傻瓜式入門（二）
本章內容主要解決如何多頁獲取手機與電腦數據，以及如何獲取評價數、好評率等信息。
- https://ek21.com/news/tech/58573/

## 如何獲取評論信息？

首先我们来到京东随便搜索一个商品，并进入商品详情页面；打开浏览器开发者工具的 Network 面板，搜索 comment，不难发现这个页面请求了与评论有关的 JSON 数据：

> https://sclub.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv4645&productId=100005171461&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1

In [1]:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re, requests, csv, codecs
from requests.exceptions import RequestException
from bs4 import BeautifulSoup

In [2]:
# 1、要取所有的电脑数据，首先得获得所有电脑的网页源码，以及url地址如下：
def download(url, headers, num_retries=3, timeout=10):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to request.
        headers: HTTP headers sent with the request.
        num_retries: Remaining attempts allowed when the server answers
            with a 5xx status.
        timeout: Seconds to wait for the server before giving up; without
            it a stalled connection would block forever.

    Returns:
        ``response.content`` (bytes) when the final status is 200,
        ``None`` for any other final status, and ``""`` when the request
        itself failed with a ``RequestException`` that is not retried.
    """
    print("download", url)
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException as e:
        # Print the exception itself: e.response is usually None for
        # connection-level failures and says nothing useful.
        print(e)
        code = getattr(e.response, 'status_code', None)
        if code is not None:
            print('error code', code)
            if num_retries > 0 and 500 <= code < 600:
                return download(url, headers, num_retries - 1, timeout)
        return ""

    code = response.status_code
    print(code)
    if code == 200:
        return response.content
    # requests.get() does not raise on HTTP error statuses, so server-side
    # (5xx) failures have to be retried here rather than in the except branch.
    if num_retries > 0 and 500 <= code < 600:
        return download(url, headers, num_retries - 1, timeout)
    return None

In [3]:
def get_json(url, headers=None, timeout=10):
    """Fetch a comment-summary JSON document and extract two fields.

    Args:
        url: Address of the JSON endpoint.
        headers: Optional HTTP headers to send with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(CommentCountStr, GoodRateShow)`` tuple taken from the first
        entry of the ``CommentsCount`` list, or ``(None, None)`` when that
        list is missing or empty, so callers can always unpack two values.
    """
    data = requests.get(url, headers=headers, timeout=timeout).json()
    print(data)
    summaries = data.get('CommentsCount') or []
    if not summaries:
        # Previously this fell through and returned a bare None, which
        # broke tuple-unpacking at the call site.
        return None, None
    first = summaries[0]
    return first.get("CommentCountStr"), first.get("GoodRateShow")

In [4]:
def find_Computer(url, headers):
    """Scrape one search-result page and write its items to ``Computer.csv``.

    For every ``<li class="gl-item">`` element the SKU id, name and price
    are read from the markup, the comment summary is fetched through
    ``get_json``, and one row is both printed and appended to the CSV.

    Args:
        url: Search-result page to download.
        headers: HTTP headers forwarded to ``download``.
    """
    r = download(url, headers=headers)
    # download() yields None or "" on failure; parsing an empty document
    # still produces a CSV containing just the header row.
    page = BeautifulSoup(r or "", "lxml")

    all_items = page.find_all('li', attrs={'class': 'gl-item'})

    with open("Computer.csv", 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        fields = ('ID', 'NAME', 'PRICE', 'Comment#', 'Good Ratio')
        writer.writerow(fields)
        for item in all_items:
            Computer_id = item.get("data-sku")
            name_div = item.find('div', attrs={'class': 'p-name p-name-type-2'})
            name_em = name_div.find('em') if name_div is not None else None
            price_div = item.find('div', attrs={'class': 'p-price'})
            _price = price_div.find('strong') if price_div is not None else None
            if not Computer_id or name_em is None or _price is None:
                # One list item with unexpected markup must not abort
                # the whole export.
                print("skip item with unexpected markup")
                continue

            Computer_name = name_em.text
            # Only fall back to the <i> text when the attribute is absent;
            # looking it up eagerly failed whenever <i> was missing.
            Computer_price = _price.get('data-price')
            if Computer_price is None:
                price_i = _price.find('i')
                Computer_price = price_i.text if price_i is not None else ""

            # Fetch the JSON comment summary for this SKU.
            Comment = f"https://club.jd.com/comment/productCommentSummaries.action?referenceIds={Computer_id}"
            summary = get_json(Comment)
            comment_count, good_rate = summary if summary else (None, None)

            print(f"ID：{Computer_id}")
            print(f"Name：{Computer_name}")
            print(f"Price：{Computer_price}元")
            print("Comment#：", comment_count)
            print(f"Good Ratio：{good_rate}\n")

            writer.writerow([
                Computer_id,
                Computer_name,
                str(Computer_price) + "元",
                comment_count,
                good_rate,
            ])

In [5]:
def main():
    """Run the scraper against a fixed JD search for the keyword "lenovo"."""
    search_url = "https://search.jd.com/Search?keyword=lenovo&enc=utf-8&wq=lenovo&pvid=271f6c114446474f91249bbd788b7090"

    # Send a desktop-Chrome user agent plus a referer with the request.
    request_headers = {}
    request_headers['User-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36"
    request_headers["referer"] = "https://search.jd.com"

    find_Computer(search_url, headers=request_headers)


# Only start scraping when executed directly, not when imported.
if __name__ == '__main__':
    main()

download https://search.jd.com/Search?keyword=lenovo&enc=utf-8&wq=lenovo&pvid=271f6c114446474f91249bbd788b7090
200
{'CommentsCount': [{'SkuId': 100005171461, 'ProductId': 100005171461, 'ShowCount': 15070, 'ShowCountStr': '1.5万+', 'CommentCountStr': '17万+', 'CommentCount': 175145, 'AverageScore': 5, 'DefaultGoodCountStr': '13万+', 'DefaultGoodCount': 132444, 'GoodCountStr': '3.9万+', 'GoodCount': 39989, 'AfterCount': 2721, 'OneYear': 0, 'AfterCountStr': '2700+', 'VideoCount': 2937, 'VideoCountStr': '2900+', 'GoodRate': 0.93, 'GoodRateShow': 93, 'GoodRateStyle': 139, 'GeneralCountStr': '1100+', 'GeneralCount': 1174, 'GeneralRate': 0.027, 'GeneralRateShow': 3, 'GeneralRateStyle': 4, 'PoorCountStr': '1500+', 'PoorCount': 1538, 'SensitiveBook': 0, 'PoorRate': 0.043, 'PoorRateShow': 4, 'PoorRateStyle': 7}]}
ID：100005171461
Name：联想(Lenovo)小新Pro13.3英寸全面屏超轻薄笔记本电脑(标压锐龙R5-3550H 16G 512G 2.5K QHD 100%sRGB)银
Price：4499.00元
Comment#： 17万+
Good Ratio：93

{'CommentsCount': [{'SkuId': 100004286189, 'Prod

{'CommentsCount': [{'SkuId': 100003383325, 'ProductId': 100003383325, 'ShowCount': 2822, 'ShowCountStr': '2800+', 'CommentCountStr': '10万+', 'CommentCount': 107844, 'AverageScore': 5, 'DefaultGoodCountStr': '8.5万+', 'DefaultGoodCount': 85766, 'GoodCountStr': '3.7万+', 'GoodCount': 37018, 'AfterCount': 651, 'OneYear': 0, 'AfterCountStr': '600+', 'VideoCount': 374, 'VideoCountStr': '300+', 'GoodRate': 0.97, 'GoodRateShow': 97, 'GoodRateStyle': 145, 'GeneralCountStr': '200+', 'GeneralCount': 278, 'GeneralRate': 0.007, 'GeneralRateShow': 1, 'GeneralRateStyle': 1, 'PoorCountStr': '600+', 'PoorCount': 616, 'SensitiveBook': 0, 'PoorRate': 0.023, 'PoorRateShow': 2, 'PoorRateStyle': 4}]}
ID：100003383325
Name：联想(Lenovo)天逸510S 英特尔酷睿i3 个人商务台式机电脑整机(i3-9100 8G 1T WiFi  三年上门 Win10)21.5英寸
Price：2899.00元
Comment#： 10万+
Good Ratio：97

{'CommentsCount': [{'SkuId': 100008492048, 'ProductId': 100008492048, 'ShowCount': 11879, 'ShowCountStr': '1.1万+', 'CommentCountStr': '11万+', 'CommentCount': 116679, 'Avera

{'CommentsCount': [{'SkuId': 8577742, 'ProductId': 8577742, 'ShowCount': 528, 'ShowCountStr': '500+', 'CommentCountStr': '2.1万+', 'CommentCount': 21769, 'AverageScore': 5, 'DefaultGoodCountStr': '1.7万+', 'DefaultGoodCount': 17475, 'GoodCountStr': '1万+', 'GoodCount': 10777, 'AfterCount': 214, 'OneYear': 0, 'AfterCountStr': '200+', 'VideoCount': 70, 'VideoCountStr': '70+', 'GoodRate': 0.96, 'GoodRateShow': 96, 'GoodRateStyle': 144, 'GeneralCountStr': '100+', 'GeneralCount': 114, 'GeneralRate': 0.01, 'GeneralRateShow': 1, 'GeneralRateStyle': 2, 'PoorCountStr': '200+', 'PoorCount': 245, 'SensitiveBook': 0, 'PoorRate': 0.03, 'PoorRateShow': 3, 'PoorRateStyle': 4}]}
ID：8577742
Name：联想（Lenovo）AIO 520 致美一体机台式电脑23.8英寸（AMD A6-9500E 4G 1T WIFI 蓝牙 三年上门 Win10）银
Price：2899.00元
Comment#： 2.1万+
Good Ratio：96

{'CommentsCount': [{'SkuId': 57773460021, 'ProductId': 57773460021, 'ShowCount': 1164, 'ShowCountStr': '1100+', 'CommentCountStr': '2.1万+', 'CommentCount': 21274, 'AverageScore': 5, 'DefaultGoodC