# 爬取腾讯数据制作感染热力图

## 0,环境准备

1，Anaconda 默认已安装大部分需要的库，可通过 import 来验证；缺少的库使用 pip 安装：requests，pandas，jupyter notebook，以及后面绘图用到的 pyecharts（json 是 Python 标准库，无需安装）

2，准备chrome或火狐浏览器

In [71]:
import requests
import json
import pandas as pd
# Display the version string of each imported library to confirm the imports work.
requests.__version__, json.__version__, pd.__version__

('2.21.0', '2.0.9', '0.23.4')

## 1、分析数据源

1，通过浏览器打开数据源：https://news.qq.com/zt2020/page/feiyan.htm?from=timeline&isappinstalled=0

2，打开开发者选项，分析network流；

3，简述动态网页和静态网页的区别；

## 2、抓取数据

1，找到数据流，用python获取，查看内容；

2，用json加载数据，用可视化工具分析http://www.bejson.com/jsonviewernew/

In [None]:
# Exploratory fetch: download the JSONP response and decode its outer JSON object.
url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=jQuery34105042688433769531_1581048970987&_=1581048970988'
# A timeout keeps the cell from hanging forever if the server never answers.
reponse = requests.get(url=url, timeout=10)
# Fail loudly on an HTTP error instead of trying to slice an error page.
reponse.raise_for_status()
# The body looks like `<callback>(<json>)`: keep what follows the callback
# name, then drop the opening and closing parentheses.
data = reponse.text.split('jQuery34105042688433769531_1581048970987')[1][1:-1]
data = json.loads(data)
# Last expression of the cell: displays the decoded outer object.
data

In [None]:
# The 'data' field of the outer object is itself a JSON string, so decode it
# a second time to get the actual payload.
data = json.loads(data['data'])
# Last expression of the cell: displays the decoded payload.
data

3，用json可视化工具，查看数据

4，封装函数：利用数据源得出的链接，获取到原始数据，然后用json处理成python字典并返回

In [74]:
def catch_data():
    """Download the epidemic feed and return its payload as a Python dict.

    The endpoint replies in JSONP form, ``<callback>(<json>)``. The outer
    JSON object carries a ``ret`` status code and a ``data`` field that is
    itself a JSON-encoded string, so the payload is decoded twice.

    Returns:
        The decoded ``data`` payload when ``ret`` is 0, otherwise ``None``.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
        ValueError: if the body has no ``(...)`` wrapper or the wrapped text
            is not valid JSON.
    """
    # The callback name is sent in the query string and echoed back as the
    # JSONP wrapper, so keep it in one place.
    callback = 'jQuery34105042688433769531_1581048970987'
    url = ('https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
           '&callback=' + callback + '&_=1581048970988')

    # Without a timeout requests waits indefinitely for a silent server.
    response = requests.get(url=url, timeout=10)
    response.raise_for_status()

    # Slice between the outermost parentheses rather than at fixed offsets,
    # so a trailing ';' or newline after the wrapper does not break parsing.
    text = response.text
    start = text.index('(')
    end = text.rindex(')')
    envelope = json.loads(text[start + 1:end])

    if envelope['ret'] == 0:
        return json.loads(envelope['data'])
    return None

5，利用函数获取到数据，看一下国内感染数据及今日新增感染情况

In [75]:
# Fetch the feed once and pull out the headline figures.
# catch_data() returns nothing when the feed's 'ret' code is not 0; in that
# case the subscripts below raise TypeError.
data = catch_data()
lastUpdateTime = data['lastUpdateTime']
chinaTotal = data['chinaTotal']
chinaAdd = data['chinaAdd']
# Last expression of the cell: displays chinaTotal.
chinaTotal

{'confirm': 37251, 'suspect': 28942, 'dead': 812, 'heal': 2685}

In [76]:
# Display chinaAdd, the value read from data['chinaAdd'].
chinaAdd

{'confirm': 2653, 'suspect': 1285, 'dead': 89, 'heal': 633}

## 3、利用pandas清洗国内感染数据并保存到csv

In [77]:
# Field extractors: given one statistics record (a dict, or the string form of
# a dict), return a single counter from it.
def _stat_field(x, key):
    """Return ``x[key]`` where ``x`` is a dict or the literal string of one.

    SECURITY NOTE: this used to be ``eval(str(x))[key]``. The records come
    from a remote server, and ``eval`` would execute any expression found in
    them. A dict is now indexed directly, and any other value is parsed with
    ``ast.literal_eval``, which accepts Python literals only.

    Raises:
        KeyError: if ``key`` is missing from the record.
        ValueError, SyntaxError: if a non-dict ``x`` is not a valid literal.
    """
    if isinstance(x, dict):
        return x[key]
    import ast  # local import: only needed for the string form
    return ast.literal_eval(str(x))[key]


def confirm(x):
    """Return the 'confirm' counter of record ``x``."""
    return _stat_field(x, 'confirm')


def suspect(x):
    """Return the 'suspect' counter of record ``x``."""
    return _stat_field(x, 'suspect')


def dead(x):
    """Return the 'dead' counter of record ``x``."""
    return _stat_field(x, 'dead')


def heal(x):
    """Return the 'heal' counter of record ``x``."""
    return _stat_field(x, 'heal')

Pandas中文网：https://www.pypandas.cn/

In [78]:
# Flatten the province -> city tree into one table row per city.
def extract_china(china_data):
    """Build a per-city DataFrame from a list of province records.

    Args:
        china_data: sequence of province records. Each record is indexed
            with 'name' and 'children'; each child is indexed with 'name',
            'total' and 'today'; 'total' and 'today' are each indexed with
            'confirm', 'suspect', 'dead' and 'heal'.

    Returns:
        A DataFrame with one row per city and the columns province, city,
        confirm, suspect, dead, heal (from 'total') and addconfirm,
        addsuspect, adddead, addheal (from 'today'). Empty input gives an
        empty DataFrame with the same columns.

    Raises:
        KeyError: if any of the keys listed above is missing.
    """
    columns = ["province", "city", "confirm", "suspect", "dead", "heal",
               "addconfirm", "addsuspect", "adddead", "addheal"]
    stats = ("confirm", "suspect", "dead", "heal")

    rows = []
    for province in china_data:
        province_name = province['name']
        for city in province['children']:
            total = city['total']
            today = city['today']
            row = {'province': province_name, 'city': city['name']}
            for stat in stats:
                # Cumulative figure, and today's increment with an 'add' prefix.
                row[stat] = total[stat]
                row['add' + stat] = today[stat]
            rows.append(row)

    # Passing the column list fixes the column order and keeps the header
    # present when there are no rows at all.
    return pd.DataFrame(rows, columns=columns)

利用上面的函数，把原始数据中‘areaTree’里的‘children’清洗出来

In [79]:
# Take the province list out of the area tree and flatten it with extract_china.
areaTree = data['areaTree']
china_data = areaTree[0]['children']  # element 0 is China
# china_data is rebound from the raw province list to the cleaned table.
china_data = extract_china(china_data)
# Last expression of the cell: displays the first rows.
china_data.head()

Unnamed: 0,province,city,confirm,suspect,dead,heal,addconfirm,addsuspect,adddead,addheal
0,湖北,武汉,14982,0,608,877,1379,0,0,0
1,湖北,孝感,2436,0,29,45,123,0,0,0
2,湖北,黄冈,2141,0,43,135,100,0,0,0
3,湖北,荆州,997,0,13,33,56,0,0,0
4,湖北,襄阳,988,0,7,40,81,0,0,0


把清洗出的数据，存成csv。

In [80]:
# Write the cleaned table to 'china_data.csv' (relative path).
# NOTE(review): to_csv defaults to index=True, so the row index is written as
# an extra unnamed first column; confirm that is wanted.
china_data.to_csv('china_data.csv')

## 4*、数据可视化

再进一步，把国内数据按照省和感染人数重新组合，为进一步可视化作准备

In [81]:
# Data preparation: sum the confirmed counts of all cities in each province.
# reset_index() turns the province group key back into an ordinary column.
area_data = china_data.groupby("province")["confirm"].sum().reset_index()
# Set the two column names explicitly.
area_data.columns = ["province","confirm"]
# Last expression of the cell: displays the first rows.
area_data.head()

Unnamed: 0,province,confirm
0,上海,292
1,云南,140
2,内蒙古,54
3,北京,315
4,台湾,17


使用area_data绘图

In [82]:
# Draw area_data as a map of China coloured by confirmed-case count.
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.globals import ThemeType

area_map = Map(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS))
# Series data is a list of [province, confirm] pairs built from the two columns.
area_map.add("",[list(z) for z in zip(list(area_data["province"]), list(area_data["confirm"]))], "china",is_map_symbol_show=False)
# Piecewise visual map: each entry in `pieces` gives a value range, its legend label and its colour.
area_map.set_global_opts(title_opts=opts.TitleOpts(title="2019_nCoV中国疫情地图"),visualmap_opts=opts.VisualMapOpts(is_piecewise=True,
                pieces = [
                        {"min": 1001 , "label": '>1000',"color": "#893448"}, # no "max" given, so this range has no upper bound
                        {"min": 500, "max": 1000, "label": '500-1000',"color": "#ff585e"},
                        {"min": 101, "max": 499, "label": '101-499',"color": "#fb8146"},
                        {"min": 10, "max": 100, "label": '10-100',"color": "#ffb248"},
                        {"min": 0, "max": 9, "label": '0-9',"color" : "#fff2d1" }]))
# Last expression of the cell: renders the chart inline in the notebook.
area_map.render_notebook()