In [2]:
#import packages
import requests
import pandas as pd
import time
import json
# Use the fully qualified option key. The bare 'max_rows' pattern is ambiguous on
# pandas versions that also register 'styler.render.max_rows' and raises OptionError.
pd.set_option('display.max_rows',500)
import pyecharts
import pyecharts.options as opts
from pyecharts.charts import *
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['STHeiti']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['figure.dpi'] = 100

## 爬取数据

In [2]:
# Browser-like User-Agent header passed to the requests below.
headers={'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'}
url = 'https://c.m.163.com/ug/api/wuhan/app/data/list-total'
# Bound the wait so a stalled connection cannot hang the kernel forever,
# and fail loudly on an HTTP error instead of trying to parse an error page.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()
data_json = json.loads(r.text)
# Everything used later in the notebook lives under the top-level 'data' key.
data = data_json['data']

In [3]:
def get_data(data, info_list):
    """Flatten a list of area records into one wide DataFrame.

    Parameters
    ----------
    data : list of dict
        Records that each carry nested ``'today'`` and ``'total'`` mappings
        in addition to the plain fields named in ``info_list``.
    info_list : list of str
        Names of the plain fields to keep.

    Returns
    -------
    pandas.DataFrame
        The ``info_list`` columns, followed by the ``'total'`` fields prefixed
        with ``total_`` and then the ``'today'`` fields prefixed with ``today_``.
    """
    def _expand(key):
        # One row per record, one column per nested field, tagged with its origin.
        nested = pd.DataFrame([record[key] for record in data])
        nested.columns = [key + '_' + column for column in nested.columns]
        return nested

    base_columns = pd.DataFrame(data)[info_list]
    today_part = _expand('today')
    total_part = _expand('total')

    # Side-by-side merge: plain info, then totals, then today's figures.
    return pd.concat([base_columns, total_part, today_part], axis=1)

def save_data(data,name):
    """Write ``data`` to ``./data/<name>_<YYYY_MM_DD>.csv`` and print a confirmation.

    Parameters
    ----------
    data : pandas.DataFrame
        Table to save; the index is not written.
    name : str
        File-name stem; today's local date is appended to it.

    The ``./data/`` directory is created when it does not exist yet, so the
    first run in a fresh working directory no longer fails.
    """
    import os  # local import keeps this fix self-contained

    out_dir = './data/'
    os.makedirs(out_dir, exist_ok=True)
    file_name = out_dir + name+'_'+time.strftime('%Y_%m_%d',time.localtime(time.time()))+'.csv'
    # utf_8_sig writes a BOM (helps spreadsheet tools detect UTF-8 for the Chinese text)
    data.to_csv(file_name,index=False,encoding='utf_8_sig')
    print(file_name+' 保存成功！')

In [4]:
# Save today's per-province figures for China.
# The China entry is looked up by name rather than by list position, so a change
# in the ordering of the API's areaTree cannot silently select another country.
china_entry = next((area for area in data['areaTree'] if area.get('name') == '中国'), None)
if china_entry is None:
    raise KeyError("'中国' not found in data['areaTree']")
data_province = china_entry['children']
today_province = get_data(data_province,['id','lastUpdateTime','name'])
save_data(today_province,'today_province')
# Save today's per-country figures for the whole world.
areaTree = data['areaTree']
today_world = get_data(areaTree,['id','lastUpdateTime','name'])
save_data(today_world,'today_world')
# Save the day-by-day history for China.
chinaDayList = data['chinaDayList']
alltime_China = get_data(chinaDayList,['date','lastUpdateTime'])
save_data(alltime_China,'alltime_China')

./data/today_province_2020_06_29.csv 保存成功！
./data/today_world_2020_06_29.csv 保存成功！
./data/alltime_China_2020_06_29.csv 保存成功！


In [None]:
# Download the full history of every country and stack it into one table.
country_dict = dict(zip(today_world['id'], today_world['name']))
start = time.time()
country_frames = []  # one DataFrame per successfully downloaded country
total_rows = 0       # running row count, reported in the progress line
for country_id, country_name in country_dict.items():

    try:
        # Request this country's history by its area code and parse the JSON.
        url = 'https://c.m.163.com/ug/api/wuhan/app/data/list-by-area-code?areaCode=' + country_id
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        json_data = json.loads(r.text)

        # Build this country's table and tag every row with the country name.
        country_data = get_data(json_data['data']['list'], ['date'])
        country_data['name'] = country_name
    except Exception as exc:
        # Best effort: report the failure (with its cause) and carry on with the
        # next country. Unlike a bare `except:`, Ctrl-C still interrupts the loop.
        print('-' * 20, country_name, 'wrong', repr(exc), '-' * 20)
    else:
        # Collect the frames and concatenate once at the end. This does not depend
        # on any particular country being first and avoids re-copying all rows
        # on every iteration.
        country_frames.append(country_data)
        total_rows += len(country_data)
        print('-' * 20, country_name, '成功', country_data.shape, total_rows,
              ',累计耗时:', round(time.time() - start), '-' * 20)

    # Pause between requests, after failures too.
    time.sleep(10)

if not country_frames:
    raise RuntimeError('no country history could be downloaded')
alltime_world = pd.concat(country_frames)
save_data(alltime_world,'alltime_world')

-------------------- 突尼斯 成功 (103, 15) (103, 15) ,累计耗时: 0 --------------------
-------------------- 塞尔维亚 成功 (108, 15) (211, 15) ,累计耗时: 10 --------------------
-------------------- 中国 成功 (144, 15) (355, 15) ,累计耗时: 21 --------------------
-------------------- 日本 成功 (135, 15) (490, 15) ,累计耗时: 31 --------------------
-------------------- 泰国 成功 (119, 15) (609, 15) ,累计耗时: 41 --------------------
-------------------- 泰国 wrong --------------------
-------------------- 新加坡 成功 (136, 15) (745, 15) ,累计耗时: 51 --------------------
-------------------- 韩国 成功 (159, 15) (904, 15) ,累计耗时: 61 --------------------
-------------------- 澳大利亚 成功 (118, 15) (1022, 15) ,累计耗时: 72 --------------------
-------------------- 德国 成功 (125, 15) (1147, 15) ,累计耗时: 82 --------------------
-------------------- 美国 成功 (130, 15) (1277, 15) ,累计耗时: 92 --------------------
-------------------- 马来西亚 成功 (122, 15) (1399, 15) ,累计耗时: 102 --------------------
-------------------- 越南 成功 (93, 15) (1492, 15) ,累计耗时: 112 --------------------


In [None]:
# Download the full history of every province and stack it into one table.
province_dict = dict(zip(today_province['id'], today_province['name']))

start = time.time()
province_frames = []  # one DataFrame per successfully downloaded province
total_rows = 0        # running row count, reported in the progress line
for province_id, province_name in province_dict.items():

    try:
        # Request this province's history by its area code and parse the JSON.
        url = 'https://c.m.163.com/ug/api/wuhan/app/data/list-by-area-code?areaCode=' + province_id
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        # Separate name so the first cell's `data_json` is not overwritten.
        province_json = json.loads(r.text)

        # Build this province's table and tag every row with the province name.
        province_data = get_data(province_json['data']['list'], ['date'])
        province_data['name'] = province_name
    except Exception as exc:
        # Best effort: report the failure (with its cause) and carry on with the
        # next province. Unlike a bare `except:`, Ctrl-C still interrupts the loop.
        print('-' * 20, province_name, 'wrong', repr(exc), '-' * 20)
    else:
        # Collect the frames and concatenate once at the end. This does not depend
        # on any particular province being first and avoids re-copying all rows
        # on every iteration.
        province_frames.append(province_data)
        total_rows += len(province_data)
        print('-' * 20, province_name, '成功',
              province_data.shape, total_rows,
              ',累计耗时:', round(time.time() - start), '-' * 20)

    # Pause between requests, after failures too.
    time.sleep(10)

if not province_frames:
    raise RuntimeError('no province history could be downloaded')
alltime_province = pd.concat(province_frames)
save_data(alltime_province,'alltime_province')

## 数据可视化

### 整体确诊概况

In [3]:
# ---- data01: today's world snapshot ----
# Reads the CSV written earlier today; a fixed snapshot such as
# './data/today_world_2020_06_29.csv' can be substituted for a past date.
date_tag = time.strftime('%Y_%m_%d',time.localtime(time.time()))
world_data = pd.read_csv('./data/' + 'today_world' + '_' + date_tag + '.csv')
country_name_mapping = pd.read_csv('./data/county_name_mapping.csv',encoding='gbk')

# Currently active cases = cumulative confirmed - cumulative healed - cumulative dead
world_data['today_storeConfirm'] = (
    world_data['total_confirm']
    .sub(world_data['total_heal'])
    .sub(world_data['total_dead'])
)

# Translate the Chinese country names to English; names missing from the mapping stay unchanged
chinese_names = country_name_mapping['中文'].values
english_names = country_name_mapping['英文'].values
world_data['eng_name'] = world_data['name'].replace(chinese_names, english_names)
world_data.head()

Unnamed: 0,id,lastUpdateTime,name,total_confirm,total_suspect,total_heal,total_dead,total_severe,total_input,today_confirm,today_suspect,today_heal,today_dead,today_severe,today_storeConfirm,today_input,eng_name
0,9577772,2020-06-29 00:02:30,突尼斯,1169,0,1029,50,0,0.0,5.0,,0.0,0.0,,90,,Tunisia
1,9507896,2020-06-29 00:02:30,塞尔维亚,14046,0,12464,270,0,0.0,254.0,,0.0,0.0,,1312,,Serbia
2,0,2020-06-29 17:18:23,中国,85208,0,80045,4648,0,1907.0,16.0,,19.0,0.0,,515,5.0,China
3,1,2020-06-15 07:16:58,日本,18241,0,16298,940,0,0.0,75.0,,63.0,0.0,,1003,0.0,Japan
4,2,2020-06-29 15:02:48,泰国,3169,0,3053,58,0,0.0,7.0,,0.0,0.0,,58,,Thailand


In [4]:
# [english name, today's newly confirmed] pairs, in the list-of-lists form the map expects
today_pairs_frame = world_data.loc[:, ['eng_name', 'today_confirm']]
heatmap_datat = today_pairs_frame.to_numpy().tolist()
heatmap_datat[:10]

[['Tunisia', 5.0],
 ['Serbia', 254.0],
 ['China', 16.0],
 ['Japan', 75.0],
 ['Thailand', 7.0],
 ['Singapore', 202.0],
 ['Korea', 42.0],
 ['Australia', 81.0],
 ['Germany', 209.0],
 ['United States', 40540.0]]

In [6]:
# World map of TODAY's newly confirmed cases (heatmap_datat holds 'today_confirm').
# The series name and title previously said "cumulative" (copied from the
# cumulative map), which mislabelled the chart.
world_map = Map().add(series_name = "今日新增确诊人数",
                 data_pair = heatmap_datat,
                 maptype = "world",
                 is_map_symbol_show = False)

# Hide the per-country text labels
world_map.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
world_map.set_global_opts(title_opts = opts.TitleOpts(title="世界各国家今日新增确诊人数地图"), # chart title
                     visualmap_opts = opts.VisualMapOpts(pieces=[ # custom value bands and their colours
                                                               {"min": 50000,"color":"#800000"},
                                                               {"min": 20000, "max": 49999, "color":"#B22222"},
                                                               {"min": 10000, "max": 19999,"color":"#CD5C5C"},
                                                               {"min": 1000, "max": 9999, "color":"#BC8F8F"},
                                                               {"max": 999, "color":"#FFE4E1"},
                                                              ],
                     is_piecewise = True))  # show a piecewise (banded) legend
# ---- map01 (today): write the chart to an HTML file; use world_map.render_notebook() to view inline ----
world_map.render('./maps/'+ 'world_map_t' + '_' + time.strftime('%Y_%m_%d',time.localtime(time.time()))+'.html')

  super().__init__(init_opts=init_opts)


'C:\\Users\\admin\\maps\\world_map_t_2020_06_29.html'

In [None]:
# [english name, cumulative confirmed] pairs, in the list-of-lists form the map expects
total_pairs_frame = world_data.loc[:, ['eng_name', 'total_confirm']]
heatmap_data = total_pairs_frame.to_numpy().tolist()
heatmap_data[:10]

In [None]:
# Value bands and colours for cumulative confirmed cases
cumulative_pieces = [
    {"min": 500000,"color":"#800000"},
    {"min": 200000, "max": 499999, "color":"#B22222"},
    {"min": 100000, "max": 199999,"color":"#CD5C5C"},
    {"min": 10000, "max": 99999, "color":"#BC8F8F"},
    {"max": 9999, "color":"#FFE4E1"},
]

# World map of cumulative confirmed cases per country
world_map = (
    Map()
    .add(series_name="累计确诊人数",
         data_pair=heatmap_data,
         maptype="world",
         is_map_symbol_show=False)
    # hide the per-country text labels
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    .set_global_opts(
        title_opts=opts.TitleOpts(title="世界各国家累计确诊人数地图"),
        # piecewise (banded) legend driven by the bands above
        visualmap_opts=opts.VisualMapOpts(pieces=cumulative_pieces, is_piecewise=True),
    )
)
# ---- map01: write the chart to an HTML file; use world_map.render_notebook() to view inline ----
world_map.render('./maps/'+ 'world_map' + '_' + time.strftime('%Y_%m_%d',time.localtime(time.time()))+'.html')

In [None]:
# ---- data02: today's per-province snapshot ----
# Reads the CSV written earlier today; a fixed snapshot such as
# './data/today_province_2020_06_29.csv' can be substituted for a past date.
province_csv = './data/' + 'today_province' + '_' + time.strftime('%Y_%m_%d',time.localtime(time.time())) + '.csv'
china_data = pd.read_csv(province_csv)
china_data.head()

In [None]:
# [province name, cumulative confirmed] pairs for the China map
province_pairs = [
    [province, confirmed]
    for province, confirmed in zip(china_data["name"], china_data["total_confirm"])
]

# Value bands, legend labels and colours
province_pieces = [
    {"min": 10000 , "label": '>10000',"color": "#893448"},
    {"min": 1000, "max": 9999, "label": '1000-9999',"color": "#ff585e"},
    {"min": 500, "max": 999, "label": '500-999',"color": "#fb8146"},
    {"min": 100, "max": 499, "label": '100-499',"color": "#ffb248"},
    {"min": 0, "max": 99, "label": '0-99',"color" : "#fff2d1" },
]

area_map = Map()
area_map.add("", province_pairs, "china", is_map_symbol_show=False)
area_map.set_global_opts(
    title_opts=opts.TitleOpts(title="国内各省分确诊人数热力图"),
    visualmap_opts=opts.VisualMapOpts(is_piecewise=True, pieces=province_pieces),
)
# ---- map02: write the chart to an HTML file; use area_map.render_notebook() to view inline ----
area_map.render('./maps/'+ 'china_map' + '_' + time.strftime('%Y_%m_%d',time.localtime(time.time()))+'.html')

### 病死及治愈情况分析

In [None]:
# Cumulative recovery rate = cumulative healed / cumulative confirmed
world_data['heal_rate'] = world_data['total_heal'].div(world_data['total_confirm'])

# Only countries with at least 10000 confirmed cases; keep the ten highest rates
cond = world_data['total_confirm'] >= 10000
sort_data = (
    world_data.loc[cond, ['name', 'heal_rate']]
    .sort_values(by='heal_rate', ascending=False)
    .head(10)
)
name = sort_data['name'].tolist()
heal_rate_ = sort_data['heal_rate'].tolist()
# Round to four decimals for display on the chart
heal_rate = [round(rate, 4) for rate in heal_rate_]

In [None]:
# Bar chart of the ten highest recovery rates
bar = (
    Bar()
    .add_xaxis(name)
    .add_yaxis("heal_rate", heal_rate, color='green')
    .set_global_opts(title_opts=opts.TitleOpts(title='全球治愈率最高的十个国家（或地区）'))
)
# ---- map03: write the chart to an HTML file ----
bar.render('./maps/'+ 'bar' + '_' + time.strftime('%Y_%m_%d',time.localtime(time.time()))+'.html')

In [None]:
# Countries with more than 3000 cumulative deaths, plus one "其他" (other) row
# holding the combined deaths of all remaining countries.
high_death_mask = world_data['total_dead'] > 3000
dead_count_data = world_data.loc[high_death_mask, ['name','total_dead']]
other = world_data['total_dead'].sum() - dead_count_data['total_dead'].sum()
other_row = pd.DataFrame(data=[['其他',other]],columns=['name','total_dead'])
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent
# and, like append's default, keeps the original index labels.
dead_count_data = pd.concat([dead_count_data, other_row])
# Largest first, as an array of [name, deaths] rows for the rose chart
rank_data = dead_count_data[['name','total_dead']].sort_values(by='total_dead',ascending=False).values
rank_data[:5]

In [None]:
# Rose (Nightingale) chart of cumulative deaths by country
pie = Pie()
pie.add(
    "累计死亡人数分布",           # tooltip label
    rank_data,                    # [name, deaths] rows
    radius=["20%", "70%"],        # inner and outer radius
    center=["60%", "60%"],        # position of the centre
    rosetype="radius",            # rose mode: radius encodes the value
)

# Global options: title position and a vertical legend
rose_title = opts.TitleOpts(title="世界国家累计死亡人数玫瑰图", pos_right='40%')
rose_legend = opts.LegendOpts(orient='vertical', pos_right="85%", pos_top="15%")
pie.set_global_opts(title_opts=rose_title, legend_opts=rose_legend)

# Label text is "<country> : <share>%"
pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b} : {d}%"))

# ---- map03: write the chart to an HTML file; use pie.render_notebook() to view inline ----
pie.render('./maps/'+ 'rose' + '_' + time.strftime('%Y_%m_%d',time.localtime(time.time()))+'.html')

### 每日新增人数趋势

In [None]:
# ---- data03: per-country history ----
# Reads the CSV written earlier today; a fixed snapshot such as
# './data/alltime_world_2020_06_29.csv' can be substituted for a past date.
alltime_world_csv = './data/' + 'alltime_world' + '_' + time.strftime('%Y_%m_%d',time.localtime(time.time())) + '.csv'
world_all_data = pd.read_csv(alltime_world_csv)
world_all_data.head()

In [None]:
# ---- data04: world-wide daily sums from 2020-06-01 onwards ----
cond = world_all_data['date'] >= '2020-06-01'
# Group once and read both series from the same result
daily_totals = world_all_data[cond].groupby(['date'])[['today_confirm', 'today_heal']].sum()
date_list = daily_totals.index.tolist()
today_confirm_list = daily_totals['today_confirm'].values.tolist()
today_heal_list = daily_totals['today_heal'].values.tolist()

In [None]:
# Line chart: newly confirmed vs. newly healed per day
line1 = (
    Line()
    .add_xaxis(date_list)
    .add_yaxis("today_confirm", today_confirm_list)
    .add_yaxis("today_heal", today_heal_list)
    .set_global_opts(title_opts=opts.TitleOpts(title="6月新增确诊 VS 新增治愈"))
)
# ---- map04: write the chart to an HTML file; use line1.render_notebook() to view inline ----
line1.render('./maps/'+ 'line' + '_' + time.strftime('%Y_%m_%d',time.localtime(time.time()))+'.html')

### 5月各国确诊人数变化图（动态图）

In [None]:
# The ten countries shown in the animated bar chart
country_list = ['巴西', '伊朗', '俄罗斯', '土耳其', '德国', '法国', '英国', '意大利', '西班牙', '美国']
# Keep only the history rows that belong to those countries
in_country_list = world_all_data['name'].isin(country_list)
target_data = world_all_data[in_country_list]

In [None]:
from datetime import datetime,timedelta
# ---- data05: the dates animated by the bar-chart race ----
# Every day of May 2020. The month length is derived from the calendar instead of
# a hard-coded 30, so the last day (the 31st) is no longer dropped.
month_start = datetime(2020, 5, 1)
# Day 28 exists in every month, and 28 + 4 days always falls in the following month.
next_month_start = (month_start.replace(day=28) + timedelta(days=4)).replace(day=1)
days_in_month = (next_month_start - month_start).days
time_list = [(month_start + timedelta(i)).strftime('%Y-%m-%d') for i in range(days_in_month)]
# One bar colour per country, in the same order as country_list
color_list = ['brown','peru','orange','blue','green',
              'red','yellow','teal','pink','orchid']
# Lookup table pairing each country with its bar colour (row i of both lists)
country_color = pd.DataFrame({'country': country_list, 'color': color_list})

In [None]:
import matplotlib.ticker as ticker

def barh_draw(day):
    """Draw one animation frame: the ranked horizontal bar chart for ``day``.

    Reads the notebook-level ``target_data``, ``country_color`` and ``ax``,
    and redraws ``ax`` in place. Returns nothing.

    Parameters
    ----------
    day : str
        Date to plot; rows are selected where ``target_data['date'] == day``.
    """
    # Font used for the Chinese labels
    plt.rcParams['font.sans-serif']=['SimHei']

    # Rows for this day, smallest value first so the largest bar ends up on top
    draw_data = target_data[target_data['date']==day][['name','total_confirm']].sort_values(by='total_confirm',ascending=True)

    # Start from an empty axes for every frame
    ax.clear()

    if draw_data.empty:
        # No rows for this date: leave the frame blank instead of positioning
        # labels at NaN coordinates.
        plt.close(ax.figure)
        return

    # Build the country -> colour lookup once per frame instead of filtering
    # the DataFrame once per bar.
    color_of = dict(zip(country_color['country'], country_color['color']))
    ax.barh(draw_data['name'], draw_data['total_confirm'],
            color=[color_of[country] for country in draw_data['name']])

    largest = draw_data['total_confirm'].max()

    # Small horizontal gap between the end of a bar and its value label
    dx = largest/200

    # Value label next to every bar
    for j, value in enumerate(draw_data['total_confirm']):
        ax.text(value+dx, j, f'{value:,.0f}', size=10, ha='left', va='center')

    # Large date stamp inside the plot area
    ax.text(largest*0.75, 0.4, day, color='#777777',size=40, ha='left')

    # Thousands separators on the x tick labels
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))

    # Ticks along the top edge
    ax.xaxis.set_ticks_position('top')

    # Tick label colour and size
    ax.tick_params(axis='x',colors='#777777', labelsize=15)

    # Vertical grid lines
    ax.grid(which='major',axis='x',linestyle='-')

    # Chart title, placed above the bars in data coordinates
    ax.text(0, 11, '上个月世界各国家累计确诊人数动态条形图',size=20, ha='left')

    # Remove the axes frame. This targets `ax` directly, because plt.box() acts on
    # pyplot's "current" axes, which is no longer `ax` once its figure has been
    # closed below, and would create a throw-away figure on each later frame.
    ax.set_frame_on(False)

    # Close this figure in pyplot (presumably to stop the notebook from also
    # showing it as a static image; confirm). Naming the figure keeps later
    # frames from touching any other open figure.
    plt.close(ax.figure)

In [None]:
# Build the animated bar chart and save it as a GIF
import os
import matplotlib.animation as animation
from IPython.display import HTML

fig, ax = plt.subplots(figsize=(12, 8))

# One frame per date in time_list, 200 ms apart
animator = animation.FuncAnimation(fig, barh_draw, frames=time_list, interval=200)
# ---- map05 ----
# Save next to the other charts under the relative './maps/' directory instead of
# the machine-specific absolute path 'd:/ipython/maps/', and make sure it exists.
os.makedirs('./maps/', exist_ok=True)
animator.save('./maps/'+ 'bar' + '_' + time.strftime('%Y_%m_%d',time.localtime(time.time()))+'.gif',writer='pillow')
print('bar chart save successful')
# To preview inline instead of saving: HTML(animator.to_jshtml())