# 疫情简单分析
- 数据爬取 & 简单统计可视化
- 合并海内外数据 & 可视化比较
- 疫情分布地图可视化


### 数据爬取

In [117]:
import json
import requests
import pandas as pd
import numpy as np

国内数据

In [118]:
# Fetch China's day-by-day history from the Tencent news JSONP endpoint.
url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_other&callback=jQuery34109976904641590418_1591823412632&_=1591823412633'
resp = requests.get(url, timeout=30)  # never hang the notebook on a dead server
resp.raise_for_status()               # fail here rather than inside json.loads
china = resp.text
# The body is JSONP, i.e. `callback({...})`. Slice between the first "(" and
# the last ")" instead of stripping a hard-coded callback name and one
# trailing character, so a trailing ";" or newline cannot corrupt the payload.
payload = china[china.index('(') + 1:china.rindex(')')]
# The 'data' field is itself a JSON-encoded string, hence the second decode.
chinaday = json.loads(json.loads(payload)['data'])['chinaDayList']
chinaTotal = chinaday[-1]  # last list entry, used later as the latest totals
chinaday = pd.DataFrame(chinaday)
chinaday.head(2)

Unnamed: 0,confirm,date,dead,deadRate,heal,healRate,importedCase,noInfect,nowConfirm,nowSevere,suspect
0,41,1.13,1,2.4,0,0.0,0,0,0,0,0
1,41,1.14,1,2.4,0,0.0,0,0,0,0,0


海外数据

In [119]:
# Request the FAutoGlobalStatis module and keep its payload as gloTotal.
url = (
    'https://api.inews.qq.com/newsqa/v1/automation/modules/list'
    '?modules=FAutoGlobalStatis'
)
r1 = requests.get(url).json()
gloTotal = r1['data']['FAutoGlobalStatis']

In [120]:
gloTotal

{'nowConfirm': 3470584,
 'confirm': 7985127,
 'heal': 4082798,
 'dead': 431745,
 'nowConfirmAdd': 1207,
 'confirmAdd': 11303,
 'healAdd': 9917,
 'deadAdd': 179,
 'lastUpdateTime': '2020-06-16 01:35:35'}

In [121]:
# Fetch the global daily list and flatten it into one DataFrame row per day.
url = 'https://api.inews.qq.com/newsqa/v1/automation/modules/list?modules=FAutoGlobalDailyList'
gloday = requests.get(url).json()
# Each entry looks like {'date': ..., 'all': {metric: value, ...}}. Building
# one dict per day (instead of parallel per-column lists) keeps rows aligned
# even if a metric is missing for some day: pandas fills the gap with NaN
# rather than raising on columns of unequal length.
gday = pd.DataFrame(
    [
        {'date': day['date'], **day['all']}
        for day in gloday['data']['FAutoGlobalDailyList']
    ]
)
gday.head(2)

Unnamed: 0,date,confirm,dead,heal,newAddConfirm,deadRate,healRate
0,1.28,57,0,3,0,0.0,5.26
1,1.29,74,0,3,13,0.0,4.05


美国数据

In [122]:
# Download the daily list for country=美国 (United States).
url = 'https://api.inews.qq.com/newsqa/v1/automation/foreign/daily/list?country=美国'
us_records = requests.get(url).json()['data']
ustotal = us_records[-1]  # last list entry, used later as the latest totals
usday = pd.DataFrame(us_records)
usday.head(2)

Unnamed: 0,confirm,confirm_add,date,dead,heal
0,5,0,1.28,0,0
1,5,0,1.29,0,0


In [123]:
# Latest cumulative confirmed / healed / dead counts, read in that order.
_fields = ('confirm', 'heal', 'dead')

# China
c, c_h, c_d = (chinaTotal[k] for k in _fields)

# United States
u, u_h, u_d = (ustotal[k] for k in _fields)

# Overseas (from gloTotal)
g, g_h, g_d = (gloTotal[k] for k in _fields)

### 简单统计可视化

In [124]:
from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.faker import Faker

# Category labels; every series below lists its values in this same order
# (dead, healed, confirmed).
lab = ["死亡人数", "治愈人数", "确诊人数"]

# Horizontal grouped bar chart comparing the overseas, US and China totals.
b = (
    Bar()
    .add_xaxis(lab)
    .add_yaxis("海外", [g_d, g_h, g])
    .add_yaxis("美国", [u_d, u_h, u])
    .add_yaxis("国内", [c_d, c_h, c])
    .reversal_axis()
    .set_series_opts(label_opts=opts.LabelOpts(position="right"))
    .set_global_opts(
        title_opts=opts.TitleOpts(title="最新疫情统计"),
        xaxis_opts=opts.AxisOpts(
            splitline_opts=opts.SplitLineOpts(is_show=True),
        ),
    )
)
# Write a standalone HTML copy, then display inline in the notebook.
b.render("bar_reversal_axis.html")
b.render_notebook()

从上图可以看出：

- 中国对疫情有很好的控制。

- 而美国的确诊人数接近海外总数的三分之一

In [125]:
# 极坐标图统计，与上图重复
# from pyecharts import options as opts
# from pyecharts.charts import Polar

# p =Polar()
# p.add_schema(
#         radiusaxis_opts=opts.RadiusAxisOpts(data=['死亡','治愈','累计确诊'], type_="category"),
#         angleaxis_opts=opts.AngleAxisOpts(is_clockwise=True, max_=g),
#     )

# p.add("国内", [c_d,c_h,c], type_="bar")
# p.add("美国", [u_d,u_h,u], type_="bar")
# p.add("海外", [g_d,g_h,g], type_="bar")
# p.set_global_opts(title_opts=opts.TitleOpts(title="疫情"))
# p.set_series_opts(label_opts=opts.LabelOpts(is_show=True))
# p.render("疫情.html")
# p.render_notebook()


### 合并海内外数据(按照日期合并)

In [126]:
# y = us, x = 海外
# ['confirm_x','confirm_y','confirm']
# Join the three daily tables on 'date'. In the first merge pandas suffixes
# the clashing columns: `_x` comes from gday (overseas) and `_y` from usday
# (US). China's confirm/dead/heal do not clash in the second merge and keep
# their plain names.
global_us = gday.merge(usday, on="date")
merged = global_us.merge(chinaday, on="date")
merged[['confirm_x', 'confirm_y', 'confirm']].head()

Unnamed: 0,confirm_x,confirm_y,confirm
0,57,5,5997
1,74,5,7736
2,98,6,9720
3,124,6,11821
4,139,7,14411


调整日期格式

In [127]:
from pyecharts.faker import Faker
Faker.months

['1月', '2月', '3月', '4月', '5月', '6月', '7月', '8月', '9月', '10月', '11月', '12月']

In [128]:
def dateformate(x):
    """Convert a ``'MM.DD'`` date string into a label such as ``'1月 28日'``.

    Args:
        x: Date text with month and day separated by a dot, e.g. ``'01.28'``.
            Zero padding is optional, so ``'1.28'`` is accepted as well.

    Returns:
        ``'<month>月 <day>日'`` with leading zeros removed.

    Raises:
        ValueError: If ``x`` has no dot, a part is not an integer, or the
            month is outside 1-12.
    """
    # Split on the dot rather than slicing fixed columns, so an unpadded
    # month ('2.01') parses instead of failing on int('2.').
    mon_text, dot, day_text = x.partition('.')
    if not dot:
        raise ValueError(f"expected 'MM.DD', got {x!r}")
    mon, day = int(mon_text), int(day_text)
    # Reject out-of-range months explicitly; indexing a 12-item month list
    # with mon - 1 would silently turn month 0 into December.
    if not 1 <= mon <= 12:
        raise ValueError(f"month out of range in {x!r}")
    # Same text as Faker.months[mon - 1] ('1月' .. '12月') without depending
    # on pyecharts' sample-data helper.
    return f"{mon}月 {day}日"
# Format the 'date' column element-wise. Mapping the column directly avoids
# building a Series per row (DataFrame.apply with axis=1) and still yields a
# plain column when the frame is empty.
merged['date_f'] = merged['date'].map(dateformate)

In [129]:
merged['date_f'][0:5]

0    1月 28日
1    1月 29日
2    1月 30日
3    1月 31日
4     2月 1日
Name: date_f, dtype: object

In [130]:
# def valuation_formula(x, y):
#     if x < y:
#         return y
#     else:
#         return x
# merged['confirm_x'] = merged.apply(lambda row: valuation_formula(row['confirm_x'], row['confirm_y']), axis=1)
# merged['dead_x'] = merged.apply(lambda row: valuation_formula(row['dead_x'], row['dead_y']), axis=1)
# merged['heal_x'] = merged.apply(lambda row: valuation_formula(row['heal_x'], row['heal_y']), axis=1)

In [131]:
merged.head()

Unnamed: 0,date,confirm_x,dead_x,heal_x,newAddConfirm,deadRate_x,healRate_x,confirm_y,confirm_add,dead_y,...,dead,deadRate_y,heal,healRate_y,importedCase,noInfect,nowConfirm,nowSevere,suspect,date_f
0,1.28,57,0,3,0,0.0,5.26,5,0,0,...,132,2.2,103,1.7,0,0,5762,1239,9239,1月 28日
1,1.29,74,0,3,13,0.0,4.05,5,0,0,...,170,2.2,124,1.6,0,0,7442,1370,12167,1月 29日
2,1.3,98,0,6,22,0.0,6.12,6,1,0,...,213,2.2,171,1.8,0,0,9336,1527,15238,1月 30日
3,1.31,124,0,11,20,0.0,8.87,6,0,0,...,259,2.2,243,2.1,0,0,11319,1795,17988,1月 31日
4,2.01,139,0,11,13,0.0,7.91,7,1,0,...,304,2.1,328,2.3,0,0,13779,2110,19544,2月 1日


In [132]:
# merged.loc[:,'date']
# len(merged['date_f'])
# Scratch copies: `temp` snapshots the confirm_x column and `t` the whole
# merged frame, so experiments do not touch `merged` itself.
# NOTE(review): `t` is not read again in the visible notebook and `temp` is
# rebound further down — confirm whether this cell is still needed.
temp = merged['confirm_x'].copy()
t = merged.copy()

In [133]:
# arr={'confirm_x':[0]}
# s = pd.DataFrame(arr)
# len(temp)
# temp.index = [i for i in range(1,136)]
# temp[0] = 0
# temp = temp.drop(len(temp)-1)
# t['confirm_xX'] = temp

In [134]:
# t[['confirm_x','confirm_xX']]

In [135]:
def growth_rate(df,column):
    """Return the row-over-row percentage growth of ``df[column]``.

    Each element is ``(current - previous) / previous * 100`` rounded to one
    decimal, where ``previous`` is the value one row earlier.

    Args:
        df: DataFrame (or a slice of one) containing ``column``.
        column: Name of the numeric column to analyse.

    Returns:
        A Series carrying the same index labels as ``df[column]``. The first
        row has no predecessor and is compared against 0, so it is ``inf``
        (or ``NaN`` when the first value is itself 0).
    """
    current = df[column]
    # shift() pairs rows by position but keeps df's own index labels. The
    # previous version re-labelled the shifted copy 1..n, which only lined up
    # with frames indexed from 0 and gave wrong rates for slices such as
    # merged.loc[71:135].
    previous = current.shift(1, fill_value=0)
    return ((current - previous) / previous * 100).round(1)

In [136]:
#growth_rate(merged,'confirm_x')
len(merged)

139

### 数据可视化比较

In [137]:
def grow_plot(start,end,merged,col,function,lab,boolean,position):
    """Build a pyecharts Line chart comparing overseas, China and US series.

    Args:
        start: First row label of the window (``merged.loc[start:end]``).
        end: Last row label of the window; ``.loc`` slicing includes it.
        merged: DataFrame with a ``date_f`` column plus whatever columns
            ``function`` reads for ``col + '_x'``, ``col`` and ``col + '_y'``.
        col: Base column name; it also selects the chart title.
        function: Callable ``(df, column)`` returning the y values of a series.
        lab: Unit text appended to every y-axis tick label.
        boolean: When equal to ``True`` each series gets a max-value mark point.
        position: ``'r'`` places the title 5% from the right edge; any other
            value keeps the default title position.

    Returns:
        The configured ``Line`` chart (not rendered).
    """
    import pyecharts.options as opts
    from pyecharts.charts import Line

    day_list = list(merged.loc[start:end, 'date_f'])

    # (legend name, column suffix, smoothed?) in drawing order.
    series_specs = (
        ("海外", '_x', True),
        ("国内", '', False),
        ("美国", '_y', True),
    )
    # Evaluate every series up front, each on a fresh slice of the window.
    series_values = [
        function(merged.loc[start:end], col + suffix)
        for _name, suffix, _smooth in series_specs
    ]

    # Explicit comparison kept on purpose: only a value equal to True enables
    # the mark point.
    ma = [opts.MarkPointItem(type_="max")] if boolean == True else None

    l = Line(init_opts=opts.InitOpts())
    l.add_xaxis(xaxis_data=day_list)
    for (name, _suffix, smooth), values in zip(series_specs, series_values):
        series_kwargs = dict(
            series_name=name,
            y_axis=values,
            markpoint_opts=opts.MarkPointOpts(data=ma),
            label_opts=opts.LabelOpts(is_show=False),
            linestyle_opts=opts.LineStyleOpts(width=2),
        )
        if smooth:
            # The China series is drawn without smoothing, so the flag is
            # only passed for the other two.
            series_kwargs['is_smooth'] = True
        l.add_yaxis(**series_kwargs)

    # The first keyword contained in `col` picks the title; otherwise empty.
    title_by_key = (
        ('confirm', '疫情累计确诊'),
        ('heal', '疫情治愈'),
        ('dead', '疫情死亡'),
    )
    title = next((text for key, text in title_by_key if key in col), '')

    title_kwargs = {
        'title': title,
        'subtitle': f"{day_list[0]} 至 {day_list[-1]}",
    }
    if position == 'r':
        title_kwargs['pos_right'] = "5%"

    l.set_global_opts(
        opts.TitleOpts(**title_kwargs),
        yaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(formatter='{value}' + f"{lab}"),
            splitline_opts=opts.SplitLineOpts(is_show=True),
        ),
    )
    return l

In [138]:
def normal(df,column):
    """Return ``df[column]`` as-is (the no-op transform passed to ``grow_plot``)."""
    selected = df[column]
    return selected

In [139]:
from pyecharts.charts import Grid

# Cumulative confirmed cases in two side-by-side panels: rows 0-70 on the
# left, rows 71 onward on the right. The right panel ends at the frame's
# actual last label; a hard-coded 135 dropped the newest rows once `merged`
# grew past 136 rows.
last_row = merged.index[-1]
l = grow_plot(71, last_row, merged, 'confirm', normal, '人', False, 'r')
l2 = grow_plot(0, 70, merged, 'confirm', normal, '人', False, 'l')
grid = Grid()
grid.add(l, grid_opts=opts.GridOpts(pos_left="55%"))
grid.add(l2, grid_opts=opts.GridOpts(pos_right="55%"))

grid.render_notebook()

- 整个海外的情况一直没有得到很好的控制所以一直在上升，而且从二月底开始上升的幅度越来越大

- 而中国控制得特别好，从二月底开始就趋于稳定

In [140]:
def hd_rate(df,column):
    """Return ``df[column]`` as a percentage of the matching confirm column.

    A column containing ``'_'`` (e.g. ``'heal_x'``) is divided by
    ``'confirm'`` plus its last two characters (``'confirm_x'``); any other
    column is divided by plain ``'confirm'``.

    Returns:
        The ratio times 100, rounded to one decimal place.
    """
    suffix = column[-2:] if '_' in column else ''
    ratio = df[column] / df['confirm' + suffix]
    return round(ratio * 100, 1)

In [141]:
# Heal rate (top panel) and death rate (bottom panel) over the full timeline.
# End at the frame's actual last label; a hard-coded 135 dropped the newest
# rows once `merged` grew past 136 rows.
last_row = merged.index[-1]
l = grow_plot(0, last_row, merged, 'heal', hd_rate, '%', True, 'r')
l2 = grow_plot(0, last_row, merged, 'dead', hd_rate, '%', True, 'l')
grid = Grid()
grid.add(l, grid_opts=opts.GridOpts(pos_bottom="60%"))
grid.add(l2, grid_opts=opts.GridOpts(pos_top="60%"))
grid.render_notebook()

- 中国的治愈率一直在稳步上升，现在稳定在94%左右

- 海外的死亡率已经跟国内相近，但是治愈率现在才50%。如果以国内的情况作为标准的话，海外的治愈率需要达到90%以上，疫情才能算是得到控制。

In [142]:
# Fetch the disease_h5 JSONP feed and keep the child regions of its first
# areaTree node.
url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=jQuery34103376894568990161_1592185687344&_=1592185687345'
resp = requests.get(url, timeout=30)  # never hang the notebook on a dead server
resp.raise_for_status()               # fail here rather than inside json.loads
china_map = resp.text
# JSONP body is `callback({...})`: slice between the first "(" and the last
# ")" rather than stripping a hard-coded callback name and one trailing char.
# The 'data' field is itself a JSON string, hence the double decode.
china_map = json.loads(json.loads(china_map[china_map.index('(') + 1:china_map.rindex(')')])['data'])
china_map = china_map['areaTree'][0]['children']

In [226]:
# Parallel lists: region name and its total confirmed count, one per entry
# of china_map.
provinces = [region['name'] for region in china_map]
value = [region['total']['confirm'] for region in china_map]

In [227]:
from pyecharts import options as opts
from pyecharts.charts import Map

# [name, confirmed] pairs in the shape Map.add receives below.
data = list(map(list, zip(provinces, value)))

# China map; the colour scale tops out at the largest confirmed count.
w = (
    Map()
    .add("", data, "china", is_map_symbol_show=False)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="最新国内疫情分布"),
        visualmap_opts=opts.VisualMapOpts(max_=max(value)),
    )
)
w.render_notebook()

因为湖北武汉是疫情开始的地方，那里的感染人数远远高于其他地区

In [228]:
# Remove the first '湖北' (Hubei) row from `data` in place, if there is one.
hubei_pos = next(
    (pos for pos, row in enumerate(data) if row[0] == '湖北'),
    None,
)
if hubei_pos is not None:
    del data[hubei_pos]

In [229]:
def del_max(value, threshold=10000):
    """Return a copy of ``value`` without its first element above ``threshold``.

    Args:
        value: Sequence of numbers.
        threshold: Elements strictly greater than this count as outliers;
            defaults to 10000.

    Returns:
        A new list with the first such element removed, or an unchanged copy
        when no element exceeds ``threshold``. ``value`` itself is never
        modified.
    """
    # Work on a copy: the previous version aliased the argument, so the
    # caller's list silently lost an element on every call.
    x = list(value)
    for pos, item in enumerate(x):
        if item > threshold:
            del x[pos]
            break  # only the first outlier is removed
    return x
vv = del_max(value)

In [230]:
from pyecharts import options as opts
from pyecharts.charts import Map

# Same China map as before, with the colour scale capped by max(vv).
w = (
    Map()
    .add("", data, "china", is_map_symbol_show=False)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="最新国内疫情分布"),
        visualmap_opts=opts.VisualMapOpts(max_=max(vv)),
    )
)
w.render_notebook()

不看湖北，疫情主要分布在有大城市的省份和黑龙江省

In [145]:
# Fetch the disease_foreign JSONP feed and keep its 'foreignList'.
url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_foreign&callback=jQuery34103376894568990161_1592185687348&_=1592185687349'
resp = requests.get(url, timeout=30)  # never hang the notebook on a dead server
resp.raise_for_status()               # fail here rather than inside json.loads
globa = resp.text
# JSONP body is `callback({...})`: slice between the first "(" and the last
# ")" rather than stripping a hard-coded callback name and one trailing char.
# The 'data' field is itself a JSON string, hence the double decode.
globa = json.loads(json.loads(globa[globa.index('(') + 1:globa.rindex(')')])['data'])
globa = globa['foreignList']

In [146]:
# with open('国家对应表.json','w',encoding='utf-8') as file:
#     file.write(json.dumps(nameMap,indent=2,ensure_ascii=False))

In [234]:
# Split the foreign list into parallel name / confirmed lists plus a
# name -> confirmed lookup (a later duplicate name overwrites an earlier one).
temp = [entry['name'] for entry in globa]
temp_v = [entry['confirm'] for entry in globa]
orginal = dict(zip(temp, temp_v))



In [259]:
with open("国家对应表.json", 'r', encoding='utf-8') as load_f:
    load_dict = json.load(load_f)

# `con` holds the table's values; `check` is the inverse lookup (value -> key).
# NOTE(review): keys look like the map's country names and values like the
# feed's Chinese names — confirm against the JSON file.
con = list(load_dict.values())
check = {feed_name: map_name for map_name, feed_name in load_dict.items()}

# Keep only feed entries whose name appears in the table, translated to the
# table key; `values` stays parallel to `countries`.
countries = []
values = []
for name, confirmed in zip(temp, temp_v):
    if name in check:
        countries.append(check[name])
        values.append(confirmed)

In [260]:
# 对应表多余的国家
# for i in con:
#     if i not in temp:
#         print(i)

In [261]:
# 配对不到的国家
# for i in temp:
#     if i not in con:
#         print(i)

In [262]:
# 手动添加报错的国家
# Manually add the countries whose feed name differs from the table name.
# Each pair is (name used in the mapping table, name used in the feed).
_manual_pairs = (
    ('孟加拉国', '孟加拉'),
    ('刚果民主共和国', '刚果（金）'),
    ('刚果', '刚果（布）'),
    ('中非', '中非共和国'),
    ('波斯尼亚和黑塞哥维那', '波黑'),
    ('马其顿', '北马其顿'),
)
for table_name, feed_name in _manual_pairs:
    countries.append(check[table_name])
    values.append(orginal[feed_name])

# China is not part of the foreign list; add it from the domestic totals.
countries.append('China')
values.append(chinaTotal['confirm'])

In [263]:
from pyecharts import options as opts
from pyecharts.charts import Map
from pyecharts.faker import Faker

# [country, confirmed] pairs for the world map.
data = [list(z) for z in zip(countries, values)]

w = Map()
w.add("", data, "world",is_map_symbol_show=False)
w.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
range_color = ['#313695', '#ffffbf', '#fee090', '#fdae61', '#f46d43', '#d73027', '#a50026']
# ["lightskyblue",'blue', "yellow", "orangered"]
w.set_global_opts(
        title_opts=opts.TitleOpts(title="疫情分布"),
        # Scale to the true maximum instead of data[0][1], which is only
        # right while the feed happens to list the largest country first.
        visualmap_opts=opts.VisualMapOpts(max_=max(row[1] for row in data),range_text=["High", "Low"],
            is_calculable=True,
            range_color=range_color),
    )
w.render_notebook()

美国疫情最为严重

In [264]:
from pyecharts import options as opts
from pyecharts.charts import Map
from pyecharts.faker import Faker

# Build the pairs without the United States by filtering, rather than
# deleting from `values`: that way `countries` and `values` stay the same
# length and correctly paired if this cell is executed again.
data = [
    [country, confirmed]
    for country, confirmed in zip(countries, values)
    if country != 'United States'
]
# Confirmed counts of the remaining countries, used for the colour scale.
world_v = [confirmed for _, confirmed in data]

w = Map()
w.add("", data, "world",is_map_symbol_show=False)
w.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
range_color = ['#313695', '#ffffbf', '#fee090', '#fdae61', '#f46d43', '#d73027', '#a50026']
# ["lightskyblue",'blue', "yellow", "orangered"]
w.set_global_opts(
        title_opts=opts.TitleOpts(title="疫情分布"),
        # Scale to the true maximum of what is plotted instead of data[0][1],
        # which is only right while the largest remaining country is first.
        visualmap_opts=opts.VisualMapOpts(max_=max(world_v),range_text=["High", "Low"],
            is_calculable=True,
            range_color=range_color),
    )
w.render_notebook()

除去美国，西班牙最严重，然后是意大利、英国，接着是中国、德国、法国、伊朗、加拿大……