# 读取新冠肺炎数据

## 导入模块

In [1]:
# https://blog.csdn.net/weixin_43130164/article/details/104113559?depth_1-utm_source=distribute.pc_relevant_right.none-task&utm_source=distribute.pc_relevant_right.none-task
import time 
import json
import requests
from datetime import datetime
import pandas as pd 
import numpy as np 

## 抓取数据

In [2]:
def catch_data():
    """Fetch the current epidemic snapshot and return it as a dict.

    Sends a GET request to the ``getOnsInfo`` endpoint, parses the JSON
    envelope of the response, and then decodes the envelope's ``'data'``
    field, which is itself a JSON-encoded string.

    Returns:
        The decoded content of the envelope's ``'data'`` field.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with an HTTP error status.
        ValueError: if the body or the ``'data'`` field is not valid JSON.
        KeyError: if the envelope has no ``'data'`` field.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would hang the notebook kernel.
    response = requests.get(url=url, timeout=10)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    # The envelope's 'data' field holds a JSON string, so it needs a second decode.
    return json.loads(response.json()['data'])

In [3]:
# Fetch the payload once and keep it in `data`; later cells read from it.
data = catch_data()
# Show the top-level keys to see which sections the payload contains.
data.keys()

dict_keys(['lastUpdateTime', 'chinaTotal', 'chinaAdd', 'isShowAdd', 'showAddSwitch', 'areaTree', 'chinaDayList', 'chinaDayAddList', 'dailyNewAddHistory', 'dailyHistory', 'wuhanDayList', 'articleList'])

## 数据处理

In [4]:
# The payload includes: domestic totals, domestic new cases, update time,
# detailed data, daily data, and daily new cases.

# Pull the three summary entries out of the payload in one pass.
lastUpdateTime, chinaTotal, chinaAdd = (
    data[key] for key in ('lastUpdateTime', 'chinaTotal', 'chinaAdd')
)
# One record per line: totals first, then the additions.
print(chinaTotal, chinaAdd, sep='\n')

{'confirm': 81385, 'heal': 71305, 'dead': 3253, 'nowConfirm': 6827, 'suspect': 104, 'nowSevere': 2136, 'importedCase': 234}
{'confirm': 150, 'heal': 758, 'dead': 3, 'nowConfirm': -611, 'suspect': -1, 'nowSevere': -178, 'importedCase': 45}


### 国内数据处理
#### 第一步

In [5]:
# Detail data: the structure is fairly nested, so it is worth printing it
# step by step to understand the layout first.
areaTree = data['areaTree']
# Domestic data: the children of the first top-level node are walked as
# provinces, and each province's children as cities. Every city becomes one
# flat record that still carries its raw 'total' and 'today' dicts.
china_list = [
    {
        'province': province_node['name'],
        'city': city_node['name'],
        'total': city_node['total'],
        'today': city_node['today'],
    }
    for province_node in areaTree[0]['children']
    for city_node in province_node['children']
]

china_data = pd.DataFrame(china_list)
china_data.head()

Unnamed: 0,province,city,total,today
0,湖北,武汉,"{'confirm': 50005, 'suspect': 0, 'dead': 2498,...","{'confirm': 0, 'confirmCuts': 0, 'isUpdated': ..."
1,湖北,孝感,"{'confirm': 3518, 'suspect': 0, 'dead': 128, '...","{'confirm': 0, 'confirmCuts': 0, 'isUpdated': ..."
2,湖北,黄冈,"{'confirm': 2907, 'suspect': 0, 'dead': 125, '...","{'confirm': 0, 'confirmCuts': 0, 'isUpdated': ..."
3,湖北,荆州,"{'confirm': 1580, 'suspect': 0, 'dead': 50, 'd...","{'confirm': 0, 'confirmCuts': 0, 'isUpdated': ..."
4,湖北,鄂州,"{'confirm': 1394, 'suspect': 0, 'dead': 58, 'd...","{'confirm': 0, 'confirmCuts': 0, 'isUpdated': ..."


#### 第二步

In [10]:
# 定义数据处理函数
def confirm(x):
    """Return the ``'confirm'`` value of a statistics record.

    Args:
        x: Either a dict, or a string holding the Python-literal form of a
            dict (for example ``"{'confirm': 1}"``).

    Returns:
        The value stored under the ``'confirm'`` key.

    Raises:
        KeyError: if the record has no ``'confirm'`` key.
        ValueError, SyntaxError: if ``x`` is not a dict and its string form
            is not a plain Python literal.
    """
    # Imported locally so this definition does not depend on another cell.
    import ast

    # The records come from a remote service, so they are never passed to
    # eval(); literal_eval only accepts literals and cannot execute code.
    record = x if isinstance(x, dict) else ast.literal_eval(str(x))
    return record['confirm']
def suspect(x):
    """Return the ``'suspect'`` value of a statistics record.

    Args:
        x: Either a dict, or a string holding the Python-literal form of a
            dict (for example ``"{'suspect': 1}"``).

    Returns:
        The value stored under the ``'suspect'`` key.

    Raises:
        KeyError: if the record has no ``'suspect'`` key.
        ValueError, SyntaxError: if ``x`` is not a dict and its string form
            is not a plain Python literal.
    """
    # Imported locally so this definition does not depend on another cell.
    import ast

    # The records come from a remote service, so they are never passed to
    # eval(); literal_eval only accepts literals and cannot execute code.
    record = x if isinstance(x, dict) else ast.literal_eval(str(x))
    return record['suspect']
def dead(x):
    """Return the ``'dead'`` value of a statistics record.

    Args:
        x: Either a dict, or a string holding the Python-literal form of a
            dict (for example ``"{'dead': 1}"``).

    Returns:
        The value stored under the ``'dead'`` key.

    Raises:
        KeyError: if the record has no ``'dead'`` key.
        ValueError, SyntaxError: if ``x`` is not a dict and its string form
            is not a plain Python literal.
    """
    # Imported locally so this definition does not depend on another cell.
    import ast

    # The records come from a remote service, so they are never passed to
    # eval(); literal_eval only accepts literals and cannot execute code.
    record = x if isinstance(x, dict) else ast.literal_eval(str(x))
    return record['dead']
def heal(x):
    """Return the ``'heal'`` value of a statistics record.

    Args:
        x: Either a dict, or a string holding the Python-literal form of a
            dict (for example ``"{'heal': 1}"``).

    Returns:
        The value stored under the ``'heal'`` key.

    Raises:
        KeyError: if the record has no ``'heal'`` key.
        ValueError, SyntaxError: if ``x`` is not a dict and its string form
            is not a plain Python literal.
    """
    # Imported locally so this definition does not depend on another cell.
    import ast

    # The records come from a remote service, so they are never passed to
    # eval(); literal_eval only accepts literals and cannot execute code.
    record = x if isinstance(x, dict) else ast.literal_eval(str(x))
    return record['heal']
# Apply the extractor functions: the cumulative counts come from each row's
# 'total' record, and the new-confirmed count from its 'today' record.
# The per-day addsuspect / adddead / addheal columns are not derived here.
china_data = china_data.assign(
    confirm=lambda frame: frame['total'].map(confirm),
    suspect=lambda frame: frame['total'].map(suspect),
    dead=lambda frame: frame['total'].map(dead),
    heal=lambda frame: frame['total'].map(heal),
    addconfirm=lambda frame: frame['today'].map(confirm),
)[["province", "city", "confirm", "suspect", "dead", "heal", "addconfirm"]]
# After this point the raw 'total' and 'today' columns are no longer part of
# china_data, so this cell cannot be re-run without re-running the cell that
# builds china_data first.
china_data.head()

Unnamed: 0,province,city,confirm,suspect,dead,heal,addconfirm
0,湖北,武汉,50005,0,2498,41389,0
1,湖北,孝感,3518,0,128,3349,0
2,湖北,黄冈,2907,0,125,2782,0
3,湖北,荆州,1580,0,50,1517,0
4,湖北,鄂州,1394,0,58,1303,0


### 国际数据处理

In [16]:
# One row per top-level areaTree node, with the counts pulled out of the
# 'total' and 'today' records by the extractor functions.
# The per-day addsuspect / adddead / addheal columns are not derived here.
global_data = pd.DataFrame(data['areaTree']).assign(
    confirm=lambda frame: frame['total'].map(confirm),
    suspect=lambda frame: frame['total'].map(suspect),
    dead=lambda frame: frame['total'].map(dead),
    heal=lambda frame: frame['total'].map(heal),
    addconfirm=lambda frame: frame['today'].map(confirm),
)
# Lookup table read from a local spreadsheet; its "中文" column is matched
# against 'name' and its "英文" column is kept in the result.
world_name = pd.read_excel("世界各国中英文对照.xlsx")
# Inner join: rows whose 'name' has no match in the lookup table are dropped.
global_data = global_data.merge(
    world_name, left_on="name", right_on="中文", how="inner"
)[["name", "英文", "confirm", "suspect", "dead", "heal", "addconfirm"]]
global_data.head(10)

Unnamed: 0,name,英文,confirm,suspect,dead,heal,addconfirm
0,中国,China,81385,104,3253,71305,150
1,韩国,Korea(republic of),3150,0,13,24,0
2,意大利,Italy,653,0,17,45,0
3,新加坡,Singapore,102,0,0,72,0
4,德国,Germany,66,0,0,16,0
5,美国,U.S.A,60,0,0,3,0
6,科威特,Kuwait,43,0,0,0,0
7,泰国,Thailand,42,0,0,28,0
8,法国,France,38,0,2,12,0
9,巴林,Bahrain,33,0,0,0,0
