# 中美疫情状况分析


## 环境配置

In [2]:
!pip install --upgrade pyecharts

Collecting pyecharts
  Downloading pyecharts-1.9.1-py3-none-any.whl (135 kB)
[K     |████████████████████████████████| 135 kB 379 kB/s eta 0:00:01
Collecting simplejson
  Downloading simplejson-3.17.6-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (129 kB)
[K     |████████████████████████████████| 129 kB 192 kB/s eta 0:00:01
[?25hInstalling collected packages: simplejson, pyecharts
  Attempting uninstall: pyecharts
    Found existing installation: pyecharts 1.1.0
    Uninstalling pyecharts-1.1.0:
      Successfully uninstalled pyecharts-1.1.0
Successfully installed pyecharts-1.9.1 simplejson-3.17.6
You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.[0m


## 模块导入

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from pyecharts import options as opts
from pyecharts.charts import Bar, Line , Timeline , Calendar
# Font settings: make matplotlib use the 'MicroSoft YaHei' family for all text.
font = dict(family='MicroSoft YaHei')
matplotlib.rc("font", **font)

## 读取数据

In [4]:
# Load the owid-covid-data CSV into a DataFrame and print its column summary
# (column names, non-null counts, dtypes) to see which fields have gaps.
file = pd.read_csv(filepath_or_buffer='/home/mw/input/covid199004/owid-covid-data.csv')
file.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 167708 entries, 0 to 167707
Data columns (total 67 columns):
iso_code                                      167708 non-null object
continent                                     157674 non-null object
location                                      167708 non-null object
date                                          167708 non-null object
total_cases                                   164661 non-null float64
new_cases                                     164507 non-null float64
new_cases_smoothed                            162519 non-null float64
total_deaths                                  146765 non-null float64
new_deaths                                    146799 non-null float64
new_deaths_smoothed                           144692 non-null float64
total_cases_per_million                       163897 non-null float64
new_cases_per_million                         163743 non-null float64
new_cases_smoothed_per_million                161761 

## 数据预处理

### 我国2020-2022年每一百万人中确诊的COVID-19病例数据

In [5]:
# Keep only the rows whose location is China, then pull out the two columns
# used for plotting: the date and the smoothed new cases per million.
chinaData = file.loc[file['location'].eq('China')]
dateList_china = chinaData['date']
newcaseList_china = chinaData['new_cases_smoothed_per_million']

# Report how many values are missing in each column (one count per line).
print(dateList_china.isnull().sum(), newcaseList_china.isnull().sum(), sep='\n')

0
21


### 处理缺失值

In [6]:
# Replace missing per-million values with 0.
# fillna() is reassigned rather than called with inplace=True: the Series was
# taken from a filtered slice of `file`, and mutating it in place triggers
# pandas' SettingWithCopyWarning.
newcaseList_china = newcaseList_china.fillna(0)
print(newcaseList_china)

# Pair every date with its value as [date, value] rows.
# Note: this rebinds `chinaData` from the filtered DataFrame to a plain list,
# so the selection cell must be re-run before this cell is run again.
chinaData = [[date, value] for date, value in zip(dateList_china, newcaseList_china)]

31791    0.000
31792    0.000
31793    0.000
31794    0.000
31795    0.000
31796    0.000
31797    0.000
31798    0.546
31799    0.740
31800    0.877
31801    1.036
31802    1.439
31803    1.665
31804    1.799
31805    2.111
31806    2.219
31807    2.403
31808    2.464
31809    2.293
31810    2.237
31811    2.042
31812    1.710
31813    2.896
31814    3.187
31815    3.123
31816    3.032
31817    2.973
31818    2.949
31819    2.952
31820    1.500
         ...  
32541    0.049
32542    0.056
32543    0.058
32544    0.059
32545    0.056
32546    0.055
32547    0.062
32548    0.060
32549    0.064
32550    0.077
32551    0.083
32552    0.089
32553    0.099
32554    0.108
32555    0.122
32556    0.133
32557    0.137
32558    0.146
32559    0.152
32560    0.154
32561    0.157
32562    0.164
32563    0.167
32564    0.176
32565    0.185
32566    0.197
32567    0.208
32568    0.216
32569    0.265
32570    0.282
Name: new_cases_smoothed_per_million, Length: 780, dtype: float64


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)


### 美国2020-2022年每一百万人中确诊的COVID-19病例数据

In [7]:
# Keep only the rows whose location is the United States, then pull out the two
# columns used for plotting: the date and the smoothed new cases per million.
usaData = file.loc[file['location'].eq('United States')]
dateList_usa = usaData['date']
newcaseList_usa = usaData['new_cases_smoothed_per_million']

# Report how many values are missing in each column (one count per line).
print(dateList_usa.isnull().sum(), newcaseList_usa.isnull().sum(), sep='\n')

0
7


### 处理缺失值

In [8]:
# Replace missing per-million values with 0.
# fillna() is reassigned rather than called with inplace=True: the Series was
# taken from a filtered slice of `file`, and mutating it in place triggers
# pandas' SettingWithCopyWarning.
newcaseList_usa = newcaseList_usa.fillna(0)
print(newcaseList_usa)

# Pair every date with its value as [date, value] rows.
# Note: this rebinds `usaData` from the filtered DataFrame to a plain list,
# so the selection cell must be re-run before this cell is run again.
usaData = [[date, value] for date, value in zip(dateList_usa, newcaseList_usa)]

158526      0.000
158527      0.000
158528      0.000
158529      0.000
158530      0.000
158531      0.000
158532      0.000
158533      0.002
158534      0.002
158535      0.003
158536      0.003
158537      0.001
158538      0.003
158539      0.003
158540      0.002
158541      0.003
158542      0.002
158543      0.002
158544      0.002
158545      0.000
158546      0.001
158547      0.001
158548      0.001
158549      0.001
158550      0.001
158551      0.001
158552      0.001
158553      0.000
158554      0.000
158555      0.000
           ...   
159276    620.572
159277    558.007
159278    543.109
159279    532.287
159280    456.826
159281    412.357
159282    380.765
159283    350.879
159284    323.340
159285    314.513
159286    299.351
159287    261.958
159288    252.621
159289    239.839
159290    224.920
159291    194.586
159292    201.430
159293    198.350
159294    204.401
159295    186.200
159296    172.736
159297    165.101
159298    155.218
159299    141.426
159300    

## 图像绘制

### 2020-2022年间我国每一百万人中确诊的COVID-19病例日历图

In [42]:
# Calendar heatmap of China's values from `chinaData` ([date, value] rows).

# Text styling shared by the year / day / month labels of the calendar grid.
china_label_style = dict(label_color='#D0D0D0', label_font_size=17)

# Placement of the calendar inside the canvas and the date range it spans.
china_calendar_opts = opts.CalendarOpts(
    pos_top="100",
    pos_left="40",
    pos_right="40",
    pos_bottom='100',
    range_=['2020-01-22', '2022-02-20'],
    yearlabel_opts=opts.CalendarYearLabelOpts(is_show=False, **china_label_style),
    daylabel_opts=opts.CalendarDayLabelOpts(name_map="cn", **china_label_style),
    monthlabel_opts=opts.CalendarMonthLabelOpts(name_map="cn", **china_label_style),
)

# Centered chart title.
china_title_opts = opts.TitleOpts(
    pos_top="30",
    pos_left="center",
    title="2020-2022年中国每一百万人中确诊数据",
    title_textstyle_opts=opts.TextStyleOpts(font_size=25),
)

# Piecewise colour scale covering values from 0 to 3.2.
china_visualmap_opts = opts.VisualMapOpts(
    max_=3.2,
    min_=0,
    orient="horizontal",
    is_piecewise=True,
    textstyle_opts=opts.TextStyleOpts(color="#D0D0D0", font_size=18),
)

# Build the chart step by step instead of as one fluent chain.
calendar_china = Calendar(init_opts=opts.InitOpts(width="1600px", height="400px", theme='chalk'))
calendar_china.add(
    series_name="",
    yaxis_data=chinaData,
    calendar_opts=china_calendar_opts,
)
calendar_china.set_series_opts(label_opts=opts.LabelOpts(font_size=15))
calendar_china.set_global_opts(
    title_opts=china_title_opts,
    visualmap_opts=china_visualmap_opts,
    toolbox_opts=opts.ToolboxOpts(),
)

# Render the chart inline as the cell's output.
calendar_china.render_notebook()

### 2020-2022年间美国每一百万人中确诊的COVID-19病例日历图

In [43]:
# Calendar heatmap of the United States values from `usaData` ([date, value]
# rows built from the new_cases_smoothed_per_million column).
calendar_usa=(
    Calendar(init_opts=opts.InitOpts(width="1600px", height="400px",theme='chalk'))
    .add(
        series_name='',
        yaxis_data=usaData,
        # Placement of the calendar inside the canvas and the date range it spans.
        calendar_opts=opts.CalendarOpts(
            pos_top="100",
            pos_left="40",
            pos_right="40",
            pos_bottom='100',
            range_=['2020-01-22', '2022-02-20'],
            yearlabel_opts=opts.CalendarYearLabelOpts(is_show=False, label_color='#D0D0D0',label_font_size=17),
            daylabel_opts=opts.CalendarDayLabelOpts(name_map="cn", label_color='#D0D0D0',label_font_size=17),
            monthlabel_opts=opts.CalendarMonthLabelOpts(name_map="cn", label_color='#D0D0D0',label_font_size=17),
        )
    )
    .set_global_opts(  
        # The title names the per-million metric, matching the plotted column
        # and the title of the China chart.
        title_opts=opts.TitleOpts(pos_top="30", pos_left="center", 
            title="2020-2022年美国每一百万人中确诊数据",
            title_textstyle_opts=opts.TextStyleOpts(font_size=25)
        ),
        # Continuous colour scale from 0 to 800.
        # NOTE(review): the printed tail of the series already shows values
        # above 600; confirm the series maximum does not exceed 800.
        visualmap_opts=opts.VisualMapOpts(  
            max_=800, 
            min_=0, 
            orient="horizontal", 
            is_piecewise=False,
            textstyle_opts=opts.TextStyleOpts(color="#D0D0D0",font_size=18),
        ),
        toolbox_opts=opts.ToolboxOpts(),
    )
)

# Render the chart inline as the cell's output.
calendar_usa.render_notebook()