本文转自 [Kaggle - Wuhan Coronavirus : A geographical analysis](https://www.kaggle.com/parulpandey/wuhan-coronavirus-a-geographical-analysis)，并做了简单的翻译。

运行前需要在基础镜像上创建新镜像，并新增以下库：

![Image Name](https://cdn.kesci.com/upload/image/q59oqeeds8.png?imageView2/0/w/960/h/960)


# 2019新型冠状病毒（2019-nCoV）数据可视化

In [1]:
# 导入需要的库
import numpy as np 
import pandas as pd 

# Visualisation libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
from plotly.offline import init_notebook_mode, iplot 
import plotly.graph_objs as go
import plotly.offline as py
import pycountry
py.init_notebook_mode(connected=True)
import folium 
from folium import plugins

# Graphics in retina format 
%config InlineBackend.figure_format = 'retina' 

plt.rcParams['figure.figsize'] = 8, 5

# Disable warnings 
import warnings
warnings.filterwarnings('ignore')

Matplotlib is building the font cache using fc-list. This may take a moment.


ModuleNotFoundError: No module named 'pycountry'

随着[世界卫生组织宣布新型冠状病毒爆发为公共卫生紧急事件](https://edition.cnn.com/2020/01/30/health/coronavirus-who-public-health-emergency-international-concern-declaration/index.html)的消息传出，公众的普遍恐惧进一步加剧。许多国家已经加强了与这种病毒的斗争，而中国的情况仍然很严重。中国大陆以外的20多个国家和地区已经确认了这种病毒的病例，范围遍及亚洲、欧洲、北美和中东；印度、意大利和菲律宾周四报告了他们的第一例病例。[来源](https://edition.cnn.com/2020/01/30/asia/wuhan-coronavirus-update-intl-hnk/index.html)

In [None]:
# Load the 2019-nCoV case records from the mounted dataset directory.
data = pd.read_csv(
    filepath_or_buffer="/home/kesci/input/2019ncov5600/2019_nCoV_data.csv",
)
# Preview the first five rows as the cell output.
data.head(n=5)

In [None]:
# Inspect the dataset structure (columns, dtypes, non-null counts).
data.info()

In [None]:
# Convert the 'Date' column to datetime64.
# A single vectorised pd.to_datetime call replaces the per-element
# .apply(pd.to_datetime), which re-ran the parser once for every row.
data['Date'] = pd.to_datetime(data['Date'])

# 'Sno' is not used anywhere in the analysis below, so drop it.
data = data.drop(columns=['Sno'])

data.head()

## 迄今为止，被此次疫情影响的国家

In [None]:
# Distinct country/region labels present in the records.
countries = pd.unique(data['Country']).tolist()
print(countries)

affected_total = len(countries)
print("\n总共被影响的国家/地区数量: ", affected_total)

另外，中国和中国大陆被分开记录，所以需要统一

In [None]:
# Fold the 'Mainland China' label into 'China' so both are counted as one
# country.
# The result is assigned back to the column instead of calling
# replace(inplace=True) on data['Country']: the chained in-place form
# operates on an intermediate object, triggers a pandas warning, and does
# not update `data` at all when copy-on-write is enabled.
data['Country'] = data['Country'].replace({'Mainland China': 'China'})

countries = data['Country'].unique().tolist()
print(countries)
print("\n总共被影响的国家/地区数量: ", len(countries))

## 疫情现状

In [None]:
# Keep only the records belonging to the most recent calendar day.
#
# The most recent timestamp is truncated to midnight with .normalize()
# rather than being converted to a string and re-parsed piece by piece.
# The comparison is inclusive (>=) so that records stamped exactly at
# 00:00 on that day are kept; a strict '>' would drop them and yield an
# empty frame whenever the last snapshot was taken at midnight.
# Requires data['Date'] to already hold datetime64 values.
latest_day = data['Date'].max().normalize()
data_latest = data[data['Date'] >= latest_day]
data_latest.head()

In [None]:
# Confirmed cases per country for the latest day.
confirmed_by_country = data_latest.groupby('Country')['Confirmed'].sum()
Number_of_countries = len(confirmed_by_country)

# Move the country label out of the index into a regular column and
# number the rows 1..N.
cases = confirmed_by_country.to_frame().assign(Country=confirmed_by_country.index)
cases.index = np.arange(1, Number_of_countries + 1)

# Column order used for display and for the later merge.
global_cases = cases.loc[:, ['Country', 'Confirmed']]
global_cases

加入国家的经纬度数据

In [None]:
# Load the per-country coordinate lookup table.
world_coordinates = pd.read_csv('/home/kesci/input/2019ncov5600/world_coordinates.csv')

# Join coordinates with the per-country case counts; only countries
# present in both tables are kept (inner join on 'Country').
world_data = world_coordinates.merge(global_cases, how='inner', on='Country')
world_data.head()

## 可视化现有数据

In [None]:
# World map with one clickable circle per affected country.
# NOTE(review): recent folium releases reportedly no longer bundle the
# Stamen tile sets - confirm 'Stamen Toner' resolves in the target
# environment.
world_map = folium.Map(location=[10, -20], zoom_start=2.3, tiles='Stamen Toner')

marker_columns = ['latitude', 'longitude', 'Confirmed', 'Country']
for lat, lon, confirmed, country in world_data[marker_columns].itertuples(index=False, name=None):
    # HTML shown when the circle is clicked.
    popup_html = (
        '<strong>Country</strong>: ' + str(country).capitalize() + '<br>'
        + '<strong>Confirmed Cases</strong>: ' + str(confirmed) + '<br>'
    )
    marker = folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=popup_html,
        color='red',
        fill_color='red',
        fill_opacity=0.7,
    )
    marker.add_to(world_map)

world_map


点击上图中的红圈就可以看到对应国家的数据

## 关于疫情的其他数据

In [None]:
# Print totals of confirmed, fatal and recovered cases over the
# latest-day records.
for label, column in (
    ('Globally Confirmed Cases: ', 'Confirmed'),
    ('Global Deaths: ', 'Deaths'),
    ('Globally Recovered Cases: ', 'Recovered'),
):
    print(label, data_latest[column].sum())

In [None]:
# Totals per province/state within each country.
# numeric_only=True restricts the aggregation to the numeric count columns.
# Without it, recent pandas versions also try to sum the datetime 'Date'
# column and raise a TypeError.
data_latest.groupby(['Country', 'Province/State']).sum(numeric_only=True)

In [None]:
# Five countries with the most deaths in the latest-day records.
deaths_by_country = data_latest.groupby('Country')['Deaths'].sum()
deaths_by_country.sort_values(ascending=False).head(5)

我们可以看到大部分死亡病例集中在中国湖北武汉

In [None]:
# Five countries with the most recovered cases in the latest-day records.
recovered_by_country = data_latest.groupby('Country')['Recovered'].sum()
recovered_by_country.sort_values(ascending=False).head(5)

# 中国的情况

In [None]:
# Latest-day records for China only.
# .copy() makes China an independent frame, so the column assignment
# below does not write into a slice of data_latest (which triggers
# SettingWithCopyWarning and may not take effect).
China = data_latest[data_latest['Country'] == 'China'].copy()

# Lower-case the province names. The vectorised .str accessor passes
# missing values through unchanged, whereas calling x.lower() on each
# element raises on NaN.
China['Province/State'] = China['Province/State'].str.lower()
China

## 除湖北之外确诊和痊愈的数据

In [None]:
f, ax = plt.subplots(figsize=(12, 8))

# Exclude Hubei by name. The positional slice China[1:] only removed Hubei
# when it happened to be the first row, and silently dropped a different
# province otherwise. The comparison is case-insensitive, so it works
# whether or not the province names have already been lower-cased.
provinces_excl_hubei = China[China['Province/State'].str.lower() != 'hubei']

# Confirmed cases drawn first in a pastel red ...
sns.set_color_codes("pastel")
sns.barplot(x="Confirmed", y="Province/State", data=provinces_excl_hubei,
            label="Confirmed", color="r")

# ... with recovered cases overlaid on the same axes in a muted green.
sns.set_color_codes("muted")
sns.barplot(x="Recovered", y="Province/State", data=provinces_excl_hubei,
            label="Recovered", color="g")

# Add a legend and informative axis label
ax.legend(ncol=2, loc="lower right", frameon=True)
ax.set(xlim=(0, 400), ylabel="",
       xlabel="Stats")
sns.despine(left=True, bottom=True)

## 中国部分地区的疫情情况可视化

In [None]:
# Centre point reused by the China maps below
# (the coordinates appear to be central Beijing - confirm).
latitude, longitude = 39.91666667, 116.383333

china_map = folium.Map(location=[latitude, longitude], zoom_start=12)

# Province coordinates; 'name3' is renamed to 'Province/State' so the
# table shares that column name with the case data.
china_coordinates = (
    pd.read_csv("/home/kesci/input/2019ncov5600/china_Province_coordinates.csv")
    .rename(columns={'name3': 'Province/State'})
)
china_coordinates.head()

In [None]:
# Attach coordinates to the Chinese case records; with no explicit key the
# join uses the columns the two frames have in common.
df_china_virus = pd.merge(China, china_coordinates)
df_china_virus.head()

In [None]:
# Compact frame holding only the columns needed to draw the China maps.
# Note: this rebinds `data`, which previously held the full dataset.
map_columns = ['name', 'lat', 'lon', 'Confirmed', 'Recovered', 'Deaths']
data = pd.DataFrame({column: list(df_china_virus[column]) for column in map_columns})

data.head()

## 迄今为止的确诊案例

In [None]:
 
# Map of total confirmed cases per Chinese province to date.
# NOTE(review): recent folium releases reportedly no longer bundle the
# Stamen tile sets - confirm 'Stamen Toner' resolves in the target
# environment.
china_map1 = folium.Map(location=[latitude, longitude], zoom_start=4, tiles='Stamen Toner')

for lat, lon, value, name in zip(data['lat'], data['lon'], data['Confirmed'], data['name']):
    # One clickable circle per province; the popup shows its name and count.
    folium.CircleMarker([lat, lon],
                        radius=13,
                        popup=('Province: ' + str(name).capitalize() + '<br>'
                               'Confirmed: ' + str(value) + '<br>'),
                        color='red',
                        fill_color='red',
                        fill_opacity=0.7).add_to(china_map1)
    # The stray folium.Map(titles='jj', attr="attribution") call that used
    # to sit here built and discarded a new map on every iteration; it had
    # no effect on china_map1 and has been removed.
china_map1


## 迄今为止的死亡案例

In [None]:


# Map of deaths per Chinese province to date.
# NOTE(review): recent folium releases reportedly no longer bundle the
# Stamen tile sets - confirm 'Stamen Toner' resolves in the target
# environment.
china_map = folium.Map(location=[latitude, longitude], zoom_start=4, tiles='Stamen Toner')

for lat, lon, value, name in zip(data['lat'], data['lon'], data['Deaths'], data['name']):
    # One clickable circle per province; the popup shows its name and count.
    folium.CircleMarker([lat, lon],
                        radius=13,
                        popup=('Province: ' + str(name).capitalize() + '<br>'
                               'Deaths: ' + str(value) + '<br>'),
                        color='black',
                        fill_color='red',
                        fill_opacity=0.7).add_to(china_map)
    # The stray folium.Map(titles='jj', attr="attribution") call that used
    # to sit here built and discarded a new map on every iteration; it had
    # no effect on china_map and has been removed.
china_map


## 迄今为止的痊愈案例

In [None]:
# Map of recovered cases per Chinese province to date.
# NOTE(review): recent folium releases reportedly no longer bundle the
# Stamen tile sets - confirm 'Stamen Toner' resolves in the target
# environment.
china_map = folium.Map(location=[latitude, longitude], zoom_start=4, tiles='Stamen Toner')

recovered_columns = ['lat', 'lon', 'Recovered', 'name']
for lat, lon, recovered, province in data[recovered_columns].itertuples(index=False, name=None):
    # HTML shown when the circle is clicked.
    popup_html = (
        'Province: ' + str(province).capitalize() + '<br>'
        + 'Recovered: ' + str(recovered) + '<br>'
    )
    marker = folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=popup_html,
        color='green',
        fill_color='green',
        fill_opacity=0.7,
    )
    marker.add_to(china_map)

china_map


你可以缩放地图，以及点击数据点以获取更多数据