# Covid19

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Directory that holds the dataset CSVs; change this single constant to
# point the notebook at a different copy of the data.
DATA_DIR = './coronavirusdataset'


def load_table(filename):
    """Read the CSV file named `filename` from DATA_DIR into a DataFrame."""
    return pd.read_csv(DATA_DIR + '/' + filename)


region = load_table('Region.csv')
timeprovince = load_table('TimeProvince.csv')
timegender = load_table('TimeGender.csv')
timeage = load_table('TimeAge.csv')
# NOTE: this name shadows the standard-library `time` module for the rest
# of the notebook; it is kept because later cells refer to it.
time = load_table('Time.csv')
patientinfo = load_table('PatientInfo.csv')
patientroute = load_table('PatientRoute.csv')
case = load_table('Case.csv')
weather = load_table('Weather.csv')

## Case

In [3]:
# Preview the first rows of the case table loaded from Case.csv.
case.head()

Unnamed: 0,case_id,province,city,group,infection_case,confirmed,latitude,longitude
0,1000001,Seoul,Guro-gu,True,Guro-gu Call Center,79,37.508163,126.884387
1,1000002,Seoul,Dongdaemun-gu,True,Dongan Church,24,37.592888,127.056766
2,1000003,Seoul,Eunpyeong-gu,True,Eunpyeong St. Mary's Hospital,14,37.63369,126.9165
3,1000004,Seoul,Seongdong-gu,True,Seongdong-gu APT,13,37.55713,127.0403
4,1000005,Seoul,Jongno-gu,True,Jongno Community Center,10,37.57681,127.006


In [4]:
# Total confirmed cases per province, as a flat frame with the columns
# 'province' and 'confirmed'.
df_province = case.groupby('province', as_index = False)['confirmed'].sum()

In [5]:
# Total confirmed cases per infection source, as a flat frame with the
# columns 'infection_case' and 'confirmed'.
confirmed_by_source = case.groupby('infection_case')['confirmed'].agg('sum')
df_confirmed = confirmed_by_source.reset_index()

In [6]:
import plotly.express as px
import plotly.io as pio
# Select the default plotly renderer used when figures are shown below.
pio.renderers.default = 'notebook_connected'

In [7]:
# Rank infection sources by confirmed count (ascending, so the largest
# values end up in the last rows).
df_confirmed = df_confirmed.sort_values('confirmed')

# Select the ten largest sources once, then refer to columns by name so
# the data frame handed to plotly and the plotted columns always agree
# (previously the full frame was passed alongside three separately
# sliced Series).
top10_cases = df_confirmed.tail(10)

fig = px.bar(
    data_frame = top10_cases,
    x = 'confirmed',
    y = 'infection_case',
    color = 'infection_case',
    title = 'Infection Case Top 10',
    orientation = 'h'
)
fig.show()

In [8]:
# Show the confirmed totals aggregated per province.
df_province

Unnamed: 0,province,confirmed
0,Busan,91
1,Chungcheongbuk-do,31
2,Chungcheongnam-do,115
3,Daegu,6066
4,Daejeon,22
5,Gangwon-do,29
6,Gwangju,16
7,Gyeonggi-do,164
8,Gyeongsangbuk-do,1145
9,Gyeongsangnam-do,85


In [9]:
# Keep the rows of `region` whose 'city' value is also a province name.
# `isin` is a vectorised membership test: the set of province names is
# built once instead of being recomputed inside a per-row lambda.
cond = region['city'].isin(region['province'].unique())
df_coordinate = region.loc[cond, ['province', 'latitude', 'longitude']]

In [10]:
# Map centre: the coordinates stored on the row whose province is 'Korea'.
# The row filter is evaluated once and both coordinates are unpacked from
# the first matching row.
is_korea = df_coordinate['province'] == 'Korea'
korea_lat, korea_lon = df_coordinate.loc[is_korea, ['latitude', 'longitude']].values[0]
center = {'lat' : korea_lat, 'lon' : korea_lon}
center

{'lat': 37.566953000000005, 'lon': 126.977977}

In [11]:
# Attach coordinates to each province total. 'province' is the only column
# the two frames share, so it is named explicitly as the join key; the left
# join keeps every row of df_province.
df_merge = df_province.merge(df_coordinate, on = 'province', how = 'left')

In [12]:
# Inspect the province totals joined with their coordinates.
df_merge

Unnamed: 0,province,confirmed,latitude,longitude
0,Busan,91,35.179884,129.074796
1,Chungcheongbuk-do,31,36.63568,127.491384
2,Chungcheongnam-do,115,36.658976,126.673318
3,Daegu,6066,35.87215,128.601783
4,Daejeon,22,36.350621,127.384744
5,Gangwon-do,29,37.885369,127.729868
6,Gwangju,16,35.160467,126.851392
7,Gyeonggi-do,164,37.275119,127.009466
8,Gyeongsangbuk-do,1145,36.576032,128.505599
9,Gyeongsangnam-do,85,35.238294,128.692397


In [13]:
# One bubble per province: marker size follows the confirmed count and the
# colour distinguishes provinces. The map is centred on `center`.
mapbox_options = dict(
    lat = 'latitude',
    lon = 'longitude',
    color = 'province',
    size = 'confirmed',
    hover_name = 'province',
    zoom = 5,
    height = 500,
    center = center,
)
fig = px.scatter_mapbox(df_merge, **mapbox_options)
fig.update_layout(mapbox_style = "carto-positron")
fig.show()

In [14]:
import folium

# Circle radius contributed by each confirmed case.
RADIUS_PER_CASE = 0.01

lats = list(df_merge['latitude'].values)
lons = list(df_merge['longitude'].values)

Map = folium.Map(location = [center['lat'], center['lon']], zoom_start = 7)

# Walk the columns together instead of indexing by position and by label
# in the same loop; this no longer depends on df_merge having a 0..n-1 index.
for lat, lon, province, confirmed in zip(
    lats, lons, df_merge['province'], df_merge['confirmed']
):
    folium.CircleMarker(
        location = [lat, lon],
        radius = RADIUS_PER_CASE * confirmed,
        color = 'red',
        fill_color = 'red',
        # Was misspelled `fill_opaciy`, so the 0.5 opacity was never applied.
        fill_opacity = 0.5,
        popup = str(province) + ':' + str(confirmed)
    ).add_to(Map)
Map

## Time

In [15]:
# Working copy of the `time` table without its 'time' column. `drop`
# returns a new frame, so the loaded table itself is left untouched.
df_time = time.drop(columns = 'time')

In [16]:
# Working copy of the `timeprovince` table without its 'time' column.
# `drop` returns a new frame, so the loaded table itself is left untouched.
df_timeprovince = timeprovince.drop(columns = 'time')

In [17]:
# Inspect the per-province time series after the 'time' column was dropped.
df_timeprovince

Unnamed: 0,date,province,confirmed,released,deceased
0,2020-01-20,Seoul,0,0,0
1,2020-01-20,Busan,0,0,0
2,2020-01-20,Daegu,0,0,0
3,2020-01-20,Incheon,1,0,0
4,2020-01-20,Gwangju,0,0,0
...,...,...,...,...,...
1066,2020-03-22,Jeollabuk-do,10,7,0
1067,2020-03-22,Jeollanam-do,6,3,0
1068,2020-03-22,Gyeongsangbuk-do,1254,432,24
1069,2020-03-22,Gyeongsangnam-do,88,42,0


In [18]:
# Sum the three count columns over all provinces for each date.
count_columns = ['confirmed', 'released', 'deceased']
timeprovince.groupby('date')[count_columns].sum()

Unnamed: 0_level_0,confirmed,released,deceased
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-20,1,0,0
2020-01-21,1,0,0
2020-01-22,1,0,0
2020-01-23,1,0,0
2020-01-24,2,0,0
...,...,...,...
2020-03-18,8402,1540,84
2020-03-19,8549,1947,91
2020-03-20,8635,2233,94
2020-03-21,8776,2612,102


In [19]:
# Attach the df_time columns for the same date to every province row.
# Count columns present in both frames keep pandas' default suffixes:
# _x for df_timeprovince and _y for df_time.
df_time_merge = df_timeprovince.merge(df_time, on = 'date', how = 'left')

In [20]:
# Inspect the merged frame; the *_x columns come from df_timeprovince and
# the *_y columns from df_time.
df_time_merge

Unnamed: 0,date,province,confirmed_x,released_x,deceased_x,test,negative,confirmed_y,released_y,deceased_y
0,2020-01-20,Seoul,0,0,0,1,0,1,0,0
1,2020-01-20,Busan,0,0,0,1,0,1,0,0
2,2020-01-20,Daegu,0,0,0,1,0,1,0,0
3,2020-01-20,Incheon,1,0,0,1,0,1,0,0
4,2020-01-20,Gwangju,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...
1066,2020-03-22,Jeollabuk-do,10,7,0,331780,308343,8897,2909,104
1067,2020-03-22,Jeollanam-do,6,3,0,331780,308343,8897,2909,104
1068,2020-03-22,Gyeongsangbuk-do,1254,432,24,331780,308343,8897,2909,104
1069,2020-03-22,Gyeongsangnam-do,88,42,0,331780,308343,8897,2909,104
