### COVID-19 - 국가별 사망자 수 추이 분석

In [1]:
import pandas as pd
import json
import os

# Load the country-name conversion table that country_convert() consults.
# 'utf-8-sig' transparently drops a leading byte-order mark if the file has one.
with open(
    'COVID-19-master/csse_covid_19_data/country_convert.json',
    'r',
    encoding='utf-8-sig',
) as json_file:
    json_data = json.loads(json_file.read())

def country_convert(row):
    """Return the replacement stored in ``json_data`` for ``row``.

    Values that have no entry in ``json_data`` are returned unchanged.
    """
    return json_data[row] if row in json_data else row

def create_dataframe(filename, base_path=None):
    """Load one daily-report CSV and return total deaths per country.

    Parameters
    ----------
    filename : str
        Name of the report file, e.g. ``'01-22-2020.csv'``. The text before
        the first ``.`` is turned into the column label of the result.
    base_path : str, optional
        Directory prefix that is concatenated with ``filename`` (plain string
        concatenation, so it must end with a path separator). When omitted,
        the module-level ``path`` variable is used.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Country_Region`` with a single ``int64`` column holding
        the summed ``Deaths`` values, labelled with the date derived from the
        file name (``'01-22-2020.csv'`` -> ``'1/22/2020'``).

    Raises
    ------
    KeyError
        If the file has no ``Deaths`` column, or neither a ``Country_Region``
        nor a ``Country/Region`` column.
    """
    if base_path is None:
        # Backward-compatible default: the directory set at module level.
        base_path = path

    doc = pd.read_csv(base_path + filename, encoding='utf-8-sig')

    # The country column is spelled either 'Country_Region' or
    # 'Country/Region'. Check for it explicitly instead of catching every
    # exception, so unrelated errors are not silently masked.
    if 'Country_Region' in doc.columns:
        doc = doc[['Country_Region', 'Deaths']]
    else:
        doc = doc[['Country/Region', 'Deaths']].rename(
            columns={'Country/Region': 'Country_Region'}
        )

    # Rows without a death count cannot be cast to int, so drop them first.
    doc = doc.dropna(subset=['Deaths'])
    doc = doc.astype({'Deaths': 'int64'})

    # Normalise country names before grouping so that different spellings
    # of the same country end up in one row.
    doc['Country_Region'] = doc['Country_Region'].apply(country_convert)
    doc = doc.groupby('Country_Region').sum()

    # '01-22-2020.csv' -> '1/22/2020' (only leading zeros of the whole
    # string are stripped; the day part keeps its zero padding).
    date_cols = filename.split('.')[0].lstrip('0').replace('-', '/')
    doc.columns = [date_cols]

    return doc

def _report_sort_key(filename):
    """Sort key that orders ``MM-DD-YYYY.csv`` names chronologically.

    Names that do not parse as a date sort after all dated files, in
    alphabetical order.
    """
    from datetime import datetime

    stem = filename[:-len('.csv')]
    try:
        return (0, datetime.strptime(stem, '%m-%d-%Y'), filename)
    except ValueError:
        return (1, datetime.min, filename)


def generate_merge_dataframe(path):
    """Combine every daily-report CSV in ``path`` into one wide table.

    Parameters
    ----------
    path : str
        Directory that is scanned for ``*.csv`` files.

    Returns
    -------
    pandas.DataFrame
        One row per ``Country_Region`` and one column per report file, in
        chronological order. Countries absent from a given report get ``0``
        in that column. An empty DataFrame is returned when the directory
        contains no CSV files.

    Notes
    -----
    Each file is loaded with ``create_dataframe(name)``, passing the file
    name only; ``create_dataframe`` resolves the directory itself (by default
    from the module-level ``path`` variable), so that variable should point
    at the same directory as the ``path`` argument.
    """
    # endswith('.csv') avoids matching a file that is literally named 'csv'.
    # A plain alphabetical sort of MM-DD-YYYY names would put 01-01-2021
    # before 12-31-2020, so sort by the parsed date instead.
    csv_list = sorted(
        (name for name in os.listdir(path) if name.endswith('.csv')),
        key=_report_sort_key,
    )

    if not csv_list:
        # Nothing to merge; previously this fell through to an unbound name.
        return pd.DataFrame()

    final_doc = None
    for name in csv_list:
        doc = create_dataframe(name)
        if final_doc is None:
            final_doc = doc
        else:
            # Outer join on the country index keeps countries that appear
            # in only some of the reports.
            final_doc = pd.merge(
                final_doc, doc, how='outer', left_index=True, right_index=True
            )

    # Countries missing from a report have NaN after the outer join.
    return final_doc.fillna(0)

In [2]:
# Directory holding the daily-report CSV files. create_dataframe() reads this
# module-level name when it builds each file path, so it must be set before
# the call below.
path = 'COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/'
# Merge all daily reports into one country-by-date table of deaths.
final_dataframe = generate_merge_dataframe(path)
final_dataframe

Unnamed: 0_level_0,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,1/28/2020,1/29/2020,1/30/2020,1/31/2020,...,6/08/2020,6/09/2020,6/10/2020,6/11/2020,6/12/2020,6/13/2020,6/14/2020,6/15/2020,6/16/2020,6/17/2020
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,369.0,384.0,405.0,426.0,446.0,451.0,471.0,478.0,491.0,504.0
Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,34.0,34.0,34.0,35.0,36.0,36.0,36.0,36.0,37.0,38.0
Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,715.0,724.0,732.0,741.0,751.0,760.0,767.0,777.0,788.0,799.0
Andorra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,52.0,52.0
Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,4.0,4.0,5.0,5.0,6.0,6.0,6.0,6.0,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vietnam,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
West Bank and Gaza,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
Yemen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,112.0,127.0,129.0,136.0,139.0,160.0,164.0,208.0,214.0,244.0
Zambia,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,11.0


In [3]:
# The outer merges introduce NaN (filled with 0 afterwards), which leaves the
# columns as floats; cast the counts back to integers.
final_dataframe = final_dataframe.astype('int64')
final_dataframe

Unnamed: 0_level_0,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,1/28/2020,1/29/2020,1/30/2020,1/31/2020,...,6/08/2020,6/09/2020,6/10/2020,6/11/2020,6/12/2020,6/13/2020,6/14/2020,6/15/2020,6/16/2020,6/17/2020
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,369,384,405,426,446,451,471,478,491,504
Albania,0,0,0,0,0,0,0,0,0,0,...,34,34,34,35,36,36,36,36,37,38
Algeria,0,0,0,0,0,0,0,0,0,0,...,715,724,732,741,751,760,767,777,788,799
Andorra,0,0,0,0,0,0,0,0,0,0,...,51,51,51,51,51,51,51,51,52,52
Angola,0,0,0,0,0,0,0,0,0,0,...,4,4,4,5,5,6,6,6,6,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vietnam,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
West Bank and Gaza,0,0,0,0,0,0,0,0,0,0,...,3,3,3,3,3,3,3,3,3,3
Yemen,0,0,0,0,0,0,0,0,0,0,...,112,127,129,136,139,160,164,208,214,244
Zambia,0,0,0,0,0,0,0,0,0,0,...,10,10,10,10,10,10,11,11,11,11


In [4]:
# Save the country-by-date deaths table; the Country_Region index is written
# as the first column.
final_dataframe.to_csv('./covid-19-deaths.csv', encoding = 'utf-8-sig')

In [5]:
# Turn off pandas' default NA tokens and treat only empty strings as missing.
# NOTE(review): presumably this keeps the two-letter code 'NA' from being
# parsed as a missing value - confirm against the lookup table.
country_info = pd.read_csv('COVID-19-master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv', encoding = 'utf-8-sig',keep_default_na = False, na_values='')
country_info.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key
0,0,0,,BW,,,,,,Botswana,,,Botswana
1,1,1,,BI,,,,,,Burundi,,,Burundi
2,2,2,,SL,,,,,,Sierra Leone,,,Sierra Leone
3,3,3,4.0,AF,AFG,4.0,,,,Afghanistan,33.93911,67.709953,Afghanistan
4,4,4,8.0,AL,ALB,8.0,,,,Albania,41.1533,20.1683,Albania


In [6]:
# Only the two-letter code (iso2) and the country name are needed from the
# lookup table.
country_info = country_info[['iso2', 'Country_Region']]
country_info.shape

(3560, 2)

In [7]:
# The lookup table has many rows per country; keep the first row for each
# Country_Region so there is one code per country.
country_info = country_info.drop_duplicates(subset = 'Country_Region')
country_info.shape

(180, 2)

In [8]:
# Show the entries that have no iso2 code.
country_info[country_info['iso2'].isnull()]

Unnamed: 0,iso2,Country_Region
45,,Diamond Princess
114,,MS Zaandam


In [9]:
# Drop the entries without an iso2 code (no flag URL can be built for them),
# then repeat the check to confirm none remain.
country_info = country_info.dropna(subset = ['iso2'])
country_info[country_info['iso2'].isnull()]

Unnamed: 0,iso2,Country_Region


In [10]:
# Display the cleaned lookup table (one iso2 code per country).
country_info

Unnamed: 0,iso2,Country_Region
0,BW,Botswana
1,BI,Burundi
2,SL,Sierra Leone
3,AF,Afghanistan
4,AL,Albania
...,...,...
175,ZW,Zimbabwe
199,AU,Australia
207,CA,Canada
222,CN,China


In [11]:
def create_flag_link(iso2):
    """Build the countryflags.io image URL (flat style, 64 px) for ``iso2``.

    ``iso2`` must be a string; it is inserted into the URL as-is.
    """
    return ''.join(('https://www.countryflags.io/', iso2, '/flat/64.png'))

In [12]:
# Replace each iso2 code with its flag image URL. The column keeps the name
# 'iso2' at this point even though it now holds URLs.
country_info['iso2'] = country_info['iso2'].apply(create_flag_link)
country_info.head()

Unnamed: 0,iso2,Country_Region
0,https://www.countryflags.io/BW/flat/64.png,Botswana
1,https://www.countryflags.io/BI/flat/64.png,Burundi
2,https://www.countryflags.io/SL/flat/64.png,Sierra Leone
3,https://www.countryflags.io/AF/flat/64.png,Afghanistan
4,https://www.countryflags.io/AL/flat/64.png,Albania


In [13]:
# The first column now holds flag URLs, so rename it. The assignment is
# positional and relies on the column order being ['iso2', 'Country_Region'].
country_info.columns = ['Country_Flag', 'Country_Region']
country_info.head()

Unnamed: 0,Country_Flag,Country_Region
0,https://www.countryflags.io/BW/flat/64.png,Botswana
1,https://www.countryflags.io/BI/flat/64.png,Burundi
2,https://www.countryflags.io/SL/flat/64.png,Sierra Leone
3,https://www.countryflags.io/AF/flat/64.png,Afghanistan
4,https://www.countryflags.io/AL/flat/64.png,Albania


In [17]:
# Left-join the flag URLs onto the deaths table by country name.
# 'Country_Region' is the index of final_dataframe and a column of
# country_info; countries with no match in country_info get NaN in
# Country_Flag.
final_df = pd.merge(final_dataframe,country_info, how = 'left', on = 'Country_Region')
final_df.head()

Unnamed: 0,Country_Region,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,1/28/2020,1/29/2020,1/30/2020,...,6/09/2020,6/10/2020,6/11/2020,6/12/2020,6/13/2020,6/14/2020,6/15/2020,6/16/2020,6/17/2020,Country_Flag
0,Afghanistan,0,0,0,0,0,0,0,0,0,...,384,405,426,446,451,471,478,491,504,https://www.countryflags.io/AF/flat/64.png
1,Albania,0,0,0,0,0,0,0,0,0,...,34,34,35,36,36,36,36,37,38,https://www.countryflags.io/AL/flat/64.png
2,Algeria,0,0,0,0,0,0,0,0,0,...,724,732,741,751,760,767,777,788,799,https://www.countryflags.io/DZ/flat/64.png
3,Andorra,0,0,0,0,0,0,0,0,0,...,51,51,51,51,51,51,51,52,52,https://www.countryflags.io/AD/flat/64.png
4,Angola,0,0,0,0,0,0,0,0,0,...,4,4,5,5,6,6,6,6,7,https://www.countryflags.io/AO/flat/64.png


In [24]:
# Move 'Country_Flag' from wherever it currently sits to the second position,
# directly after 'Country_Region', leaving the other columns in order.
col_list = final_df.columns.tolist()
col_list.insert(1, col_list.pop(col_list.index('Country_Flag')))

final_df = final_df[col_list]
final_df

Unnamed: 0,Country_Region,Country_Flag,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,1/28/2020,1/29/2020,...,6/08/2020,6/09/2020,6/10/2020,6/11/2020,6/12/2020,6/13/2020,6/14/2020,6/15/2020,6/16/2020,6/17/2020
0,Afghanistan,https://www.countryflags.io/AF/flat/64.png,0,0,0,0,0,0,0,0,...,369,384,405,426,446,451,471,478,491,504
1,Albania,https://www.countryflags.io/AL/flat/64.png,0,0,0,0,0,0,0,0,...,34,34,34,35,36,36,36,36,37,38
2,Algeria,https://www.countryflags.io/DZ/flat/64.png,0,0,0,0,0,0,0,0,...,715,724,732,741,751,760,767,777,788,799
3,Andorra,https://www.countryflags.io/AD/flat/64.png,0,0,0,0,0,0,0,0,...,51,51,51,51,51,51,51,51,52,52
4,Angola,https://www.countryflags.io/AO/flat/64.png,0,0,0,0,0,0,0,0,...,4,4,4,5,5,6,6,6,6,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,Vietnam,https://www.countryflags.io/VN/flat/64.png,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
183,West Bank and Gaza,https://www.countryflags.io/PS/flat/64.png,0,0,0,0,0,0,0,0,...,3,3,3,3,3,3,3,3,3,3
184,Yemen,,0,0,0,0,0,0,0,0,...,112,127,129,136,139,160,164,208,214,244
185,Zambia,https://www.countryflags.io/ZM/flat/64.png,0,0,0,0,0,0,0,0,...,10,10,10,10,10,10,11,11,11,11


In [29]:
# Write the final table used for the chart. The default integer index is
# written too, as an unnamed first column.
final_df.to_csv('./covid-19-deaths-data-for-graph.csv', encoding = 'utf-8-sig')

### [완성] : https://app.flourish.studio/visualisation/3285190/edit