## Most Recent dataset

In [8]:
import pandas as pd
import json, os 

# Load the country-name conversion table used by country_name_convert():
# a JSON mapping whose keys are looked up with the 'Country_Region' value and
# whose values replace it. 'utf-8-sig' tolerates a leading BOM in the file.
with open('COVID-19-master/csse_covid_19_data/country_convert.json', 'r', encoding='utf-8-sig') as json_file:
    json_data = json.load(json_file)

def country_name_convert(row):
    """Return the normalised country name for one row.

    ``row`` must support ``row['Country_Region']``. When that name is a key of
    the module-level ``json_data`` mapping the mapped value is returned;
    otherwise the name is returned unchanged.
    """
    name = row['Country_Region']
    return json_data.get(name, name)

def create_dateframe(filename):
    """Load one daily report and return confirmed counts summed per country.

    Parameters
    ----------
    filename : str
        Name of a CSV file inside the module-level ``PATH`` directory. The
        stem must look like ``MM-DD-YYYY``; it is rearranged into the output
        column label.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Country_Region`` with a single int64 column labelled
        ``YYYY/MM/DD``.

    Raises
    ------
    KeyError
        If the file has no ``Confirmed`` column, or has neither a
        ``Country_Region`` nor a ``Country/Region`` column.
    """
    doc = pd.read_csv(os.path.join(PATH, filename), encoding='utf-8-sig')  # 1. read the CSV file

    # 2. Keep only the country and count columns. The country column may be
    #    spelled 'Country/Region'; choose it explicitly instead of using a bare
    #    ``except`` as control flow, which would also hide unrelated errors.
    region_col = 'Country_Region' if 'Country_Region' in doc.columns else 'Country/Region'
    doc = doc[[region_col, 'Confirmed']].rename(columns={region_col: 'Country_Region'})

    doc = doc.dropna(subset=['Confirmed'])  # 3. drop rows without a count

    # 4. Make country names consistent across files using json_data.
    #    Series.map (unlike row-wise DataFrame.apply) still yields a Series
    #    when no rows are left, and assign() avoids writing into a slice.
    doc = doc.assign(
        Country_Region=doc['Country_Region'].map(lambda name: json_data.get(name, name))
    )
    doc = doc.astype({'Confirmed': 'int64'})   # 5. counts as integers
    doc = doc.groupby('Country_Region').sum()  # 6. merge duplicate country rows

    # 7. Relabel the 'Confirmed' column with the date taken from the file
    #    name, reordered from MM-DD-YYYY to YYYY/MM/DD.
    month, day, year = filename.split(".")[0].split("-")[:3]
    doc.columns = [f"{year}/{month}/{day}"]
    return doc

In [9]:
import os

def generate_dateframe_by_path(PATH):
    """Combine every daily CSV in ``PATH`` into one wide table.

    Each file whose name ends in the extension ``csv`` is passed (by file
    name, in sorted file-name order) to ``create_dateframe``; the per-file
    frames are outer-joined on their index and missing cells are filled
    with 0.

    Note: ``create_dateframe`` resolves file names against the module-level
    ``PATH``, not against this argument, so the two must point at the same
    directory.

    Parameters
    ----------
    PATH : str
        Directory to scan for CSV files.

    Returns
    -------
    pandas.DataFrame
        One row per index key, one column per file.

    Raises
    ------
    ValueError
        If the directory contains no CSV file (previously this surfaced as an
        ``UnboundLocalError`` on ``final_doc``).
    """
    csv_list = sorted(name for name in os.listdir(PATH) if name.split(".")[-1] == 'csv')
    if not csv_list:
        raise ValueError(f"no CSV files found in {PATH!r}")

    final_doc = None
    for file in csv_list:
        doc = create_dateframe(file)
        if final_doc is None:
            final_doc = doc
        else:
            # Outer join keeps keys that appear in only some of the files.
            final_doc = pd.merge(final_doc, doc, how='outer', left_index=True, right_index=True)

    return final_doc.fillna(0)

In [10]:
def create_flag_link(row):
    """Return the flagcdn.com 48x36 PNG URL for the code string ``row``.

    Despite the parameter name, ``row`` is a single string (the caller passes
    lower-cased ``iso2`` values), not a DataFrame row.
    """
    prefix, suffix = 'https://flagcdn.com/48x36/', '.png'
    return prefix + row + suffix


# Build the per-country confirmed-case table and export it as CSV.
PATH = 'COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/'
df_confirmed = generate_dateframe_by_path(PATH).astype('int64')

# Country -> ISO-2 lookup. With keep_default_na=False and na_values='' only
# empty cells are parsed as missing (presumably so that a literal code such
# as "NA" survives as text -- confirm against the lookup file).
country_info = pd.read_csv(
    "COVID-19-master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv",
    encoding='utf-8-sig',
    keep_default_na=False,
    na_values='',
)
country_info = country_info[['iso2', 'Country_Region']].drop_duplicates(
    subset='Country_Region', keep='last'
)

# Attach the ISO code to each country, drop countries without one, then turn
# the lower-cased code into a flag-image URL.
doc_final_country = pd.merge(
    df_confirmed, country_info, how='left', on='Country_Region'
).dropna(subset=['iso2'])
doc_final_country['iso2'] = doc_final_country['iso2'].str.lower().apply(create_flag_link)

# Column order: country name, flag URL, then the remaining (date) columns in
# ascending string order. The flag column is exported as 'Country_Flag'.
cols = ['Country_Region', 'iso2'] + sorted(
    c for c in doc_final_country.columns if c not in ('Country_Region', 'iso2')
)
doc_final_country = doc_final_country[cols].rename(columns={'iso2': 'Country_Flag'})
cols = doc_final_country.columns.tolist()

doc_final_country.to_csv("COVID-19-master/final_covid_recent_data_for_graph.csv")

## Recovery Visualization

In [17]:
import pandas as pd
import json, os 

# Load the country-name conversion table used by country_name_convert().
# NOTE(review): this path has no 'COVID-19-master/' prefix, unlike the loader
# in the "Most Recent dataset" section -- confirm the intended working directory.
with open('csse_covid_19_data/country_convert.json', 'r', encoding='utf-8-sig') as json_file:
    json_data = json.load(json_file)

def country_name_convert(row):
    """Return the normalised country name for one row.

    Looks up ``row['Country_Region']`` in the module-level ``json_data``
    mapping and falls back to the original name when there is no entry.
    """
    name = row['Country_Region']
    return json_data.get(name, name)

def create_dateframe(filename):
    """Load one daily report and return recovered counts summed per country.

    Parameters
    ----------
    filename : str
        Name of a CSV file inside the module-level ``PATH`` directory. The
        stem must look like ``MM-DD-YYYY``; it is rearranged into the output
        column label.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Country_Region`` with a single int64 column labelled
        ``YYYY/MM/DD``.

    Raises
    ------
    KeyError
        If the file has no ``Recovered`` column, or has neither a
        ``Country_Region`` nor a ``Country/Region`` column.
    """
    doc = pd.read_csv(os.path.join(PATH, filename), encoding='utf-8-sig')  # 1. read the CSV file

    # 2. Keep only the country and count columns. The country column may be
    #    spelled 'Country/Region'; choose it explicitly instead of using a bare
    #    ``except`` as control flow, which would also hide unrelated errors.
    region_col = 'Country_Region' if 'Country_Region' in doc.columns else 'Country/Region'
    doc = doc[[region_col, 'Recovered']].rename(columns={region_col: 'Country_Region'})

    doc = doc.dropna(subset=['Recovered'])  # 3. drop rows without a count

    # 4. Make country names consistent across files using json_data.
    #    Series.map (unlike row-wise DataFrame.apply) still yields a Series
    #    when no rows are left, and assign() avoids writing into a slice.
    doc = doc.assign(
        Country_Region=doc['Country_Region'].map(lambda name: json_data.get(name, name))
    )
    doc = doc.astype({'Recovered': 'int64'})   # 5. counts as integers
    doc = doc.groupby('Country_Region').sum()  # 6. merge duplicate country rows

    # 7. Relabel the 'Recovered' column with the date taken from the file
    #    name, reordered from MM-DD-YYYY to YYYY/MM/DD.
    month, day, year = filename.split(".")[0].split("-")[:3]
    doc.columns = [f"{year}/{month}/{day}"]
    return doc

In [18]:
import os

def generate_dateframe_by_path(PATH):
    """Combine every daily CSV in ``PATH`` into one wide table.

    Each file whose name ends in the extension ``csv`` is passed (by file
    name, in sorted file-name order) to ``create_dateframe``; the per-file
    frames are outer-joined on their index and missing cells are filled
    with 0.

    Note: ``create_dateframe`` resolves file names against the module-level
    ``PATH``, not against this argument, so the two must point at the same
    directory.

    Parameters
    ----------
    PATH : str
        Directory to scan for CSV files.

    Returns
    -------
    pandas.DataFrame
        One row per index key, one column per file.

    Raises
    ------
    ValueError
        If the directory contains no CSV file (previously this surfaced as an
        ``UnboundLocalError`` on ``final_doc``).
    """
    csv_list = sorted(name for name in os.listdir(PATH) if name.split(".")[-1] == 'csv')
    if not csv_list:
        raise ValueError(f"no CSV files found in {PATH!r}")

    final_doc = None
    for file in csv_list:
        doc = create_dateframe(file)
        if final_doc is None:
            final_doc = doc
        else:
            # Outer join keeps keys that appear in only some of the files.
            final_doc = pd.merge(final_doc, doc, how='outer', left_index=True, right_index=True)

    return final_doc.fillna(0)

In [19]:
def create_flag_link(row):
    """Return the flagcdn.com 48x36 PNG URL for the code string ``row``.

    ``row`` is a single string (the caller passes lower-cased ``iso2``
    values), not a DataFrame row.
    """
    prefix, suffix = 'https://flagcdn.com/48x36/', '.png'
    return prefix + row + suffix


# Build the per-country recovered-case table and export it as CSV.
# Despite its name, df_confirmed holds whatever create_dateframe() extracts,
# which in this section is the 'Recovered' column.
# NOTE(review): paths here have no 'COVID-19-master/' prefix, unlike the
# "Most Recent dataset" section -- confirm the intended working directory.
PATH = 'csse_covid_19_data/csse_covid_19_daily_reports/'
df_confirmed = generate_dateframe_by_path(PATH).astype('int64')

# Country -> ISO-2 lookup; only empty cells are parsed as missing.
country_info = pd.read_csv(
    "csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv",
    encoding='utf-8-sig',
    keep_default_na=False,
    na_values='',
)
country_info = country_info[['iso2', 'Country_Region']].drop_duplicates(
    subset='Country_Region', keep='last'
)

# Attach the ISO code to each country, drop countries without one, then turn
# the lower-cased code into a flag-image URL.
doc_final_country = pd.merge(
    df_confirmed, country_info, how='left', on='Country_Region'
).dropna(subset=['iso2'])
doc_final_country['iso2'] = doc_final_country['iso2'].str.lower().apply(create_flag_link)

# Column order: country name, flag URL, then the remaining (date) columns in
# ascending string order. The flag column is exported as 'Country_Flag'.
cols = ['Country_Region', 'iso2'] + sorted(
    c for c in doc_final_country.columns if c not in ('Country_Region', 'iso2')
)
doc_final_country = doc_final_country[cols].rename(columns={'iso2': 'Country_Flag'})
cols = doc_final_country.columns.tolist()

doc_final_country.to_csv("final_covid_recent_recovery_data_for_graph.csv")

## Death Visualization

In [23]:
import pandas as pd
import json, os 

# Load the country-name conversion table used by country_name_convert().
# NOTE(review): this path has no 'COVID-19-master/' prefix, unlike the loader
# in the "Most Recent dataset" section -- confirm the intended working directory.
with open('csse_covid_19_data/country_convert.json', 'r', encoding='utf-8-sig') as json_file:
    json_data = json.load(json_file)

def country_name_convert(row):
    """Return the normalised country name for one row.

    Looks up ``row['Country_Region']`` in the module-level ``json_data``
    mapping and falls back to the original name when there is no entry.
    """
    name = row['Country_Region']
    return json_data.get(name, name)

def create_dateframe(filename):
    """Load one daily report and return death counts summed per country.

    Parameters
    ----------
    filename : str
        Name of a CSV file inside the module-level ``PATH`` directory. The
        stem must look like ``MM-DD-YYYY``; it is rearranged into the output
        column label.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Country_Region`` with a single int64 column labelled
        ``YYYY/MM/DD``.

    Raises
    ------
    KeyError
        If the file has no ``Deaths`` column, or has neither a
        ``Country_Region`` nor a ``Country/Region`` column.
    """
    doc = pd.read_csv(os.path.join(PATH, filename), encoding='utf-8-sig')  # 1. read the CSV file

    # 2. Keep only the country and count columns. The country column may be
    #    spelled 'Country/Region'; choose it explicitly instead of using a bare
    #    ``except`` as control flow, which would also hide unrelated errors.
    region_col = 'Country_Region' if 'Country_Region' in doc.columns else 'Country/Region'
    doc = doc[[region_col, 'Deaths']].rename(columns={region_col: 'Country_Region'})

    doc = doc.dropna(subset=['Deaths'])  # 3. drop rows without a count

    # 4. Make country names consistent across files using json_data.
    #    Series.map (unlike row-wise DataFrame.apply) still yields a Series
    #    when no rows are left, and assign() avoids writing into a slice.
    doc = doc.assign(
        Country_Region=doc['Country_Region'].map(lambda name: json_data.get(name, name))
    )
    doc = doc.astype({'Deaths': 'int64'})      # 5. counts as integers
    doc = doc.groupby('Country_Region').sum()  # 6. merge duplicate country rows

    # 7. Relabel the 'Deaths' column with the date taken from the file name,
    #    reordered from MM-DD-YYYY to YYYY/MM/DD.
    month, day, year = filename.split(".")[0].split("-")[:3]
    doc.columns = [f"{year}/{month}/{day}"]
    return doc

In [24]:
import os

def generate_dateframe_by_path(PATH):
    """Combine every daily CSV in ``PATH`` into one wide table.

    Each file whose name ends in the extension ``csv`` is passed (by file
    name, in sorted file-name order) to ``create_dateframe``; the per-file
    frames are outer-joined on their index and missing cells are filled
    with 0.

    Note: ``create_dateframe`` resolves file names against the module-level
    ``PATH``, not against this argument, so the two must point at the same
    directory.

    Parameters
    ----------
    PATH : str
        Directory to scan for CSV files.

    Returns
    -------
    pandas.DataFrame
        One row per index key, one column per file.

    Raises
    ------
    ValueError
        If the directory contains no CSV file (previously this surfaced as an
        ``UnboundLocalError`` on ``final_doc``).
    """
    csv_list = sorted(name for name in os.listdir(PATH) if name.split(".")[-1] == 'csv')
    if not csv_list:
        raise ValueError(f"no CSV files found in {PATH!r}")

    final_doc = None
    for file in csv_list:
        doc = create_dateframe(file)
        if final_doc is None:
            final_doc = doc
        else:
            # Outer join keeps keys that appear in only some of the files.
            final_doc = pd.merge(final_doc, doc, how='outer', left_index=True, right_index=True)

    return final_doc.fillna(0)

In [25]:
def create_flag_link(row):
    """Return the flagcdn.com 48x36 PNG URL for the code string ``row``.

    ``row`` is a single string (the caller passes lower-cased ``iso2``
    values), not a DataFrame row.
    """
    prefix, suffix = 'https://flagcdn.com/48x36/', '.png'
    return prefix + row + suffix


# Build the per-country death-count table and export it as CSV.
# Despite its name, df_confirmed holds whatever create_dateframe() extracts,
# which in this section is the 'Deaths' column.
# NOTE(review): paths here have no 'COVID-19-master/' prefix, unlike the
# "Most Recent dataset" section -- confirm the intended working directory.
PATH = 'csse_covid_19_data/csse_covid_19_daily_reports/'
df_confirmed = generate_dateframe_by_path(PATH).astype('int64')

# Country -> ISO-2 lookup; only empty cells are parsed as missing.
country_info = pd.read_csv(
    "csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv",
    encoding='utf-8-sig',
    keep_default_na=False,
    na_values='',
)
country_info = country_info[['iso2', 'Country_Region']].drop_duplicates(
    subset='Country_Region', keep='last'
)

# Attach the ISO code to each country, drop countries without one, then turn
# the lower-cased code into a flag-image URL.
doc_final_country = pd.merge(
    df_confirmed, country_info, how='left', on='Country_Region'
).dropna(subset=['iso2'])
doc_final_country['iso2'] = doc_final_country['iso2'].str.lower().apply(create_flag_link)

# Column order: country name, flag URL, then the remaining (date) columns in
# ascending string order. The flag column is exported as 'Country_Flag'.
cols = ['Country_Region', 'iso2'] + sorted(
    c for c in doc_final_country.columns if c not in ('Country_Region', 'iso2')
)
doc_final_country = doc_final_country[cols].rename(columns={'iso2': 'Country_Flag'})
cols = doc_final_country.columns.tolist()

doc_final_country.to_csv("final_covid_recent_deaths_data_for_graph.csv")

# US COVID-19

In [1]:
import pandas as pd
import json, os 

def create_dateframe(filename):
    """Load one daily report and return confirmed counts summed per state.

    Parameters
    ----------
    filename : str
        Name of a CSV file inside the module-level ``PATH`` directory. The
        stem must look like ``MM-DD-YYYY``; it is rearranged into the output
        column label.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Province_State`` with a single int64 column labelled
        ``YYYY/MM/DD``.

    Raises
    ------
    KeyError
        If the file has no ``Confirmed`` column, or has neither a
        ``Province_State`` nor a ``Province/State`` column.
    """
    doc = pd.read_csv(os.path.join(PATH, filename), encoding='utf-8-sig')  # 1. read the CSV file

    # 2. Keep only the state and count columns. The state column may be
    #    spelled 'Province/State'; choose it explicitly instead of using a bare
    #    ``except`` as control flow, which would also hide unrelated errors.
    state_col = 'Province_State' if 'Province_State' in doc.columns else 'Province/State'
    doc = doc[[state_col, 'Confirmed']].rename(columns={state_col: 'Province_State'})

    doc = doc.dropna(subset=['Confirmed'])     # 3. drop rows without a count
    doc = doc.astype({'Confirmed': 'int64'})   # 4. counts as integers
    doc = doc.groupby('Province_State').sum()  # 5. merge duplicate state rows

    # 6. Relabel the 'Confirmed' column with the date taken from the file
    #    name, reordered from MM-DD-YYYY to YYYY/MM/DD (year first).
    month, day, year = filename.split(".")[0].split("-")[:3]
    doc.columns = [f"{year}/{month}/{day}"]
    return doc

In [2]:
import os

def generate_dateframe_by_path(PATH):
    """Combine every daily CSV in ``PATH`` into one wide table.

    Each file whose name ends in the extension ``csv`` is passed (by file
    name, in sorted file-name order) to ``create_dateframe``; the per-file
    frames are outer-joined on their index and missing cells are filled
    with 0.

    Note: ``create_dateframe`` resolves file names against the module-level
    ``PATH``, not against this argument, so the two must point at the same
    directory.

    Parameters
    ----------
    PATH : str
        Directory to scan for CSV files.

    Returns
    -------
    pandas.DataFrame
        One row per index key, one column per file.

    Raises
    ------
    ValueError
        If the directory contains no CSV file (previously this surfaced as an
        ``UnboundLocalError`` on ``final_doc``).
    """
    csv_list = sorted(name for name in os.listdir(PATH) if name.split(".")[-1] == 'csv')
    if not csv_list:
        raise ValueError(f"no CSV files found in {PATH!r}")

    final_doc = None
    for file in csv_list:
        doc = create_dateframe(file)
        if final_doc is None:
            final_doc = doc
        else:
            # Outer join keeps keys that appear in only some of the files.
            final_doc = pd.merge(final_doc, doc, how='outer', left_index=True, right_index=True)

    return final_doc.fillna(0)

In [3]:
def create_flag_link(row):
    """Return the flagcdn.com w40 PNG URL for a US state code string ``row``.

    The code is inserted after the ``us-`` prefix, e.g. ``'al'`` gives
    ``.../w40/us-al.png``.
    """
    prefix, suffix = 'https://flagcdn.com/w40/us-', '.png'  # US state flags
    return prefix + row + suffix

# Build the wide per-state confirmed table from the US daily reports, cast the
# counts to integers and move the Province_State index into a regular column.
PATH = 'COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports_us/'
df_confirmed = generate_dateframe_by_path(PATH).astype('int64').reset_index()

In [4]:
# Load the state lookup used by country_name_convert() below: keys are matched
# against 'Province_State'. This rebinds the module-level json_data name that
# other sections of the notebook also use.
with open('COVID-19-master/csse_covid_19_data/US_convert.json', 'r', encoding='utf-8-sig') as json_file:
    json_data = json.load(json_file)
    
def country_name_convert(row):
    """Return the ``json_data`` entry for ``row['Province_State']``.

    Returns ``False`` when the state name has no entry, which the caller uses
    as a marker for rows to discard.
    """
    return json_data.get(row['Province_State'], False)
    
# Look up each state's code; names missing from json_data come back as False.
df_confirmed['State'] = df_confirmed.apply(country_name_convert, axis=1)
# Drop the unmapped (False) rows. The explicit .copy() yields an independent
# frame, so the column assignment below does not write into a slice of the
# previous frame (which triggers pandas' SettingWithCopyWarning).
df_confirmed = df_confirmed.loc[df_confirmed['State'] != False].copy()
# Replace the code with its flag-image URL.
df_confirmed['State'] = df_confirmed['State'].str.lower().apply(create_flag_link)
df_confirmed

Unnamed: 0,Province_State,2021/01/01,2022/01/01,2021/01/02,2022/01/02,2021/01/03,2022/01/03,2021/01/04,2022/01/04,2021/01/05,...,2021/12/27,2020/12/28,2021/12/28,2020/12/29,2021/12/29,2020/12/30,2021/12/30,2020/12/31,2021/12/31,State
0,Alabama,365747,896614,369458,909541,371934,913603,374095,921175,379593,...,870627,347897,874332,351804,880307,356820,888563,361226,896614,https://flagcdn.com/w40/us-al.png
1,Alaska,47019,157169,47821,157169,48118,160089,48382,160089,48582,...,156177,45930,156177,46123,157169,46504,157169,47014,157169,https://flagcdn.com/w40/us-ak.png
3,Arizona,530267,1389708,539150,1390409,556384,1404601,561542,1411813,567474,...,1362693,504423,1362693,507222,1364669,512489,1368080,520207,1381488,https://flagcdn.com/w40/us-az.png
4,Arkansas,229442,570641,231442,572822,233475,574572,234781,581134,238888,...,551394,216528,553808,219246,557551,222430,562529,225138,566486,https://flagcdn.com/w40/us-ar.png
5,California,2435176,5520392,2481504,5567915,2512884,5707866,2549792,5790695,2585074,...,5357861,2269477,5387697,2306544,5427677,2327458,5482593,2384933,5518699,https://flagcdn.com/w40/us-ca.png
6,Colorado,337161,929275,339172,929275,341250,954499,343435,962777,346893,...,896403,326668,902783,328408,910128,330859,919112,334097,929275,https://flagcdn.com/w40/us-co.png
7,Connecticut,185708,510188,190120,510188,190120,533866,194636,544468,196968,...,489211,181200,494964,181967,502484,183663,510188,185708,510188,https://flagcdn.com/w40/us-ct.png
8,Delaware,58064,183880,58873,186996,59484,189382,60333,191649,61100,...,173258,55488,173824,56189,174909,56596,176863,57456,180366,https://flagcdn.com/w40/us-de.png
11,Florida,1323315,4209927,1354833,4209927,1365436,4209927,1376692,4360178,1392123,...,3936170,1280177,3965229,1292252,4012152,1306123,4090000,1323315,4209927,https://flagcdn.com/w40/us-fl.png
12,Georgia,677589,1839879,685122,1839879,690900,1839879,696063,1904918,706154,...,1759879,636240,1772897,645690,1792021,654743,1816441,666452,1839879,https://flagcdn.com/w40/us-ga.png


In [5]:
# Column order: state name, flag URL, then the remaining (date) columns in
# ascending string order (letters sort after the digit-leading dates, so the
# two text columns are moved to the front explicitly).
cols = ['Province_State', 'State'] + sorted(
    c for c in df_confirmed.columns if c not in ('Province_State', 'State')
)

# The flag column is exported under the name 'Country_Flag'.
doc_final_country = df_confirmed[cols].rename(columns={'State': 'Country_Flag'})
cols = doc_final_country.columns.tolist()

doc_final_country.to_csv("COVID-19-master/final_us_covid_data_for_graph.csv")  # write the output file

# China COVID-19

In [16]:
import pandas as pd
import json, os 

def create_dateframe(filename):
    """Load one daily report and return confirmed counts summed per province.

    Parameters
    ----------
    filename : str
        Name of a CSV file inside the module-level ``PATH`` directory. The
        stem must look like ``MM-DD-YYYY``; it is rearranged into the output
        column label.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Province_State`` with a single int64 column labelled
        ``YYYY/MM/DD``.

    Raises
    ------
    KeyError
        If the file has no ``Confirmed`` column, or has neither a
        ``Province_State`` nor a ``Province/State`` column.
    """
    doc = pd.read_csv(os.path.join(PATH, filename), encoding='utf-8-sig')  # 1. read the CSV file

    # 2. Keep only the province and count columns. The province column may be
    #    spelled 'Province/State'; choose it explicitly instead of using a bare
    #    ``except`` as control flow, which would also hide unrelated errors.
    state_col = 'Province_State' if 'Province_State' in doc.columns else 'Province/State'
    doc = doc[[state_col, 'Confirmed']].rename(columns={state_col: 'Province_State'})

    doc = doc.dropna(subset=['Confirmed'])     # 3. drop rows without a count
    doc = doc.astype({'Confirmed': 'int64'})   # 4. counts as integers
    doc = doc.groupby('Province_State').sum()  # 5. merge duplicate province rows

    # 6. Relabel the 'Confirmed' column with the date taken from the file
    #    name, reordered from MM-DD-YYYY to YYYY/MM/DD.
    month, day, year = filename.split(".")[0].split("-")[:3]
    doc.columns = [f"{year}/{month}/{day}"]
    return doc

In [17]:
import os

def generate_dateframe_by_path(PATH):
    """Combine every daily CSV in ``PATH`` into one wide table.

    Each file whose name ends in the extension ``csv`` is passed (by file
    name, in sorted file-name order) to ``create_dateframe``; the per-file
    frames are outer-joined on their index and missing cells are filled
    with 0.

    Note: ``create_dateframe`` resolves file names against the module-level
    ``PATH``, not against this argument, so the two must point at the same
    directory.

    Parameters
    ----------
    PATH : str
        Directory to scan for CSV files.

    Returns
    -------
    pandas.DataFrame
        One row per index key, one column per file.

    Raises
    ------
    ValueError
        If the directory contains no CSV file (previously this surfaced as an
        ``UnboundLocalError`` on ``final_doc``).
    """
    csv_list = sorted(name for name in os.listdir(PATH) if name.split(".")[-1] == 'csv')
    if not csv_list:
        raise ValueError(f"no CSV files found in {PATH!r}")

    final_doc = None
    for file in csv_list:
        doc = create_dateframe(file)
        if final_doc is None:
            final_doc = doc
        else:
            # Outer join keeps keys that appear in only some of the files.
            final_doc = pd.merge(final_doc, doc, how='outer', left_index=True, right_index=True)

    return final_doc.fillna(0)

In [18]:
def create_flag_link(row):
    """Return the flagcdn.com 48x36 PNG URL for the code string ``row``.

    NOTE(review): this is the same URL pattern the country-level sections use
    with ISO-2 country codes, but here it is fed province abbreviations (the
    displayed output shows e.g. ``ah.png`` and ``bj.png``) -- confirm these
    resolve to the intended images.
    """
    prefix, suffix = 'https://flagcdn.com/48x36/', '.png'  # flags for the China section
    return prefix + row + suffix

# Build the wide per-province confirmed table from the global daily reports,
# cast the counts to integers and move the Province_State index into a
# regular column. Rows are filtered down to the wanted provinces afterwards.
PATH = 'COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/'
df_confirmed = generate_dateframe_by_path(PATH).astype('int64').reset_index()

In [19]:
# Load the province lookup used by country_name_convert() below: keys are
# matched against 'Province_State'. This rebinds the module-level json_data
# name that other sections of the notebook also use.
with open('COVID-19-master/csse_covid_19_data/CN_State.json', 'r', encoding='utf-8-sig') as json_file:
    json_data = json.load(json_file)
    
def country_name_convert(row):
    """Return the ``json_data`` entry for ``row['Province_State']``.

    Returns ``False`` when the province name has no entry, which the caller
    uses as a marker for rows to discard.
    """
    return json_data.get(row['Province_State'], False)
    
# Look up each province's code; names missing from json_data come back as False.
df_confirmed['State'] = df_confirmed.apply(country_name_convert, axis=1)
# Drop the unmapped (False) rows. The explicit .copy() yields an independent
# frame, so the column assignment below does not write into a slice of the
# previous frame (which triggers pandas' SettingWithCopyWarning).
df_confirmed = df_confirmed.loc[df_confirmed['State'] != False].copy()
# Replace the code with its flag-image URL.
df_confirmed['State'] = df_confirmed['State'].str.lower().apply(create_flag_link)
df_confirmed

Unnamed: 0,Province_State,2021/01/01,2022/01/01,2021/01/02,2022/01/02,2021/01/03,2022/01/03,2021/01/04,2022/01/04,2021/01/05,...,2021/12/27,2020/12/28,2021/12/28,2020/12/29,2021/12/29,2020/12/30,2021/12/30,2020/12/31,2021/12/31,State
24,Anhui,993,1009,993,1009,993,1009,993,1009,993,...,1009,993,1009,993,1009,993,1009,993,1009,https://flagcdn.com/48x36/ah.png
60,Beijing,988,1211,990,1215,992,1215,993,1216,994,...,1208,979,1208,980,1209,982,1209,987,1211,https://flagcdn.com/48x36/bj.png
124,Chongqing,590,611,590,611,590,611,590,611,590,...,611,590,611,590,611,590,611,590,611,https://flagcdn.com/48x36/cq.png
196,Fujian,513,1372,516,1372,517,1383,518,1390,518,...,1352,509,1354,511,1358,512,1359,513,1363,https://flagcdn.com/48x36/fj.png
202,Gansu,182,356,182,356,182,356,182,356,182,...,356,182,356,182,356,182,356,182,356,https://flagcdn.com/48x36/gs.png
222,Guangdong,2046,3467,2051,3467,2053,3486,2057,3493,2060,...,3429,2040,3433,2041,3443,2044,3446,2046,3457,https://flagcdn.com/48x36/gd.png
223,Guangxi,264,630,264,637,264,641,264,650,264,...,599,264,605,264,610,264,613,264,622,https://flagcdn.com/48x36/gx.png
227,Guizhou,147,160,147,160,147,160,147,160,147,...,159,147,159,147,159,147,159,147,160,https://flagcdn.com/48x36/gz.png
230,Hainan,171,190,171,190,171,190,171,190,171,...,190,171,190,171,190,171,190,171,190,https://flagcdn.com/48x36/hi.png
239,Hebei,373,1458,374,1458,378,1458,392,1458,412,...,1458,373,1458,373,1458,373,1458,373,1458,https://flagcdn.com/48x36/he.png


In [20]:
# Column order: province name, flag URL, then the remaining (date) columns in
# ascending string order (letters sort after the digit-leading dates, so the
# two text columns are moved to the front explicitly).
cols = ['Province_State', 'State'] + sorted(
    c for c in df_confirmed.columns if c not in ('Province_State', 'State')
)

# The flag column is exported under the name 'Country_Flag'.
doc_final_country = df_confirmed[cols].rename(columns={'State': 'Country_Flag'})
cols = doc_final_country.columns.tolist()

doc_final_country.to_csv("COVID-19-master/final_china_covid_data_for_graph.csv")  # write the output file