# South Korea Covid Data Exploration

## Imports & helpers

In [18]:
# IMPORT

%matplotlib inline

# for figure
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
plt.style.use('seaborn-whitegrid')
import seaborn as sns
sns.set(color_codes=True, font_scale=1.33)

# built-in libs
import os
import datetime
from urllib.request import Request, urlopen
from xml.etree import ElementTree
import io
import json

# third party libs
import pandas as pd
import numpy as np
import requests

# project libs
import settings
from my_helpers.dates import add_days
from my_helpers.dates import generate_list_dates
from my_helpers.model import calc_sum_mobile
from my_helpers.model import calc_rt_from_sum
from my_helpers.model import NB_DAYS_CV
from my_helpers.utils import clean_file

# DEFINITIONS
# Root directory for the CSV/JSON files read and written below
# (taken from the project `settings` module).
PATH_TO_SAVE_DATA = settings.PATH_TO_SAVE_DATA

# CSV file holding the South Korea feature table.
PATH_DF_FEAT_KR = PATH_TO_SAVE_DATA + '/' + 'df_feat_kr.csv'
# CSV files for the weather data: raw API rows, and per-date means.
PATH_DF_METEO_RAW_KR = os.path.join(PATH_TO_SAVE_DATA, 'df_meteo_raw_kr.csv')
PATH_DF_METEO_KR = os.path.join(PATH_TO_SAVE_DATA, 'df_meteo_kr.csv')
DATE_FIRST_CASES_GOUV_KR = '2020-02-01' # First data date in Gouv KR
DATE_FIRST_FEAT_OK_KR = '2020-04-03' # First data age/cases/meteo for features
# data.go.kr Covid19 endpoints: national case counts, cases by age group,
# and cases by area (see connect_api_cases_kr / _age_kr / _area_kr).
URL_API_CASES_KR ='http://openapi.data.go.kr/openapi/service/rest/Covid19/' + \
    'getCovid19InfStateJson'
URL_API_AGE_KR = 'http://openapi.data.go.kr/openapi/service/rest/Covid19/' + \
    'getCovid19GenAgeCaseInfJson'
URL_API_AREA_KR = 'http://openapi.data.go.kr/openapi/service/rest/Covid19/' + \
    'getCovid19SidoInfStateJson'
# Service key sent as the `serviceKey` query parameter; it is stored already
# percent-encoded and is inserted into the URL verbatim.
# NOTE(review): this credential is committed in source and is printed with
# every request URL -- consider loading it from configuration instead.
API_KEY_KR = 'vdvTqgH%2ByZyoebTbIuQVedRNSnB9aP0IuNFfD4uIRnhALu4%2' + \
    'FUkxCDZSHp2Qx2S4IOfN3P3nJCQJbTbxk%2FdMAlA%3D%3D'

# Local GeoJSON file with the province outlines (despite the URL_ prefix this
# is a filesystem path; it is opened with open() further down).
URL_GEOJSON_AREA_KR = os.path.join(PATH_TO_SAVE_DATA, 'sources', 
                                   'skorea-provinces-geo-simple.json')

# Age-group label used by the API ("gubun") -> representative age of the
# group: the midpoint of each ten-year bracket, and 85 for "80 and over".
GUBUN_AGE_DICT_KR = {
    f"{age_low}-{age_low + 9}": np.mean([age_low, age_low + 9])
    for age_low in range(0, 80, 10)
}
GUBUN_AGE_DICT_KR["80 이상"] = 85

# Column names derived from the representative ages:
#   LIST_NBC     : cumulative-count columns, "nbC_<age>"
#   DICT_NBC     : "nbC_<age>" -> age (float)
#   DICT_AGE_POS : age (float) -> "pos_<age>" (daily-count column)
#   DICT_POS_AGE : "pos_<age>" -> age (float)
LIST_NBC = [f"nbC_{age}" for age in GUBUN_AGE_DICT_KR.values()]
DICT_NBC = {f"nbC_{age}": float(age) for age in GUBUN_AGE_DICT_KR.values()}
DICT_AGE_POS = {
    float(age): f"pos_{age}" for age in GUBUN_AGE_DICT_KR.values()
}
DICT_POS_AGE = {
    f"pos_{age}": float(age) for age in GUBUN_AGE_DICT_KR.values()
}

# Short area names; these are the per-area column names of the feature table
# and the order in which the per-area sums / Rt are computed.
LIST_AREA = [
    "Seoul", "Busan", "Daegu", "Incheon", "Gwangju", "Daejeon",
    "Ulsan", "Sejong", "Gyeonggi", "Gangwon", "Chungbuk", "Chungnam",
    "Jeonbuk", "Jeonnam", "Gyeongbuk", "Gyeongnam", "Jeju",
]


# Long area name (as found in the API "gubunEn" field and in the GeoJSON
# "NAME_1" property) -> short area name from LIST_AREA.
DICT_AREA = {
    "Jeju": "Jeju",
    "Gyeongsangnam-do": "Gyeongnam",
    "Gyeongsangbuk-do": "Gyeongbuk",
    "Jeollanam-do": "Jeonnam",
    "Jeollabuk-do": "Jeonbuk",
    "Chungcheongnam-do": "Chungnam",
    "Chungcheongbuk-do": "Chungbuk",
    "Gangwon-do": "Gangwon",
    "Gyeonggi-do": "Gyeonggi",
    "Sejong": "Sejong",
    "Ulsan": "Ulsan",
    "Daejeon": "Daejeon",
    "Gwangju": "Gwangju",
    "Incheon": "Incheon",
    "Daegu": "Daegu",
    "Busan": "Busan",
    "Seoul": "Seoul",
}
    
# meteo
# Visual Crossing weather-history endpoint and the key sent as `key=`.
# NOTE(review): the key is committed in source -- confirm this is intended.
URL_METEO_VC = (
    "https://weather.visualcrossing.com/"
    "VisualCrossingWebServices/rest/services/weatherdata/history"
)
API_KEY_METEO_VC = "7XNH4XB897R3PGSKJAKU7GGFL"

# Weather CSV column name -> feature-table column name.
DICT_COL_METEO = {
    "maxt": "T_max",
    "mint": "T_min",
    "humidity": "H_mean",
    "wspd": "W_speed",
}
LIST_COL_METEO = [*DICT_COL_METEO.values()]

# maps
# Read the GeoJSON explicitly as UTF-8 so that decoding does not depend on
# the platform's default locale encoding.
with open(URL_GEOJSON_AREA_KR, encoding="utf-8") as f:
    GEOJSON_KR = json.load(f)

# Long area names in the order of the GeoJSON features.
LIST_NAME_GEOJSON = [
    feat_curr["properties"]["NAME_1"] for feat_curr in GEOJSON_KR["features"]
]

# Short area names, plus the matching "sum_<area>" / "Rt_<area>" column
# names, all kept in GeoJSON feature order.
LIST_AREA_GEOJSON = [DICT_AREA[area] for area in LIST_NAME_GEOJSON]
LIST_SUM_GEOJSON = [f"sum_{area}" for area in LIST_AREA_GEOJSON]
LIST_RT_GEOJSON = [f"Rt_{area}" for area in LIST_AREA_GEOJSON]

# Map centre and zoom level for the country view.
LAT_LON_KR = {'lat': 36, 'lon': 128}
ZOOM_KR = 5.5
# HELPERS

# Utils
def update_append(df1, df2):
    '''
    Update existing rows and append new rows of 2 DataFrames

    Rows of df2 whose index label already exists in df1 overwrite the
    matching rows of df1 (via DataFrame.update, so NaN values in df2 do not
    overwrite); rows of df2 whose label is not in df1 are appended.

    df1 and df2 are left untouched: a new DataFrame is returned.
    Raises ValueError (from verify_integrity) if appending would create
    duplicate index labels.
    '''
    df1 = df1.copy()

    # True for each row of df2 whose label is already present in df1
    mask_update = df2.index.isin(df1.index)

    if mask_update.any():
        print("updating...")
        df1.update(df2.loc[mask_update])

    if (~mask_update).any():
        print("appending...")
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent.
        df1 = pd.concat([df1, df2.loc[~mask_update]], verify_integrity=True)

    return df1

# data plot korea
def connect_api_kr(url, date_req_start, date_req_end):
    '''
    Send a GET request to one of the KR open-data Covid19 endpoints.

    url : endpoint URL (without query string)
    date_req_start, date_req_end : "YYYY-MM-DD" strings; the dashes are
        removed to build the startCreateDt / endCreateDt parameters.

    Returns the raw response body (bytes).
    '''
    start_compact = date_req_start.replace("-", "")
    end_compact = date_req_end.replace("-", "")

    # API_KEY_KR is inserted verbatim: it is stored already percent-encoded.
    query_params = (f'?serviceKey={API_KEY_KR}'
                    f'&startCreateDt={start_compact}'
                    f'&endCreateDt={end_compact}')

    # NOTE(review): this prints the service key together with the URL.
    print(url + query_params)

    request = Request(url + query_params, method='GET')
    # Context manager closes the HTTP response once the body has been read.
    with urlopen(request) as response:
        return response.read()
    
    
def connect_api_cases_kr(date_req_start, date_req_end):
    '''
    Query the national cases endpoint (URL_API_CASES_KR) between 2 dates
    and return the raw response body.
    '''
    raw_cases = connect_api_kr(URL_API_CASES_KR, date_req_start,
                               date_req_end)
    return raw_cases

def connect_api_age_kr(date_req_start, date_req_end):
    '''
    Query the cases-by-age endpoint (URL_API_AGE_KR) between 2 dates
    and return the raw response body.
    '''
    raw_age = connect_api_kr(URL_API_AGE_KR, date_req_start, date_req_end)
    return raw_age

def connect_api_area_kr(date_req_start, date_req_end):
    '''
    Query the cases-by-area endpoint (URL_API_AREA_KR) between 2 dates
    and return the raw response body.
    '''
    raw_area = connect_api_kr(URL_API_AREA_KR, date_req_start, date_req_end)
    return raw_area

def convert_xml_area_kr(response_body):
    '''
    Convert into DataFrame the XML response from URL API Gouv KR cases by area

    response_body : XML document (body/items/item elements)

    Returns a DataFrame indexed by date (DatetimeIndex) with one column per
    short area name holding the "incDec" value, or None when the response
    contains no item. Items whose "gubunEn" is not a key of DICT_AREA are
    ignored.
    '''
    root = ElementTree.XML(response_body)
    items = root.find("body").find('items').findall("item")
    print("nb. new items: ", len(items))
    if not items:
        print("No update.")
        return None

    # Collect one record per item and build the frame once: DataFrame.append
    # was removed in pandas 2.0 and was quadratic when used row by row.
    records = []
    for item in items:
        # area
        area_long = item.find("gubunEn").text
        if area_long not in DICT_AREA:
            continue

        records.append({
            # date: first 10 characters of createDt
            "date": item.find("createDt").text[0:10],
            # positive cases
            DICT_AREA[area_long]: int(item.find("incDec").text),
        })

    if records:
        df_area_kr = pd.DataFrame(records)
    else:
        # no known area in the response: keep returning an empty frame
        df_area_kr = pd.DataFrame(columns=["date"])

    # clean one row per date
    df_area_kr = df_area_kr.groupby("date").max()
    df_area_kr.index.name = ""
    df_area_kr.index = pd.to_datetime(df_area_kr.index)
    return df_area_kr


def _xml_int_or_zero(element):
    '''
    Return int(element.text), or 0 when the element is missing or empty.
    '''
    if element is None or element.text is None or not element.text.strip():
        return 0
    return int(element.text)


def convert_xml_to_df_feat_kr(response_body):
    '''
    Convert into DataFrame the XML response from URL API Gouv KR cases

    response_body : XML document (body/items/item elements)

    Returns a DataFrame indexed by date (DatetimeIndex) with columns
    nb_cases, nb_tests, nb_deaths (cumulative totals as reported) and
    date ("YYYY-MM-DD" string), or None when the response has no item.
    When several items share a date, the maximum of each column is kept.
    '''
    root = ElementTree.XML(response_body)
    items = root.find("body").find('items').findall("item")
    print("nb. new items: ", len(items))
    if not items:
        print("No update.")
        return None

    # Collect one record per item and build the frame once: DataFrame.append
    # was removed in pandas 2.0 and was quadratic when used row by row.
    records = []
    for item in items:
        # date: stateDt is "YYYYMMDD"
        state_dt = item.find("stateDt").text
        records.append({
            "date": f'{state_dt[0:4]}-{state_dt[4:6]}-{state_dt[6:8]}',
            # nb_cases (total)
            "nb_cases": int(item.find("decideCnt").text),
            # nb test (total): counted as 0 when not reported
            "nb_tests": _xml_int_or_zero(item.find("accExamCompCnt")),
            # nb death (total): counted as 0 when not reported
            "nb_deaths": _xml_int_or_zero(item.find("deathCnt")),
        })

    df_feat_kr_tmp = pd.DataFrame(
        records, columns=["date", "nb_cases", "nb_tests", "nb_deaths"])

    # clean one row per date
    df_feat_kr_tmp = df_feat_kr_tmp.groupby("date").max()
    df_feat_kr_tmp.index.name = ""
    # keep the date string as a column; the index becomes a DatetimeIndex
    df_feat_kr_tmp["date"] = df_feat_kr_tmp.index
    df_feat_kr_tmp.index = pd.to_datetime(df_feat_kr_tmp.index)
    return df_feat_kr_tmp

def convert_xml_age_kr(response_body):
    '''
    Convert into DataFrame the XML response from URL API Gouv KR cases by age

    response_body : XML document (body/items/item elements)

    Returns a DataFrame with a daily DatetimeIndex and one "nbC_<age>"
    column per entry of LIST_NBC (the "confCase" value of each age group),
    plus "nbC_age", the row-wise sum of these columns. Days missing from
    the response are filled by forward linear interpolation.
    Returns None when the response has no item for a known age group.
    Items whose "gubun" is not a key of GUBUN_AGE_DICT_KR are ignored.
    '''
    root = ElementTree.XML(response_body)
    items = root.find("body").find('items').findall("item")
    print("nb. new items: ", len(items))
    if not items:
        print("No update.")
        return None

    # Collect one record per item and build the frame once: DataFrame.append
    # was removed in pandas 2.0 and was quadratic when used row by row.
    records = []
    for item in items:
        # age cat
        gubun = item.find("gubun").text
        if gubun not in GUBUN_AGE_DICT_KR:
            continue
        age_cat = GUBUN_AGE_DICT_KR[gubun]

        records.append({
            # date: first 10 characters of createDt
            "date": item.find("createDt").text[0:10],
            # nb_cases (total)
            f"nbC_{age_cat}": int(item.find("confCase").text),
        })

    if not records:
        # only unknown categories: nothing usable in this response
        print("No update.")
        return None

    df_age_kr = pd.DataFrame(records)

    # clean dates and interpolate if NaN :
    df_age_kr = df_age_kr.groupby("date")[LIST_NBC].sum()
    df_age_kr.index = pd.to_datetime(df_age_kr.index)
    df_age_kr = df_age_kr.resample('1D').asfreq()
    df_age_kr = df_age_kr.interpolate(method='linear',
                                      limit_direction='forward',
                                      axis=0)
    df_age_kr["nbC_age"] = df_age_kr.sum(axis=1)

    return df_age_kr
    

# check update ?
def check_update_df_feat_kr(date_now=None, force_update=False):
    '''
    Decide which parts of the KR feature table have to be downloaded.

    date_now : "YYYY-MM-DD" string, defaults to today
    force_update : if True, request everything from the first data date

    Returns a 5-tuple:
        flag_update        : True if cases/areas/meteo have to be updated
        flag_update_age    : True if the age data have to be updated
        date_req_start     : first date to request for cases (or None)
        date_req_start_age : first date to request for ages (or None)
        date_req_end       : last date to request (or None if no update)
    '''
    if date_now is None:
        date_now = datetime.datetime.now().strftime("%Y-%m-%d")

    if force_update:
        # same 5-value shape as the normal path (the caller unpacks 5 values)
        return True, True, DATE_FIRST_CASES_GOUV_KR, \
            DATE_FIRST_CASES_GOUV_KR, date_now

    if os.path.isfile(PATH_DF_FEAT_KR):
        df_feat_kr = pd.read_csv(PATH_DF_FEAT_KR)
        # cases: restart the day after the last saved date
        date_req_start = add_days(df_feat_kr["date"].max(), 1)
        # ages: restart from the last saved date that still has no age_pos
        if "age_pos" in df_feat_kr.columns:
            dates_no_age = \
                df_feat_kr.loc[df_feat_kr["age_pos"].isna(), "date"]
            # .max() of an empty selection is NaN: use None instead
            date_req_start_age = \
                None if dates_no_age.empty else dates_no_age.max()
        else:
            date_req_start_age = DATE_FIRST_CASES_GOUV_KR
    else:
        date_req_start = DATE_FIRST_CASES_GOUV_KR
        date_req_start_age = DATE_FIRST_CASES_GOUV_KR

    # dates are ISO strings, so string comparison is chronological
    flag_update = date_req_start < date_now
    flag_update_age = (date_req_start_age is not None) and \
        (date_req_start_age < date_now)

    if not flag_update:
        date_req_start = None

    if not flag_update_age:
        date_req_start_age = None

    if flag_update or flag_update_age:
        date_req_end = date_now
    else:
        date_req_end = None
    print("Updating Data KR...")
    print("update cases : ", flag_update)
    print("update age : ", flag_update_age)
    print("date_req_start: ", date_req_start)
    print("date_req_start_age: ", date_req_start_age)
    print("date_req_end: ", date_req_end)
    return flag_update, flag_update_age, \
        date_req_start, date_req_start_age, date_req_end
        

def get_update_df_feat_kr(date_now=None, force_update=False):
    '''
    Get only new data cases Gouv KR

    Downloads, as required by check_update_df_feat_kr: national cases,
    cases by area, weather and cases by age, and joins them on the date
    index.

    Returns a DataFrame with the new / refreshed rows, or None when there is
    nothing new. Side effect: the weather data are saved to
    PATH_DF_METEO_KR (and the raw rows by connect_api_meteo).
    '''
    flag_update, flag_update_age, date_req_start, date_req_start_age, \
        date_req_end = check_update_df_feat_kr(date_now, force_update)

    df_feat_kr_tmp = None
    if flag_update:
        response_body = connect_api_cases_kr(date_req_start, date_req_end)
        # None when the API returned no item
        df_feat_kr_tmp = convert_xml_to_df_feat_kr(response_body)

    if df_feat_kr_tmp is not None:
        # add day_num : day of week as a "%w" string ("0" = Sunday)
        df_feat_kr_tmp['day_num'] = \
            pd.to_datetime(df_feat_kr_tmp["date"]).dt.strftime("%w")

        # add areas (skipped when the API returned no area item)
        response_body = connect_api_area_kr(date_req_start, date_req_end)
        df_area_kr = convert_xml_area_kr(response_body)
        if df_area_kr is not None:
            df_feat_kr_tmp = df_feat_kr_tmp.join(df_area_kr)

        # add meteo : starts at the latest of the candidate start dates
        # (date_req_start_age is None when no age update is needed)
        list_date_start = [date_req_start, DATE_FIRST_FEAT_OK_KR]
        if date_req_start_age is not None:
            list_date_start.append(date_req_start_age)
        date_req_start_meteo = max(list_date_start)
        df_meteo = connect_api_meteo(date_req_start_meteo,
                                     date_req_end)
        # save meteo
        # NOTE(review): index=False drops the date index from this file --
        # confirm that is intended.
        df_meteo.to_csv(PATH_DF_METEO_KR, index=False)
        df_feat_kr_tmp = df_feat_kr_tmp.join(df_meteo)

    if flag_update_age:
        # age
        response_body = connect_api_age_kr(date_req_start_age, date_req_end)

        df_age_kr = convert_xml_age_kr(response_body)
        if df_age_kr is not None:
            if df_feat_kr_tmp is None:
                # if update for age but not for cases, have to load old
                # df_feat; keep only the saved rows covered by the age data
                # (.loc with labels missing from the index would raise)
                df_feat_kr = load_df_feat_kr()
                mask_age = df_feat_kr.index.isin(df_age_kr.index)
                df_feat_kr_tmp = df_feat_kr.loc[mask_age].copy()

            if LIST_NBC[0] not in df_feat_kr_tmp.columns:
                print("joining...")
                df_feat_kr_tmp = df_feat_kr_tmp.join(df_age_kr)
            else:
                print("updating...")
                df_feat_kr_tmp.update(df_age_kr)

    return df_feat_kr_tmp


def _join_replacing(df, ser, name):
    '''
    Join Series `ser` to `df` as column `name`, replacing an existing one.
    '''
    ser.name = name
    return df.drop(columns=[name], errors="ignore").join(ser)


def update_df_feat_fr(date_now=None, force_update=False, force_calc=False):
    '''
    Update Df Feat with new cases from Gouv KR
    force_update : to replace existing file
    force_calc : to force redo final calculation

    Returns the full feature DataFrame with the derived columns
    (pos, test, sum_cases, sum_tests, Rt, sum_<area>, Rt_<area>, rate_pos,
    pos_<age>, daily_age, age_pos), or None when no data are available.
    When nothing new was downloaded and force_calc is False, the saved table
    is returned as loaded, without recalculation.
    '''
    # get just new data
    df_feat_kr_tmp = get_update_df_feat_kr(date_now, force_update)

    # what to do with new data ? : force to be updated totally ?
    if force_update or not os.path.isfile(PATH_DF_FEAT_KR):
        df_feat_kr = df_feat_kr_tmp
    else:
        df_feat_kr = load_df_feat_kr()
        if df_feat_kr_tmp is not None:
            df_feat_kr = update_append(df_feat_kr, df_feat_kr_tmp)

    if df_feat_kr is None:
        # neither saved nor downloaded data
        return None

    if (df_feat_kr_tmp is None) and (not force_calc):
        return df_feat_kr

    # The derived values below are computed for every path, including
    # force_update (a forced download needs them as well).

    # calculate derivative values
    df_feat_kr["pos"] = df_feat_kr["nb_cases"].diff()
    df_feat_kr["test"] = df_feat_kr["nb_tests"].diff()

    # calculate sum-cases
    ser_sum = calc_sum_mobile(df_feat_kr["date"], df_feat_kr["pos"],
                              NB_DAYS_CV)
    df_feat_kr = _join_replacing(df_feat_kr, ser_sum, "sum_cases")

    # calculate sum-tests
    ser_sum_t = calc_sum_mobile(df_feat_kr["date"], df_feat_kr["test"],
                                NB_DAYS_CV)
    df_feat_kr = _join_replacing(df_feat_kr, ser_sum_t, "sum_tests")

    # calculate Rt country : Rt
    ser_rt = calc_rt_from_sum(df_feat_kr["sum_cases"], NB_DAYS_CV)
    df_feat_kr = _join_replacing(df_feat_kr, ser_rt, "Rt")

    # calculation sums by area
    for area_curr in LIST_AREA:
        # calculate sum-cases by area : col= sum_"area"
        ser_sum = calc_sum_mobile(df_feat_kr["date"],
                                  df_feat_kr[area_curr],
                                  NB_DAYS_CV)
        df_feat_kr = _join_replacing(df_feat_kr, ser_sum,
                                     f"sum_{area_curr}")

        # calculate Rt by area : col= Rt_"area"
        ser_rt = calc_rt_from_sum(df_feat_kr[f"sum_{area_curr}"],
                                  NB_DAYS_CV)
        df_feat_kr = _join_replacing(df_feat_kr, ser_rt,
                                     f"Rt_{area_curr}")

    # positive rate (in %) from the mobile sums of cases and tests
    df_feat_kr["rate_pos"] = \
        100*df_feat_kr["sum_cases"] / df_feat_kr["sum_tests"]

    # age calculation : daily cases per age group from the cumulative counts
    for nbC_curr in LIST_NBC:
        df_feat_kr[DICT_AGE_POS[DICT_NBC[nbC_curr]]] = \
            df_feat_kr[nbC_curr].diff()

    # mean age of daily cases = sum(daily cases * group age) / daily cases
    df_feat_kr["age_pos"] = 0
    df_feat_kr["daily_age"] = df_feat_kr["nbC_age"].diff()

    for age_curr, pos_curr in DICT_AGE_POS.items():
        df_feat_kr["age_pos"] += df_feat_kr[pos_curr]*age_curr

    df_feat_kr["age_pos"] /= df_feat_kr["daily_age"]

    return df_feat_kr

def load_df_feat_kr():
    '''
    Read the South Korea feature table from PATH_DF_FEAT_KR.

    The "date" column is kept and also used, converted to datetimes,
    as the index of the returned DataFrame.
    '''
    table_kr = pd.read_csv(PATH_DF_FEAT_KR)
    table_kr.index = pd.to_datetime(table_kr["date"])
    return table_kr

def connect_api_meteo(date_req_start, date_req_end):
    '''
    Download daily weather history between 2 dates from Visual Crossing.

    The period is requested in several date ranges (see
    create_date_range_lim) for the 3 locations listed in the query
    (Seoul, Busan, Daegu).

    date_req_start, date_req_end : "YYYY-MM-DD" strings

    Returns a DataFrame indexed by date (DatetimeIndex) holding, for the
    columns of LIST_COL_METEO, the mean over all rows of each date.
    Side effect: the raw rows are saved to PATH_DF_METEO_RAW_KR.
    Raises ValueError when there is no date range to request, and
    requests.HTTPError when the service answers with an error status.
    '''
    def fun_date_meteo(str_in):
        # year at [6:10], month at [0:2], day at [3:5] -> "YYYY-MM-DD"
        return f"{str_in[6:10]}-{str_in[0:2]}-{str_in[3:5]}"

    def create_query(date_req_start, date_req_end):
        return "?aggregateHours=24" + \
        "&combinationMethod=aggregate" + \
        f"&startDateTime={date_req_start}T00%3A00%3A00" + \
        f"&endDateTime={date_req_end}T00%3A00%3A00" + \
        "&maxStations=-1" + \
        "&maxDistance=-1" + \
        "&shortColumnNames=true" + \
        "&sendAsDatasource=true" + \
        "&contentType=csv" + \
        "&unitGroup=metric" + \
        "&locationMode=array" + \
        f"&key={API_KEY_METEO_VC}" + \
        "&dataElements=all" + \
        "&locations=Seoul%20south%20korea" + \
        "%7Cbusan%20south%20korea" + \
        "%7CDaegu%20South%20Korea"

    list_dates_start, list_dates_end = create_date_range_lim(date_req_start,
                                                             date_req_end)
    # A single range may come back as two plain strings; zip() over strings
    # would iterate character by character, so wrap them in lists.
    if isinstance(list_dates_start, str):
        list_dates_start = [list_dates_start]
    if isinstance(list_dates_end, str):
        list_dates_end = [list_dates_end]

    list_df_meteo = []
    for date_start, date_end in zip(list_dates_start, list_dates_end):

        queryParams = create_query(date_start, date_end)
        # NOTE(review): this prints the API key together with the URL.
        print(URL_METEO_VC + queryParams)

        # Requests
        response = requests.get(URL_METEO_VC + queryParams)
        # fail here on HTTP errors rather than parsing an error page as CSV
        response.raise_for_status()
        list_df_meteo.append(
            pd.read_csv(io.StringIO(response.content.decode('utf-8')),
                        sep=",", low_memory=False))

    if not list_df_meteo:
        raise ValueError(
            f"no date range to request between {date_req_start} "
            f"and {date_req_end}")

    # DataFrame.append was removed in pandas 2.0: concatenate once
    df_meteo_kr = pd.concat(list_df_meteo)

    df_meteo_kr.to_csv(PATH_DF_METEO_RAW_KR, index=False)

    # reformat cols
    df_meteo_kr.rename(columns=DICT_COL_METEO, inplace=True)
    df_meteo_kr["date"] = df_meteo_kr["datetime"].apply(fun_date_meteo)
    # calculate mean for output
    df_meteo_kr = df_meteo_kr.groupby("date")[LIST_COL_METEO].mean()
    df_meteo_kr.index = pd.to_datetime(df_meteo_kr.index)

    return df_meteo_kr

def create_date_range_lim(date_req_start, date_req_end, n_days=32):
    '''
    Create lists of date range limited to n_days

    date_req_start, date_req_end : "YYYY-MM-DD" strings
    n_days : maximum number of days covered by one range

    Returns (list_dates_start, list_dates_end): two lists of the same
    length, one (start, end) pair per range. Both are always lists, also
    when a single range is enough, so that callers can zip() them.
    '''
    # the list of dates is generated from the day before date_req_start
    date_req_start_lim = add_days(date_req_start, -1)
    list_dates = generate_list_dates(date_req_start_lim, date_req_end)

    if len(list_dates) <= n_days:
        # the whole period fits in one range
        return [date_req_start], [date_req_end]

    list_dates_start = []
    list_dates_end = []
    for idx_start in range(0, len(list_dates), n_days):
        date_start = list_dates[idx_start]
        list_dates_start.append(date_start)
        # ISO date strings: min() keeps the earlier date, so the last range
        # never goes past date_req_end
        list_dates_end.append(min(add_days(date_start, n_days - 1),
                                  date_req_end))
    return list_dates_start, list_dates_end

## Update data

In [33]:
#print(response_body)
# Download the new data and recompute the derived columns.
df_feat_kr = update_df_feat_fr()
# Called on the output path before saving; the captured output below shows
# it reporting when that file does not exist.
# NOTE(review): presumably it removes the previous file -- see
# my_helpers.utils.clean_file.
clean_file(PATH_DF_FEAT_KR)
# The date stays available as the "date" column, so the index is not saved.
df_feat_kr.to_csv(PATH_DF_FEAT_KR, index=False)
df_feat_kr

Updating Data KR...
update cases :  True
update age :  True
date_req_start:  2020-02-01
date_req_start_age:  2020-02-01
date_req_end:  2020-12-24
http://openapi.data.go.kr/openapi/service/rest/Covid19/getCovid19InfStateJson?serviceKey=vdvTqgH%2ByZyoebTbIuQVedRNSnB9aP0IuNFfD4uIRnhALu4%2FUkxCDZSHp2Qx2S4IOfN3P3nJCQJbTbxk%2FdMAlA%3D%3D&startCreateDt=20200201&endCreateDt=20201224
nb. new items:  367
http://openapi.data.go.kr/openapi/service/rest/Covid19/getCovid19SidoInfStateJson?serviceKey=vdvTqgH%2ByZyoebTbIuQVedRNSnB9aP0IuNFfD4uIRnhALu4%2FUkxCDZSHp2Qx2S4IOfN3P3nJCQJbTbxk%2FdMAlA%3D%3D&startCreateDt=20200201&endCreateDt=20201224
nb. new items:  5952
https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/weatherdata/history?aggregateHours=24&combinationMethod=aggregate&startDateTime=2020-04-03T00%3A00%3A00&endDateTime=2020-05-04T00%3A00%3A00&maxStations=-1&maxDistance=-1&shortColumnNames=true&sendAsDatasource=true&contentType=csv&unitGroup=metric&locationMode=array&key=


divide by zero encountered in true_divide


invalid value encountered in true_divide



File /Users/gregory/Documents/CloudStationSinchon/Applications/python/CoronaVirus/code/coronavirusModel/df_feat_kr.csv does not exist!


Unnamed: 0,nb_cases,nb_tests,nb_deaths,date,day_num,Jeju,Gyeongnam,Gyeongbuk,Jeonnam,Jeonbuk,...,pos_14.5,pos_24.5,pos_34.5,pos_44.5,pos_54.5,pos_64.5,pos_74.5,pos_85,age_pos,daily_age
,,,,,,,,,,,,,,,,,,,,,
2020-02-02,2,0,2.0,2020-02-02,0,,,,,,...,,,,,,,,,,
2020-02-03,15,0,0.0,2020-02-03,1,,,,,,...,,,,,,,,,,
2020-02-04,0,0,0.0,2020-02-04,2,,,,,,...,,,,,,,,,,
2020-02-05,19,0,0.0,2020-02-05,3,,,,,,...,,,,,,,,,,
2020-02-06,23,0,0.0,2020-02-06,4,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-20,49665,3543619,674.0,2020-12-20,0,24.0,51.0,25.0,4.0,29.0,...,70.0,137.0,142.0,152.0,233.0,188.0,79.0,47.0,46.530594,1095.0
2020-12-21,50591,3567423,698.0,2020-12-21,1,23.0,15.0,48.0,9.0,13.0,...,45.0,97.0,117.0,124.0,195.0,161.0,87.0,54.0,48.460043,926.0
2020-12-22,51460,3621303,722.0,2020-12-22,2,19.0,18.0,59.0,5.0,4.0,...,56.0,105.0,104.0,124.0,180.0,132.0,86.0,51.0,47.981588,869.0


In [160]:
#df_feat_kr = update_df_feat_fr(date_now=None, force_update=False, 
#                               force_calc=True)

Updating Data KR...
update cases :  False
update age :  True
date_req_start:  None
date_req_start_age:  2020-09-20
date_req_end:  2020-12-24
http://openapi.data.go.kr/openapi/service/rest/Covid19/getCovid19GenAgeCaseInfJson?serviceKey=vdvTqgH%2ByZyoebTbIuQVedRNSnB9aP0IuNFfD4uIRnhALu4%2FUkxCDZSHp2Qx2S4IOfN3P3nJCQJbTbxk%2FdMAlA%3D%3D&startCreateDt=20200920&endCreateDt=20201224
nb. new items:  1045
updating...
updating...
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23



divide by zero encountered in true_divide


invalid value encountered in true_divide



str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23


In [161]:
# Display the current feature table.
df_feat_kr

Unnamed: 0_level_0,nb_cases,nb_tests,nb_deaths,date,day_num,Jeju,Gyeongnam,Gyeongbuk,Jeonnam,Jeonbuk,...,sum_Jeonbuk,Rt_Jeonbuk,sum_Jeonnam,Rt_Jeonnam,sum_Gyeongbuk,Rt_Gyeongbuk,sum_Gyeongnam,Rt_Gyeongnam,sum_Jeju,Rt_Jeju
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-02,2.0,0.0,2.0,2020-02-02,0.0,,,,,,...,,,,,,,,,,
2020-02-03,15.0,0.0,0.0,2020-02-03,1.0,,,,,,...,,,,,,,,,,
2020-02-04,0.0,0.0,0.0,2020-02-04,2.0,,,,,,...,,,,,,,,,,
2020-02-05,19.0,0.0,0.0,2020-02-05,3.0,,,,,,...,,,,,,,,,,
2020-02-06,23.0,0.0,0.0,2020-02-06,4.0,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-19,48570.0,3520014.0,659.0,2020-12-19,6.0,24.0,32.0,29.0,5.0,15.0,...,237.0,1.179104,59.0,0.614583,205.0,2.050000,314.0,1.297521,120.0,5.454545
2020-12-20,49665.0,3543619.0,674.0,2020-12-20,0.0,24.0,51.0,25.0,4.0,29.0,...,254.0,1.263682,55.0,0.604396,219.0,2.085714,349.0,1.460251,144.0,6.545455
2020-12-21,50591.0,3567423.0,698.0,2020-12-21,1.0,23.0,15.0,48.0,9.0,13.0,...,245.0,1.144860,61.0,0.685393,258.0,2.345455,348.0,1.375494,167.0,7.952381
2020-12-22,51460.0,3621303.0,722.0,2020-12-22,2.0,19.0,18.0,59.0,5.0,4.0,...,236.0,1.113208,59.0,0.662921,312.0,2.943396,353.0,1.342205,184.0,8.363636


In [154]:
# Reload the saved feature table and recompute, for every area, the mobile
# sum of cases ("sum_<area>") and the Rt derived from it ("Rt_<area>").
# This is the same per-area loop as in update_df_feat_fr.
df_feat_kr = load_df_feat_kr()
# calculation sums by area
for area_curr in LIST_AREA:
    # calculate sum-cases by area
    ser_sum = calc_sum_mobile(df_feat_kr["date"], df_feat_kr[area_curr], 
                                  NB_DAYS_CV)
    ser_sum.name = f"sum_{area_curr}"
    # drop a previous version of the column before joining the new one
    df_feat_kr.drop(columns=[ser_sum.name], inplace=True, errors="ignore")
    df_feat_kr = df_feat_kr.join(ser_sum)
    
    # calculate Rt by area
    ser_rt = calc_rt_from_sum(df_feat_kr[f"sum_{area_curr}"], NB_DAYS_CV)
    ser_rt.name = f"Rt_{area_curr}"
    df_feat_kr.drop(columns=[ser_rt.name], inplace=True, errors="ignore")
    df_feat_kr = df_feat_kr.join(ser_rt)

df_feat_kr   

str_date_min:  2020-02-02
str_date_max:  2020-12-23



divide by zero encountered in true_divide


invalid value encountered in true_divide



str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23
str_date_min:  2020-02-02
str_date_max:  2020-12-23


Unnamed: 0_level_0,nb_cases,nb_tests,nb_deaths,date,day_num,Jeju,Gyeongnam,Gyeongbuk,Jeonnam,Jeonbuk,...,sum_Jeonbuk,Rt_Jeonbuk,sum_Jeonnam,Rt_Jeonnam,sum_Gyeongbuk,Rt_Gyeongbuk,sum_Gyeongnam,Rt_Gyeongnam,sum_Jeju,Rt_Jeju
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-02,2.0,0.0,2.0,2020-02-02,0.0,,,,,,...,,,,,,,,,,
2020-02-03,15.0,0.0,0.0,2020-02-03,1.0,,,,,,...,,,,,,,,,,
2020-02-04,0.0,0.0,0.0,2020-02-04,2.0,,,,,,...,,,,,,,,,,
2020-02-05,19.0,0.0,0.0,2020-02-05,3.0,,,,,,...,,,,,,,,,,
2020-02-06,23.0,0.0,0.0,2020-02-06,4.0,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-19,48570.0,3520014.0,659.0,2020-12-19,6.0,24.0,32.0,29.0,5.0,15.0,...,237.0,1.179104,59.0,0.614583,205.0,2.050000,314.0,1.297521,120.0,5.454545
2020-12-20,49665.0,3543619.0,674.0,2020-12-20,0.0,24.0,51.0,25.0,4.0,29.0,...,254.0,1.263682,55.0,0.604396,219.0,2.085714,349.0,1.460251,144.0,6.545455
2020-12-21,50591.0,3567423.0,698.0,2020-12-21,1.0,23.0,15.0,48.0,9.0,13.0,...,245.0,1.144860,61.0,0.685393,258.0,2.345455,348.0,1.375494,167.0,7.952381
2020-12-22,51460.0,3621303.0,722.0,2020-12-22,2.0,19.0,18.0,59.0,5.0,4.0,...,236.0,1.113208,59.0,0.662921,312.0,2.943396,353.0,1.342205,184.0,8.363636


## Load Data

In [7]:
# Load the saved feature table (read from PATH_DF_FEAT_KR, indexed by date)
# and display it.
df_feat_kr = load_df_feat_kr()
df_feat_kr

Unnamed: 0_level_0,nb_cases,nb_tests,nb_deaths,date,day_num,Jeju,Gyeongnam,Gyeongbuk,Jeonnam,Jeonbuk,...,pos_14.5,pos_24.5,pos_34.5,pos_44.5,pos_54.5,pos_64.5,pos_74.5,pos_85,age_pos,daily_age
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-02,2,0,2.0,2020-02-02,0,,,,,,...,,,,,,,,,,
2020-02-03,15,0,0.0,2020-02-03,1,,,,,,...,,,,,,,,,,
2020-02-04,0,0,0.0,2020-02-04,2,,,,,,...,,,,,,,,,,
2020-02-05,19,0,0.0,2020-02-05,3,,,,,,...,,,,,,,,,,
2020-02-06,23,0,0.0,2020-02-06,4,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-20,49665,3543619,674.0,2020-12-20,0,24.0,51.0,25.0,4.0,29.0,...,70.0,137.0,142.0,152.0,233.0,188.0,79.0,47.0,46.530594,1095.0
2020-12-21,50591,3567423,698.0,2020-12-21,1,23.0,15.0,48.0,9.0,13.0,...,45.0,97.0,117.0,124.0,195.0,161.0,87.0,54.0,48.460043,926.0
2020-12-22,51460,3621303,722.0,2020-12-22,2,19.0,18.0,59.0,5.0,4.0,...,56.0,105.0,104.0,124.0,180.0,132.0,86.0,51.0,47.981588,869.0
2020-12-23,52550,3673462,739.0,2020-12-23,3,33.0,29.0,25.0,12.0,21.0,...,62.0,101.0,124.0,144.0,245.0,189.0,96.0,83.0,49.556422,1090.0


## Age & Positive rate

In [147]:
# Positive rate in %: 100 * sum_cases / sum_tests (see update_df_feat_fr).
df_feat_kr["rate_pos"]

date
2020-02-02         NaN
2020-02-03         NaN
2020-02-04         NaN
2020-02-05         NaN
2020-02-06         NaN
                ...   
2020-12-19    2.872211
2020-12-20    2.909947
2020-12-21    2.917450
2020-12-22    2.697874
2020-12-23    2.648464
Name: rate_pos, Length: 326, dtype: float64

In [148]:
# Mean age of the daily positive cases (see update_df_feat_fr).
df_feat_kr["age_pos"]

date
2020-02-02          NaN
2020-02-03          NaN
2020-02-04          NaN
2020-02-05          NaN
2020-02-06          NaN
                ...    
2020-12-19    48.498578
2020-12-20    46.530594
2020-12-21    48.460043
2020-12-22    47.981588
2020-12-23    49.556422
Name: age_pos, Length: 326, dtype: float64

In [55]:
def _last_valid(series):
    '''Return the last non-NaN value of series (NaN if it has none).'''
    valid = series.dropna()
    return valid.values[-1] if len(valid) else float("nan")


def create_fig_pos_rate(df_feat_fr, country="France"):
    '''
    Figure : positive rate and mean age of positive cases over time.

    data : 
     - df_feat_fr : DataFrame with the columns "date", "rate_pos", "age_pos"
     - country : name displayed in the figure title

    "rate_pos" is plotted as given on the left axis (red, filled area) and is
    labelled as a percentage in the subtitle; "age_pos" is plotted on the
    right axis (blue).

    The subtitle shows the last date of df_feat_fr together with the most
    recent non-NaN rate and age: the latest rows may not be filled yet, and
    formatting a NaN would display "nan" in the title.

    return : plotly figure
    '''
    rate_pos = df_feat_fr["rate_pos"]
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    fig.add_trace(go.Scatter(x=df_feat_fr["date"], 
        y=rate_pos.values,
        mode='lines', name="pos. rate", line=dict(color="red"),
        fill='tozeroy'), secondary_y=False)

    fig.add_trace(go.Scatter(x=df_feat_fr["date"], 
            y=df_feat_fr["age_pos"],
            mode='lines', name='pos. age', 
            line=dict(color="blue")), secondary_y=True)

    # last known values (no IndexError when a column holds no valid value)
    rate_last = _last_valid(rate_pos)
    age_last = _last_valid(df_feat_fr["age_pos"])
    # an empty table has no last date to display
    date_last = df_feat_fr["date"].values[-1] if len(df_feat_fr) else "n/a"

    subtitle_curr = \
        f'<i>{date_last}:</i> ' + \
        'pos. rate:<b> {:.1f}</b>'.format(rate_last) + \
        " %<br>mean pos. age:<b> {:.1f}</b>".format(age_last)

    fig.update_layout(showlegend=True, font=dict(size=12),
        title=dict(text=f"Positive rate and age: <b>{country}</b><br>" + \
        subtitle_curr,
        xanchor="center", x=0.5, yanchor="top", y=0.95)
    )

    # each axis takes the color of its curve
    fig.update_yaxes({"color": "red",}, secondary_y=False)

    fig.update_yaxes({"color": "blue"}, secondary_y=True)
    fig.update_layout(margin={"r":0,"t":70, "l":50}) 
    fig.update_layout(legend_orientation="h", legend=dict(x=0, y=1))

    # footnote below the plot area (position given in paper coordinates)
    fig.add_annotation(
                x=0,
                y=-0.18,
                text="<i>Only global country Curve available<br></i>")
    fig.update_annotations(dict(
                xref="paper",
                yref="paper",
                showarrow=False
    ))

    return fig

In [58]:
create_fig_pos_rate(df_feat_kr, "South Korea")

In [149]:
# Inline version of the "positive rate and age" figure for South Korea:
# rate on the left axis (red, filled), mean age of positive cases on the
# right axis (blue).
rate_pos = df_feat_kr["rate_pos"]
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(go.Scatter(x=df_feat_kr["date"], 
    y=rate_pos.values,
    mode='lines', name="pos. rate", line=dict(color="red"),
    fill='tozeroy'), secondary_y=False)

fig.add_trace(go.Scatter(x=df_feat_kr["date"], 
        y=df_feat_kr["age_pos"],
        mode='lines', name='pos. age', 
        line=dict(color="blue")), secondary_y=True)

# The latest day can lack data (NaN): show the last known values in the
# subtitle instead of "nan".
rate_valid = rate_pos.dropna()
rate_last = rate_valid.values[-1] if len(rate_valid) else float("nan")
age_valid = df_feat_kr["age_pos"].dropna()
age_last = age_valid.values[-1] if len(age_valid) else float("nan")

subtitle_curr = \
    f'<i>{df_feat_kr["date"].values[-1]}:</i> ' + \
    'pos. rate:<b> {:.1f}</b>'.format(rate_last) + \
    " %<br>mean pos. age:<b> {:.1f}</b>".format(age_last)

fig.update_layout(showlegend=True, font=dict(size=12),
    title=dict(text="Positive rate and age: <b>SOUTH KOREA</b><br>" + \
    subtitle_curr,
    xanchor="center", x=0.5, yanchor="top", y=0.95)
)

# each axis takes the color of its curve
fig.update_yaxes({"color": "red",}, secondary_y=False)

fig.update_yaxes({"color": "blue"}, secondary_y=True)
fig.update_layout(margin={"r":0,"t":70, "l":50}) 
fig.update_layout(legend_orientation="h", legend=dict(x=0, y=1))

# footnote below the plot area (position given in paper coordinates)
fig.add_annotation(
            x=0,
            y=-0.18,
            text="<i>Only global country Curve available<br></i>")
fig.update_annotations(dict(
            xref="paper",
            yref="paper",
            showarrow=False
))

## Tests API

### API for Age

In [93]:
# Manual test of the age API: request a short window of dates
date_req_start = '2020-12-23'
date_req_end = '2020-12-24'

# Raw response returned for the requested window
response_body = connect_api_age_kr(date_req_start, date_req_end)

# Convert the raw response into a table indexed by date and display it
df_age_kr = convert_xml_age_kr(response_body)
df_age_kr

http://openapi.data.go.kr/openapi/service/rest/Covid19/getCovid19GenAgeCaseInfJson?serviceKey=vdvTqgH%2ByZyoebTbIuQVedRNSnB9aP0IuNFfD4uIRnhALu4%2FUkxCDZSHp2Qx2S4IOfN3P3nJCQJbTbxk%2FdMAlA%3D%3D&startCreateDt=20201223&endCreateDt=20201224
nb. new items:  11


Unnamed: 0_level_0,nbC_4.5,nbC_14.5,nbC_24.5,nbC_34.5,nbC_44.5,nbC_54.5,nbC_64.5,nbC_74.5,nbC_85,nbC_age
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-12-23,1761.0,3268.0,8758.0,6642.0,7367.0,9827.0,8204.0,4161.0,2562.0,52550.0


In [94]:
df_age_kr.index

DatetimeIndex(['2020-12-23'], dtype='datetime64[ns]', name='date', freq='D')

In [136]:
# Reload the features table, then keep only the rows whose dates are
# present in df_age_kr (the dates just returned by the age API)
df_feat_kr = load_df_feat_kr()
df_feat_kr_tmp = df_feat_kr.loc[df_age_kr.index]

In [137]:
df_feat_kr["nbC_14.5"]

date
2020-02-02       NaN
2020-02-03       NaN
2020-02-04       NaN
2020-02-05       NaN
2020-02-06       NaN
               ...  
2020-12-19    3035.0
2020-12-20    3105.0
2020-12-21    3150.0
2020-12-22    3206.0
2020-12-23       NaN
Name: nbC_14.5, Length: 326, dtype: float64

In [138]:
# Previous attempts with pandas append / concat, kept for reference:
#df_test = df_feat_kr.append(df_feat_kr_tmp, verify_integrity=True)
#pd.concat([df_feat_kr, df_feat_kr_tmp], join='inner', verify_integrity=True)
# Merge the new age data into the features table with the project helper
# NOTE(review): presumably updates existing rows and appends new ones -
# confirm against update_append
df_test = update_append(df_feat_kr, df_age_kr) 
df_test                                        

updating...


Unnamed: 0_level_0,nb_cases,nb_tests,nb_deaths,date,day_num,Jeju,Gyeongnam,Gyeongbuk,Jeonnam,Jeonbuk,...,pos_14.5,pos_24.5,pos_34.5,pos_44.5,pos_54.5,pos_64.5,pos_74.5,pos_85,age_pos,daily_age
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-02,2,0,2.0,2020-02-02,0,,,,,,...,,,,,,,,,,
2020-02-03,15,0,0.0,2020-02-03,1,,,,,,...,,,,,,,,,,
2020-02-04,0,0,0.0,2020-02-04,2,,,,,,...,,,,,,,,,,
2020-02-05,19,0,0.0,2020-02-05,3,,,,,,...,,,,,,,,,,
2020-02-06,23,0,0.0,2020-02-06,4,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-19,48570,3520014,659.0,2020-12-19,6,24.0,32.0,29.0,5.0,15.0,...,66.0,135.0,111.0,154.0,220.0,176.0,83.0,77.0,48.498578,1055.0
2020-12-20,49665,3543619,674.0,2020-12-20,0,24.0,51.0,25.0,4.0,29.0,...,70.0,137.0,142.0,152.0,233.0,188.0,79.0,47.0,46.530594,1095.0
2020-12-21,50591,3567423,698.0,2020-12-21,1,23.0,15.0,48.0,9.0,13.0,...,45.0,97.0,117.0,124.0,195.0,161.0,87.0,54.0,48.460043,926.0
2020-12-22,51460,3621303,722.0,2020-12-22,2,19.0,18.0,59.0,5.0,4.0,...,56.0,105.0,104.0,124.0,180.0,132.0,86.0,51.0,47.981588,869.0


In [67]:
# Experiment: merging df_age_kr into the features table twice in a row.
# The age columns are joined when absent and updated in place when present,
# and the same window of dates is printed after each pass for comparison.
# NOTE(review): read_csv leaves "date" as text here; join/update align on the
# index, so df_age_kr is assumed to be indexed the same way - to confirm.
df_feat_kr = pd.read_csv(PATH_DF_FEAT_KR)
df_feat_kr.index = df_feat_kr["date"]

print("\nbefore 1st time : ")
print(df_feat_kr[df_feat_kr.date.between("2020-04-01", "2020-04-11")])

for msg_step in ("\nafter 1st time join : ", "\nfinal update: "):
    if LIST_NBC[0] in df_feat_kr.columns:
        # age columns already there : overwrite with the non-NaN new values
        print("updating...")
        df_feat_kr.update(df_age_kr)
    else:
        # first time : add the age columns
        print("joining...")
        df_feat_kr = df_feat_kr.join(df_age_kr)
    print(msg_step)
    print(df_feat_kr[df_feat_kr.date.between("2020-04-01", "2020-04-11")])


before 1st time : 
            nb_cases  nb_tests  nb_deaths        date  day_num    age_pos  \
date                                                                        
2020-04-01      9887    404962      165.0  2020-04-01        3        NaN   
2020-04-02      9976    413858      169.0  2020-04-02        4        NaN   
2020-04-03     10062    424365      174.0  2020-04-03        5        NaN   
2020-04-04     10156    434888      177.0  2020-04-04        6        NaN   
2020-04-05     10237    441662      183.0  2020-04-05        0        NaN   
2020-04-06     10284    447509      186.0  2020-04-06        1        NaN   
2020-04-07     10331    456654      192.0  2020-04-07        2  39.547985   
2020-04-08     10384    468145      200.0  2020-04-08        3  40.724490   
2020-04-09     10423    479202      204.0  2020-04-09        4  32.000000   
2020-04-10     10450    487753      208.0  2020-04-10        5  42.576923   
2020-04-11     10480    496409      211.0  2020-04-11   

In [70]:
DICT_AGE_POS

{4.5: 'pos_4.5',
 14.5: 'pos_14.5',
 24.5: 'pos_24.5',
 34.5: 'pos_34.5',
 44.5: 'pos_44.5',
 54.5: 'pos_54.5',
 64.5: 'pos_64.5',
 74.5: 'pos_74.5',
 85.0: 'pos_85'}

In [71]:
# Daily positive cases per age class : day-to-day difference of each
# cumulative "nbC_*" column, stored in the matching "pos_*" column
for col_cum in LIST_NBC:
    col_daily = DICT_AGE_POS[DICT_NBC[col_cum]]
    df_feat_kr[col_daily] = df_feat_kr[col_cum].diff()

# Mean age of daily positive cases :
#   sum(age of class * daily cases of class) / daily cases of all classes
age_weighted = 0
for age_class, col_daily in DICT_AGE_POS.items():
    age_weighted = age_weighted + df_feat_kr[col_daily] * age_class
daily_total = df_feat_kr["nbC_age"].diff()

df_feat_kr["age_pos"] = age_weighted / daily_total
df_feat_kr["daily_age"] = daily_total

# check the result on the first days with age data
df_feat_kr[df_feat_kr.date.between("2020-04-01", "2020-04-11")]

Unnamed: 0_level_0,nb_cases,nb_tests,nb_deaths,date,day_num,age_pos,pos,test,sum_cases,Rt,...,pos_4.5,pos_14.5,pos_24.5,pos_34.5,pos_44.5,pos_54.5,pos_64.5,pos_74.5,pos_85,daily_age
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-04-01,9887,404962,165.0,2020-04-01,3,,101.0,11290.0,1474.0,0.477796,...,,,,,,,,,,
2020-04-02,9976,413858,169.0,2020-04-02,4,,89.0,8896.0,1411.0,0.504109,...,,,,,,,,,,
2020-04-03,10062,424365,174.0,2020-04-03,5,41.898165,86.0,10507.0,1410.0,0.595439,...,2.8,5.8,32.6,16.0,14.0,11.6,13.8,7.6,4.8,109.0
2020-04-04,10156,434888,177.0,2020-04-04,6,41.898165,94.0,10523.0,1357.0,0.667815,...,2.8,5.8,32.6,16.0,14.0,11.6,13.8,7.6,4.8,109.0
2020-04-05,10237,441662,183.0,2020-04-05,0,41.898165,81.0,6774.0,1340.0,0.760068,...,2.8,5.8,32.6,16.0,14.0,11.6,13.8,7.6,4.8,109.0
2020-04-06,10284,447509,186.0,2020-04-06,1,41.898165,47.0,5847.0,1323.0,0.837872,...,2.8,5.8,32.6,16.0,14.0,11.6,13.8,7.6,4.8,109.0
2020-04-07,10331,456654,192.0,2020-04-07,2,41.898165,47.0,9145.0,1294.0,0.849081,...,2.8,5.8,32.6,16.0,14.0,11.6,13.8,7.6,4.8,109.0
2020-04-08,10384,468145,200.0,2020-04-08,3,44.349057,53.0,11491.0,1247.0,0.902315,...,0.0,4.0,13.0,10.0,5.0,6.0,8.0,3.0,4.0,53.0
2020-04-09,10423,479202,204.0,2020-04-09,4,36.076923,39.0,11057.0,1182.0,0.861516,...,2.0,4.0,12.0,7.0,7.0,2.0,2.0,0.0,3.0,39.0
2020-04-10,10450,487753,208.0,2020-04-10,5,44.518519,27.0,8551.0,1118.0,0.826312,...,1.0,1.0,7.0,4.0,2.0,3.0,6.0,2.0,1.0,27.0


In [27]:
df_feat_kr

In [20]:
LIST_NBC[0]

'nbC_4.5'

### API for cases by area

In [31]:
# Manual test of the area API: request a few days, convert the raw response
# into a table (one row per date, one column per area) and display it
date_req_start = '2020-12-20'
date_req_end = '2020-12-23'
response_body = connect_api_area_kr(date_req_start, date_req_end)
df_area_kr = convert_xml_area_kr(response_body)
df_area_kr

http://openapi.data.go.kr/openapi/service/rest/Covid19/getCovid19SidoInfStateJson?serviceKey=vdvTqgH%2ByZyoebTbIuQVedRNSnB9aP0IuNFfD4uIRnhALu4%2FUkxCDZSHp2Qx2S4IOfN3P3nJCQJbTbxk%2FdMAlA%3D%3D&startCreateDt=20201220&endCreateDt=20201223
nb. new items:  153


Unnamed: 0,Jeju,Gyeongnam,Gyeongbuk,Jeonnam,Jeonbuk,Chungnam,Chungbuk,Gangwon,Gyeonggi,Sejong,Ulsan,Daejeon,Gwangju,Incheon,Daegu,Busan,Seoul
,,,,,,,,,,,,,,,,,
2020-12-20,24.0,51.0,25.0,4.0,29.0,20.0,14.0,55.0,249.0,1.0,6.0,3.0,14.0,62.0,25.0,33.0,473.0
2020-12-21,23.0,15.0,48.0,9.0,13.0,16.0,33.0,25.0,244.0,0.0,17.0,3.0,13.0,88.0,21.0,20.0,328.0
2020-12-22,19.0,18.0,59.0,5.0,4.0,17.0,31.0,23.0,206.0,1.0,6.0,9.0,26.0,45.0,39.0,26.0,317.0
2020-12-23,33.0,29.0,25.0,12.0,21.0,11.0,58.0,13.0,310.0,1.0,18.0,35.0,36.0,49.0,24.0,32.0,376.0


In [19]:
df_feat_kr[(df_feat_kr.date >= "2020-04-01") & (df_feat_kr.date <= "2020-04-11") ]

Unnamed: 0_level_0,nb_cases,nb_tests,nb_deaths,date,day_num,age_pos,pos,test,sum_cases,Rt,rate_pos,nbC_4.5,nbC_14.5,nbC_24.5,nbC_34.5,nbC_44.5,nbC_54.5,nbC_64.5,nbC_74.5,nbC_85
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-04-01,9887,404962,165.0,2020-04-01,3,,101.0,11290.0,1474.0,0.477796,0.894597,,,,,,,,,
2020-04-02,9976,413858,169.0,2020-04-02,4,,89.0,8896.0,1411.0,0.504109,1.00045,112.0,515.0,2656.0,1012.0,1312.0,1851.0,1235.0,651.0,442.0
2020-04-03,10062,424365,174.0,2020-04-03,5,,86.0,10507.0,1410.0,0.595439,0.818502,,,,,,,,,
2020-04-04,10156,434888,177.0,2020-04-04,6,,94.0,10523.0,1357.0,0.667815,0.893281,,,,,,,,,
2020-04-05,10237,441662,183.0,2020-04-05,0,,81.0,6774.0,1340.0,0.760068,1.195748,,,,,,,,,
2020-04-06,10284,447509,186.0,2020-04-06,1,,47.0,5847.0,1323.0,0.837872,0.803831,,,,,,,,,
2020-04-07,10331,456654,192.0,2020-04-07,2,39.547985,47.0,9145.0,1294.0,0.849081,0.513942,126.0,544.0,2819.0,1092.0,1382.0,1909.0,1304.0,689.0,466.0
2020-04-08,10384,468145,200.0,2020-04-08,3,40.72449,53.0,11491.0,1247.0,0.902315,0.461231,126.0,548.0,2832.0,1102.0,1387.0,1915.0,1312.0,692.0,470.0
2020-04-09,10423,479202,204.0,2020-04-09,4,32.0,39.0,11057.0,1182.0,0.861516,0.352718,128.0,552.0,2844.0,1109.0,1394.0,1917.0,1314.0,692.0,473.0
2020-04-10,10450,487753,208.0,2020-04-10,5,42.576923,27.0,8551.0,1118.0,0.826312,0.315753,129.0,553.0,2851.0,1113.0,1396.0,1920.0,1320.0,694.0,474.0


In [22]:
b'80 \xec\x9d\xb4\xec\x83\x81'.decode("utf-8", "strict")

'80 이상'

In [98]:
b'80 \xec\x9d\xb4\xec\x83\x81'.decode("utf-8", "strict")

'80 이상'

In [23]:
b'\xec\x97\xac\xec\x84\xb1'.decode("utf-8", "strict")

'여성'

In [26]:
b'\xeb\x82\xa8\xec\x84\xb1'.decode("utf-8", "strict")

'남성'

In [22]:
b'\xea\xb2\x80\xec\x97\xad'.decode("utf-8", "strict")

'검역'

## API for meteo

In [31]:
# Period to request: from the first date with usable features to a fixed date
date_req_start = DATE_FIRST_FEAT_OK_KR
date_req_end = "2020-12-04"

# Split the period into consecutive sub-ranges (one start and one end date per
# sub-range)
# NOTE(review): presumably needed because of a limit on the size of one meteo
# request - confirm against create_date_range_lim
list_dates_start, list_dates_end = create_date_range_lim(date_req_start, 
                                                         date_req_end)
# Print each sub-range to check that they follow each other without overlap
for date_start, date_end in zip(list_dates_start, list_dates_end):
    print(f"{date_start} - {date_end}")

2020-04-03 - 2020-05-04
2020-05-05 - 2020-06-05
2020-06-06 - 2020-07-07
2020-07-08 - 2020-08-08
2020-08-09 - 2020-09-09
2020-09-10 - 2020-10-11
2020-10-12 - 2020-11-12
2020-11-13 - 2020-12-04


In [200]:
# Manual test of the meteo API on a two-day window; the result is displayed
# as a table indexed by date
date_req_start = '2020-12-01'
date_req_end = '2020-12-02'
df_meteo_kr = connect_api_meteo(date_req_start, date_req_end)
df_meteo_kr

https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/weatherdata/history?aggregateHours=24&combinationMethod=aggregate&startDateTime=2020-12-01T00%3A00%3A00&endDateTime=2020-12-02T00%3A00%3A00&maxStations=-1&maxDistance=-1&shortColumnNames=true&sendAsDatasource=true&contentType=csv&unitGroup=metric&locationMode=array&key=7XNH4XB897R3PGSKJAKU7GGFL&dataElements=all&locations=Seoul%20south%20korea%7Cbusan%20south%20korea%7CDaegu%20South%20Korea


Unnamed: 0_level_0,T_max,T_min,H_mean,W_speed
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-12-01,9.266667,-3.666667,43.033333,12.733333
2020-12-02,9.3,0.466667,40.986667,14.3


In [190]:
df_meteo_kr.columns

Index(['address', 'latitude', 'longitude', 'name', 'resolvedAddress',
       'datetime', 'maxt', 'mint', 'temp', 'dew', 'windchill', 'heatindex',
       'precip', 'precipcover', 'snow', 'snowdepth', 'wspd', 'wgust', 'wdir',
       'visibility', 'cloudcover', 'humidity', 'sealevelpressure',
       'solarradiation', 'solarenergy', 'weathertype', 'info', 'conditions'],
      dtype='object')

In [191]:
# get Tmin mean, T max mean, humidity mean, wspd mean, reformat date
df_meteo_kr

Unnamed: 0_level_0,T_max,T_min,H_mean,W_speed
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-12-01,9.266667,-3.666667,43.033333,12.733333
2020-12-02,9.3,0.466667,40.986667,14.3


## GEOJSON for map

In [70]:
# Load the GeoJSON describing the areas of South Korea (used for the maps).
# JSON text is UTF-8: decode it explicitly rather than relying on the
# platform default encoding.
with open(URL_GEOJSON_AREA_KR, encoding="utf-8") as f:
    area_kr = json.load(f)

# example : 
# area_kr['features'][0]["properties"]["NAME_1"]
# area_kr['features'][0]['geometry']["coordinates"]



In [71]:
area_kr['features'][0]["properties"]["NAME_1"]

'Busan'

In [72]:
area_kr['features'][1]["properties"]["NAME_1"]

'Chungcheongbuk-do'

In [73]:
area_kr['features'][1]["properties"]

{'ID_0': 211,
 'ISO': 'KOR',
 'NAME_0': 'South Korea',
 'ID_1': 2,
 'NAME_1': 'Chungcheongbuk-do',
 'NL_NAME_1': '???? | ????',
 'VARNAME_1': "Chungchongbuk-Do|Chungcheongbugdo|Ch'ungch'ong-bukto|Chusei Hoku-do|North Chungchong|Ch'ungch'ong-bukto",
 'TYPE_1': 'Do',
 'ENGTYPE_1': 'Province'}

In [75]:
area_kr['features'][0]['geometry']["coordinates"]

[[[[128.83041381835994, 35.06708145141613],
   [128.79652404785156, 35.05402755737305],
   [128.8106994628913, 35.018749237060774],
   [128.85153198242222, 35.042362213134766],
   [128.83041381835994, 35.06708145141613]]],
 [[[129.2996826171875, 35.386108398437784],
   [129.288330078125, 35.394287109375114],
   [129.280517578125, 35.395080566406364],
   [129.26849365234375, 35.38812255859381],
   [129.256103515625, 35.3873291015625],
   [129.24647521972702, 35.38944244384771],
   [129.20788574218784, 35.35028076171875],
   [129.15570068359386, 35.3428955078125],
   [129.10879516601574, 35.3140869140625],
   [129.0825042724614, 35.284709930420036],
   [129.04290771484398, 35.26928710937506],
   [129.0054931640625, 35.2103271484375],
   [128.9934082031251, 35.20428466796881],
   [128.97991943359375, 35.200500488281364],
   [128.91571044921875, 35.19531250000023],
   [128.90069580078136, 35.16210937500023],
   [128.90191650390625, 35.15087890625006],
   [128.87072753906295, 35.14611816406

In [76]:
# ex: area_kr['features'][0]["properties"]["NAME_1"]
# Area names as written in the GeoJSON (property NAME_1), in file order
list_name_1 = \
    [feat_curr["properties"]["NAME_1"] for feat_curr in area_kr['features']]
# Same areas, in the same order, translated through DICT_AREA to the names
# used as columns in df_feat_kr (e.g. 'Chungcheongbuk-do' -> 'Chungbuk')
list_name = [DICT_AREA[area] for area in list_name_1]
list_name

['Busan',
 'Chungbuk',
 'Chungnam',
 'Daegu',
 'Daejeon',
 'Gangwon',
 'Gwangju',
 'Gyeonggi',
 'Gyeongbuk',
 'Gyeongnam',
 'Incheon',
 'Jeju',
 'Jeonbuk',
 'Jeonnam',
 'Seoul',
 'Ulsan']

In [77]:
df_feat_kr.filter(list_name).iloc[-1]

Busan         32.0
Chungbuk      58.0
Chungnam      11.0
Daegu         24.0
Daejeon       35.0
Gangwon       13.0
Gwangju       36.0
Gyeonggi     310.0
Gyeongbuk     25.0
Gyeongnam     29.0
Incheon       49.0
Jeju          33.0
Jeonbuk       21.0
Jeonnam       12.0
Seoul        376.0
Ulsan         18.0
Name: 2020-12-23 00:00:00, dtype: float64

In [78]:
list_name_1

['Busan',
 'Chungcheongbuk-do',
 'Chungcheongnam-do',
 'Daegu',
 'Daejeon',
 'Gangwon-do',
 'Gwangju',
 'Gyeonggi-do',
 'Gyeongsangbuk-do',
 'Gyeongsangnam-do',
 'Incheon',
 'Jeju',
 'Jeollabuk-do',
 'Jeollanam-do',
 'Seoul',
 'Ulsan']

In [79]:
area_kr['features'][0]["properties"]["NAME_1"]

'Busan'

In [84]:
# Initialize figure
import plotly.graph_objects as go

# Map view : zoom level and center (latitude / longitude)
zoom_kr = 4.5
lat_lon_kr = {'lat': 35, 'lon': 128}

# One value per area : last row of the features table, area columns only
z_last = df_feat_kr.filter(list_name).iloc[-1].values

# Areas are matched to the GeoJSON shapes through their NAME_1 property
trace_map = go.Choroplethmapbox(
    geojson=area_kr,
    name="positive",
    locations=list_name_1,
    featureidkey="properties.NAME_1",
    z=z_last,
)

# Add Traces
fig = go.Figure()
fig.add_trace(trace_map)

fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=zoom_kr,
    mapbox_center=lat_lon_kr,
)

## Figures for App

### Confirmed cases

#### Map

In [32]:
def create_fig_map_kr(df_feat_kr, list_col, label):
    '''Choropleth map of the areas of South Korea.

    Each area of GEOJSON_KR is colored with the value found in the last row
    of df_feat_kr for the columns list_col.

     data : 
     - df_feat_kr : features table, its last index value is formatted with
       strftime to date the map
     - list_col : columns to display
       NOTE(review): assumed to follow the order of LIST_NAME_GEOJSON - to
       confirm
     - label : text inserted in the annotation at the top left of the map

    return : plotly figure
    '''
    date_txt = df_feat_kr.index[-1].strftime("%Y-%m-%d")
    z_last = df_feat_kr.filter(list_col).iloc[-1].values

    # areas are matched to the GeoJSON shapes through their NAME_1 property
    trace_map = go.Choroplethmapbox(
        geojson=GEOJSON_KR,
        name="positive",
        locations=LIST_NAME_GEOJSON,
        featureidkey="properties.NAME_1",
        z=z_last,
        marker_opacity=0.7,
        marker_line_width=0,
    )

    # title displayed as an annotation over the map (top left, white box)
    annot_title = {
        "text": "South Korea : " + label + " (up to " + date_txt + ")",
        "x": 0,
        "xref": "paper",
        "y": 1,
        "yref": "paper",
        "align": "left",
        "showarrow": False,
        "bgcolor": "#FFFFFF",
    }

    fig = go.Figure()
    fig.add_trace(trace_map)

    # map background and view, no margin around the map
    fig.update_layout(
        mapbox_style="carto-positron",
        mapbox_zoom=ZOOM_KR,
        mapbox_center=LAT_LON_KR,
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        annotations=[annot_title],
    )

    # thin color bar on the right side of the map
    colorbar_conf = {
        "thicknessmode": "pixels",
        "thickness": 10,
        "len": 0.8,
        "x": 0.9,
        "xanchor": "left",
        "xpad": 0,
    }
    fig.update_traces(colorbar=colorbar_conf,
                      selector={"type": 'choroplethmapbox'})

    return fig

In [34]:
create_fig_map_kr(df_feat_kr, LIST_SUM_GEOJSON, "<b>Confirmed</b> " + \
            "cases : Sum of last 14 days")

In [45]:
def figure_pos(ser_pos, ser_sum_pos, dep_curr, rt_last):
    '''
    Figure : daily positive cases and their 14-days sum for one area.

    data :
     - ser_pos : daily cases (left axis, red filled curve)
     - ser_sum_pos : 14-days sum of cases (right axis, blue curve)
     - dep_curr : area name shown in the title
     - rt_last : last Rt value shown in the subtitle

    return : plotly figure
    '''
    trace_daily = go.Scatter(
        x=ser_pos.index, y=ser_pos.values,
        mode='lines', name="daily",
        line={"color": "red"}, fill='tozeroy')
    trace_sum = go.Scatter(
        x=ser_sum_pos.index, y=ser_sum_pos.values,
        mode='lines', name='14-days-sum',
        line={"color": "blue"})

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(trace_daily, secondary_y=False)
    fig.add_trace(trace_sum, secondary_y=True)

    # footnote below the plot area (position given in paper coordinates)
    note_txt = "<i>Click on Map to Update this Curve<br>" \
        "Curve for global country not available...</i>"
    fig.add_annotation(x=0, y=-0.18, text=note_txt)
    fig.update_annotations(xref="paper", yref="paper", showarrow=False)

    # title : area name, then last date with last Rt and last 14-days sum
    sum_last = ser_sum_pos.values[-1]
    subtitle_curr = (
        f'<i>{ser_pos.index[-1]}:</i> '
        f'Rt: <b>{rt_last:.2f}</b>'
        f"<br>14days-sum:<b> {sum_last:.0f}</b>"
    )
    title_conf = {
        "text": f"New cases: <b>{dep_curr}</b><br>" + subtitle_curr,
        "xanchor": "center", "x": 0.5, "yanchor": "top", "y": 0.95,
    }
    fig.update_layout(showlegend=True, font={"size": 12}, title=title_conf)

    # each axis takes the color of its curve
    fig.update_yaxes(color="red", secondary_y=False)
    fig.update_yaxes(color="blue", secondary_y=True)

    fig.update_layout(margin={"r": 0, "t": 70, "l": 50})
    fig.update_layout(legend_orientation="h", legend={"x": 0, "y": 1})

    return fig


#### South Korea

In [46]:
# Whole country : daily cases ("pos"), their sum ("sum_cases") and last Rt
dep_curr = "South Korea"
ser_pos = df_feat_kr["pos"]
ser_sum_pos = df_feat_kr["sum_cases"]
rt_last = df_feat_kr["Rt"].values[-1]
figure_pos(ser_pos, ser_sum_pos, dep_curr, rt_last)

#### Seoul

In [47]:
# One area : its columns are named after the area ("Seoul", "sum_Seoul",
# "Rt_Seoul")
dep_curr = "Seoul"
ser_pos = df_feat_kr[dep_curr]
ser_sum_pos = df_feat_kr[f"sum_{dep_curr}"]
rt_last = df_feat_kr[f"Rt_{dep_curr}"].values[-1]
figure_pos(ser_pos, ser_sum_pos, dep_curr, rt_last)

### Rt

In [48]:
4870/3072

1.5852864583333333

#### Map

In [35]:
create_fig_map_kr(df_feat_kr, LIST_RT_GEOJSON, "<b>Rt</b> " + \
            "estimated for last 14 days")

#### South Korea

In [36]:
def figure_rt(ser_rt, dep_curr, sum_pos_last, country="France"):
    '''
    Figure : reproduction number (Rt) over time for one area.

    data :
     - ser_rt : Rt values (index used as x axis)
     - dep_curr : area name shown in the title
     - sum_pos_last : last sum of cases (last 14 days), shown in the subtitle
     - country : country name quoted in the footnote

    The curve is red when the last Rt is above 1 and sum_pos_last is above
    400, orange when only the last Rt is above 1, blue otherwise.

    return : plotly figure
    '''
    rt_last = ser_rt.values[-1]

    # color calculation
    if not rt_last > 1:
        color_curr = "blue"
    elif sum_pos_last > 400:
        color_curr = "red"
    else:
        color_curr = "orange"

    trace_rt = go.Scatter(
        x=ser_rt.index, y=ser_rt.values,
        mode='lines', name=dep_curr,
        line={"color": color_curr}, fill='tozeroy')
    # dashed reference line at Rt = 1 over the whole period
    trace_ref = go.Scatter(
        x=[ser_rt.index[0], ser_rt.index[-1]], y=[1, 1],
        mode='lines', line={"color": "red", "dash": 'dash'},
        hoverinfo="skip")

    # create figure
    fig = go.Figure()
    fig.add_trace(trace_rt)
    fig.add_trace(trace_ref)

    # footnote below the plot area (position given in paper coordinates)
    note_txt = (
        "<i>Click on Map to Update this Curve<br> "
        f'or Click on "{country}" '
        "button for global country Curve</i>"
    )
    fig.add_annotation(x=0, y=-0.18, text=note_txt)
    fig.update_annotations(xref="paper", yref="paper", showarrow=False)

    subtitle_curr = (
        "Rt: "
        f"<b>{rt_last:.2f}</b> "
        f'on {ser_rt.index[-1]}<br>'
        f"sum cases: <b>{sum_pos_last}</b> (last 14 days)"
    )
    title_conf = {
        "text": f"<b>Reprod. nb.</b>: <b>{dep_curr}</b>" + '<br>' +
                subtitle_curr,
        "xanchor": "center", "x": 0.5, "yanchor": "top", "y": 0.95,
    }
    fig.update_layout(title=title_conf, yaxis_title='Rt',
                      showlegend=False, font={"size": 12})

    fig.update_yaxes(title_standoff=0)
    return fig


In [41]:
# Whole country : Rt curve restricted to dates from 2020-03-19 onwards, with
# the last value of "sum_cases" for the subtitle
dep_curr = "South Korea"
ser_rt = df_feat_kr[df_feat_kr["date"] >= "2020-03-19"]["Rt"]
sum_pos_last = df_feat_kr["sum_cases"].values[-1]
figure_rt(ser_rt, dep_curr, sum_pos_last, country="South Korea")

#### Seoul

In [43]:
# One area : same figure with the columns named after the area ("Rt_Seoul",
# "sum_Seoul")
dep_curr = "Seoul"
ser_rt = df_feat_kr[df_feat_kr["date"] >= "2020-03-19"][f"Rt_{dep_curr}"]
sum_pos_last = df_feat_kr[f"sum_{dep_curr}"].values[-1]
figure_rt(ser_rt, dep_curr, sum_pos_last, country="South Korea")