In [87]:
## Eventually, I'd like to create a program which automatically pulls data from 
## the Japanese Ministry of Health, Labor and Welfare and imports it into a dashboard.

## Import packages
import pandas as pd
import numpy as np
import datetime as dt
import ssl

## Setting Options for Pandas
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [96]:
## Data Sources
ssl._create_default_https_context = ssl._create_unverified_context
dataCases = 'http://covid19.mhlw.go.jp/public/opendata/newly_confirmed_cases_daily.csv'
dataSevere = 'http://covid19.mhlw.go.jp/public/opendata/severe_cases_daily.csv'
dataDeaths = 'http://covid19.mhlw.go.jp/public/opendata/deaths_cumulative_daily.csv'

## First day of each COVID wave in Datetime format
dtWaves = [dt.datetime(2020, 7, 1),dt.datetime(2020, 11, 1),dt.datetime(2021, 3, 1),
           dt.datetime(2021, 7, 1),dt.datetime(2022, 1, 1),dt.datetime(2022, 6, 1),
           dt.datetime(2029, 6, 1)]

In [97]:
## Create and Join Data Frames
dfCases = pd.read_csv(dataCases)[['Date','ALL']]
dfCases.rename(columns = {'ALL':'newCases'}, inplace = True)

dfSevere = pd.read_csv(dataSevere)[['Date','ALL']]
dfSevere.rename(columns = {'ALL':'totalSevere'}, inplace = True)

dfDeaths = pd.read_csv(dataDeaths)[['Date','ALL']]
dfDeaths.rename(columns = {'ALL':'cumDeaths'}, inplace = True)

df = dfCases.join(dfSevere.set_index('Date'), on="Date", how='inner')
df2 = df.join(dfDeaths.set_index('Date'), on="Date", how='inner')

In [98]:
## Confirm Data is looking good
print(df2.head())
print(df2.shape)

          Date  newCases  totalSevere  cumDeaths
114   2020/5/9       108          267        613
115  2020/5/10        66          249        621
116  2020/5/11        58          243        643
117  2020/5/12        87          259        668
118  2020/5/13        55          245        687
(843, 4)


In [99]:
## Inputting each wave into the dataframe, using the dtWaves array above
def wave(row):
    for i in range(0,len(waves)):
        if dt.datetime.strptime(row['Date'], '%Y/%m/%d') >= dtWaves[i]:
            if dt.datetime.strptime(row['Date'], '%Y/%m/%d') < dtWaves[i + 1]:
                return "Wave " + str(i+2)
        else:
            return "Wave 1"

df2['wave'] = df2.apply(wave, axis=1)
print(df2)

           Date  newCases  totalSevere  cumDeaths    wave
114    2020/5/9       108          267        613  Wave 1
115   2020/5/10        66          249        621  Wave 1
116   2020/5/11        58          243        643  Wave 1
117   2020/5/12        87          259        668  Wave 1
118   2020/5/13        55          245        687  Wave 1
119   2020/5/14        99          237        710  Wave 1
120   2020/5/15        55          232        725  Wave 1
121   2020/5/16        56          230        744  Wave 1
122   2020/5/17        29          228        749  Wave 1
123   2020/5/18        30          213        763  Wave 1
124   2020/5/19        32          210        771  Wave 1
125   2020/5/20        37          195        777  Wave 1
126   2020/5/21        45          176        796  Wave 1
127   2020/5/22        30          174        808  Wave 1
128   2020/5/23        30          168        820  Wave 1
129   2020/5/24        40          165        830  Wave 1
130   2020/5/2

In [100]:
## Adding column for new deaths per day
newDeaths = []
last = 613
for i in df2["cumDeaths"]:
    newDeaths.append(i - last)
    last = i

df2.insert(4, "newDeaths", newDeaths, True)
print(df2)

           Date  newCases  totalSevere  cumDeaths  newDeaths    wave
114    2020/5/9       108          267        613          0  Wave 1
115   2020/5/10        66          249        621          8  Wave 1
116   2020/5/11        58          243        643         22  Wave 1
117   2020/5/12        87          259        668         25  Wave 1
118   2020/5/13        55          245        687         19  Wave 1
119   2020/5/14        99          237        710         23  Wave 1
120   2020/5/15        55          232        725         15  Wave 1
121   2020/5/16        56          230        744         19  Wave 1
122   2020/5/17        29          228        749          5  Wave 1
123   2020/5/18        30          213        763         14  Wave 1
124   2020/5/19        32          210        771          8  Wave 1
125   2020/5/20        37          195        777          6  Wave 1
126   2020/5/21        45          176        796         19  Wave 1
127   2020/5/22        30         

In [102]:
## Adding column for average number of weekly cases
weeklyAvgCases = []
avg = []

for i in df2["newCases"]:
    if len(avg) < 7:
        avg.append(i)
        weeklyAvgCases.append(None)
    else:
        del avg[0]
        avg.append(i)
        weeklyAvgCases.append(round(np.average(avg,axis=0),1))

df2.insert(2, "weeklyAvgCases", weeklyAvgCases, True)
print(df2)

           Date  newCases  weeklyAvgCases  totalSevere  cumDeaths  newDeaths  \
114    2020/5/9       108             NaN          267        613          0   
115   2020/5/10        66             NaN          249        621          8   
116   2020/5/11        58             NaN          243        643         22   
117   2020/5/12        87             NaN          259        668         25   
118   2020/5/13        55             NaN          245        687         19   
119   2020/5/14        99             NaN          237        710         23   
120   2020/5/15        55             NaN          232        725         15   
121   2020/5/16        56            68.0          230        744         19   
122   2020/5/17        29            62.7          228        749          5   
123   2020/5/18        30            58.7          213        763         14   
124   2020/5/19        32            50.9          210        771          8   
125   2020/5/20        37            48.

In [104]:
## Adding column for average number of weekly deaths
weeklyAvgDeaths = []
avg = []

for i in df2["newDeaths"]:
    if len(avg) < 7:
        avg.append(i)
        weeklyAvgDeaths.append(None)
    else:
        del avg[0]
        avg.append(i)
        weeklyAvgDeaths.append(round(np.average(avg,axis=0),1))

df2.insert(6, "weeklyAvgDeaths", weeklyAvgDeaths, True)
print(df2)

           Date  newCases  weeklyAvgCases  totalSevere  cumDeaths  newDeaths  \
114    2020/5/9       108             NaN          267        613          0   
115   2020/5/10        66             NaN          249        621          8   
116   2020/5/11        58             NaN          243        643         22   
117   2020/5/12        87             NaN          259        668         25   
118   2020/5/13        55             NaN          245        687         19   
119   2020/5/14        99             NaN          237        710         23   
120   2020/5/15        55             NaN          232        725         15   
121   2020/5/16        56            68.0          230        744         19   
122   2020/5/17        29            62.7          228        749          5   
123   2020/5/18        30            58.7          213        763         14   
124   2020/5/19        32            50.9          210        771          8   
125   2020/5/20        37            48.