# Fetching data from remote sites - https://reurl.cc/exo167

- [COVID-19 Dashboard by JHU](https://gisanddata.maps.arcgis.com/apps/opsdashboard/index.html#/bda7594740fd40299423467b48e9ecf6)
- [Taiwan CDC Dashboard](https://sites.google.com/cdc.gov.tw/2019ncov/taiwan)

## Import dependencies and libraries

In [5]:
# Work out whether this notebook is running inside Google Colab and point
# groot_dir at the matching "adventures" data folder.
try:
    from google.colab import drive, files
except ModuleNotFoundError:
    # The Colab-only package is missing, so this is a local run.
    in_colab = False
else:
    in_colab = True

if not in_colab:
    from pathlib import Path
    home_dir = str(Path.home())
    groot_dir = home_dir + '/Google Drive/adventures/'
else:
    home_dir = ''
    # Mount Google Drive before building the path that lives on it.
    drive.mount('/content/drive')
    groot_dir = '/content/drive/My Drive/adventures/'

import matplotlib as mpl
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=14)
mpl.rc('ytick', labelsize=14)

from datetime import datetime
from dateutil.relativedelta import *
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import math
import os
import sys
import gdown
import requests
# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")
from pandas.plotting import register_matplotlib_converters

# Folder layout underneath the shared data root.
dataroot_dir = groot_dir
figure_dir = dataroot_dir + 'figure/'
covid_dir = dataroot_dir + 'covid19/'


def gfigure(name):
    """Return the path of the PNG called ``name`` inside ``figure_dir``."""
    return ''.join((figure_dir, name, '.png'))


def output_fig(name):
    """Save the current matplotlib figure to ``gfigure(name)`` at 300 dpi."""
    target = gfigure(name)
    return plt.savefig(target, dpi=300)

def local_time(x, offset):
    """Return ``x`` shifted forward by ``offset`` hours."""
    shift = relativedelta(hours=offset)
    return x + shift

def local_now(hours = 8):
    """Return the current time, moved forward by ``hours`` only when in Colab.

    Outside Colab (``in_colab`` is false) the shift is zero and the plain
    ``datetime.now()`` value is returned.
    """
    shift = hours if in_colab else 0
    return datetime.now() + relativedelta(hours=shift)

def print_now():
    """Print the value of ``local_now()``; the result is always ``None``."""
    moment = local_now()
    return print(moment)

def DropboxLink(did, fname):
    """Build a ``dl.dropboxusercontent.com`` URL from a share id and file name.

    Args:
        did: The id segment that follows ``/s/`` in the link.
        fname: The file name segment that ends the link.

    Returns:
        The assembled URL string.
    """
    template = 'https://dl.dropboxusercontent.com/s/%s/%s'
    return template % (did, fname)
    
def fetch_gdrive_file(fid, local_save):
    """Download the Google Drive file with id ``fid`` to ``local_save`` via gdown.

    gdown is called with ``quiet=False``. Nothing is returned.
    """
    gdown.download(
        'https://drive.google.com/uc?id=' + fid,
        local_save,
        quiet=False,
    )

def fetch_file_via_requests(url, save_in_dir):
    """Stream the file at ``url`` into ``save_in_dir`` and return the saved path.

    The local file name is the last segment of the URL *path*; a query string
    or fragment is ignored, so ``.../data.csv?dl=1`` is saved as ``data.csv``.

    Args:
        url: HTTP(S) address of the file to download.
        save_in_dir: Destination directory, with or without a trailing slash.

    Returns:
        The path of the file that was written.

    Raises:
        ValueError: If no file name can be derived from ``url``.
        requests.HTTPError: If ``raise_for_status()`` rejects the response.
    """
    # Function-scope imports keep this helper self-contained.
    import os
    from urllib.parse import urlsplit

    local_filename = urlsplit(url).path.split('/')[-1]
    if not local_filename:
        # Fail before any network traffic instead of at open() on a directory.
        raise ValueError('cannot derive a file name from URL: %r' % (url,))
    output_fpath = os.path.join(save_in_dir, local_filename)

    # stream=True lets the body be written chunk by chunk.
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(output_fpath, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return output_fpath

# Identifiers of remote data files.
# NOTE(review): the opaque strings below look like Google Drive file ids meant
# for fetch_gdrive_file — confirm before relying on that.
TAIWAN_CSV = '1I5yqulrZSHPSQkxT3oqt_3uVAhPolOEP'
# Base URL of the JHU CSSE COVID-19 time-series folder on GitHub (raw files).
JHU_CSSE = 'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/'
TITANIC_TRAIN = '1PrxmUKRQWSlYgtMU13l1E0ob4hVJI20O'
MNIST_TRAIN = '1E-uJ0zqqAfpsVjoOSzqF5TXhDfPNlkQ5'
MNIST_TRAIN_LABEL = '13clNJ2cd2I90W3DEkDBKjZSDNNEqqx3B'
MNIST_TEST = '1zVpVHJl5YABa3qExt1K-O3WaEHXTJekg'
MNIST_TEST_LABEL = '1qci_-dqubnRN-cdrCsbYaUAxyO7_jH9z'

# Start-up banner: project title, platform, Python version and data folder,
# followed by the current time.
_banner_lines = (
    ('ML project #1 - how to fetch date from remote site',),
    ('\nRunning on %s' % sys.platform,),
    ('Python Version', sys.version),
    ('\nData storage points to ==>', groot_dir),
    ('\nLibraries and dependenciess imported',),
)
for _fields in _banner_lines:
    print(*_fields)
print_now()

ML project #1 - how to fetch date from remote site

Running on darwin
Python Version 3.8.5 (v3.8.5:580fbb018f, Jul 20 2020, 12:11:27) 
[Clang 6.0 (clang-600.0.57)]

Data storage points to ==> /Users/roger/Google Drive/adventures/

Libraries and dependenciess imported
2020-08-06 15:43:37.957179


## helpers for fetching remote files

In [None]:
import gdown, requests

def fetch_gdrive_file(fid, local_save):
    """Download the Google Drive file with id ``fid`` to ``local_save`` via gdown.

    gdown is called with ``quiet=False``. Nothing is returned.
    """
    gdown.download(
        'https://drive.google.com/uc?id=' + fid,
        local_save,
        quiet=False,
    )


def fetch_file_via_requests(url, save_in_dir):
    """Stream the file at ``url`` into ``save_in_dir`` and return the saved path.

    The local file name is the last segment of the URL *path*; a query string
    or fragment is ignored, so ``.../data.csv?dl=1`` is saved as ``data.csv``.

    Args:
        url: HTTP(S) address of the file to download.
        save_in_dir: Destination directory, with or without a trailing slash.

    Returns:
        The path of the file that was written.

    Raises:
        ValueError: If no file name can be derived from ``url``.
        requests.HTTPError: If ``raise_for_status()`` rejects the response.
    """
    # Function-scope imports keep this helper self-contained.
    import os
    from urllib.parse import urlsplit

    local_filename = urlsplit(url).path.split('/')[-1]
    if not local_filename:
        # Fail before any network traffic instead of at open() on a directory.
        raise ValueError('cannot derive a file name from URL: %r' % (url,))
    output_fpath = os.path.join(save_in_dir, local_filename)

    # stream=True lets the body be written chunk by chunk.
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(output_fpath, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return output_fpath
# Confirm that the helper cell finished and print the current time.
print('helpers imported')
print_now()   

# ① Fetch files by gdown or requests 

## 1.1 - Download JHU CSSE data files (下載 JHU 每日更新資料)

- [Coronavirus COVID-19 Global Cases by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University (JHU)](https://gisanddata.maps.arcgis.com/apps/opsdashboard/index.html#/bda7594740fd40299423467b48e9ecf6)
- [Data on Github](https://bit.ly/2Alrmgp)

In [6]:
# Short link to the JHU GitHub data folder: https://bit.ly/2Alrmgp
github = JHU_CSSE

# File-name template for the JHU global time-series CSVs and the categories
# that are substituted into it.
csv_holder = 'time_series_covid19_%s_global.csv'
csvlist = ['confirmed', 'deaths', 'recovered']


def jhudata(x):
    """Return ``csv_holder`` with ``x`` substituted for its placeholder."""
    return csv_holder % x


# Local covid19 folder under the data root, always ending in '/'.
put_data_here = '{}/'.format(os.path.join(groot_dir, 'covid19'))

print('defintion loaded')
print_now()

defintion loaded
2020-08-06 15:43:49.857765


## 1.2 - Take a look at the downloaded files

## 1.3 - Select required contents from JHU data

### 1.3.1 Worldwide statistics

### 1.3.2 How to select required rows

## 1.4 - Investigate Data

### 1.4.1 pandas plot

### 1.4.2 matplotlib pyplot

## 1.5 - Plot Practice: Active Trend

- [seaborn set_style](https://seaborn.pydata.org/generated/seaborn.set_style.html)
- [Controlling figure aesthetics](https://seaborn.pydata.org/tutorial/aesthetics.html)
- [sns.reset_defaults()](https://seaborn.pydata.org/generated/seaborn.reset_defaults.html?highlight=reset#seaborn.reset_defaults) 

### 1.5.1 pandas.plot.line() 

### 1.5.2 seaborn

## 1.6 New Case Trend

# ② Manual Works

## 2.1 - Taiwan.csv

- [疫情數據全報導](https://udn.com/news/COVID19)：此報導會每日更新網頁內容
- [pandas.read_csv](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html)

**補充說明：**

根據 2020-08-01 衛生福利部疾病管制署記者會公佈資料：

> 指揮中心統計，國內截至目前累計81,826例(含80,728例排除)，其中474例確診，分別為382例境外移入，55例本土病例、36例敦睦艦隊及1例待釐清。

因此 taiwan.csv 的資料需增加一個欄位 tbd (to_be_decided)，新的版本更名為 taiwan_aug.csv。以下使用的範例資料檔案，為更名前的最後一個檔案，因此資料到 2020-07-31 為止。

In [None]:
# Download the last old-format taiwan.csv.
# NOTE(review): the file name requested here is spelled 'tawiwan.csv', while
# the csv_path built afterwards uses 'taiwan.csv' — confirm which spelling is
# intended.

fetch_file_via_requests(
    DropboxLink('upr5klwy07bba3h', 'tawiwan.csv'), covid_dir
)

'/Users/roger/Google Drive/adventures/covid19/tawiwan.csv'

In [None]:
csv_path = os.path.join(covid_dir, 'taiwan.csv')

## 2.2 Trending Chart

In [None]:
#  tdf.Taiwan - tdf.Deaths - tdf.Recovered
# 'Active Case = Accumulated Infection - Deaths - Recovered'


## 2.3 - Daily Cases

## 2.4 - Complicated tw_newcase()

In [None]:
def tw_newcase():
    """Plot Taiwan's daily new cases as stacked bars plus a 7-day rolling mean.

    Reads the module-level DataFrame ``tdf`` and uses its ``Taiwan``,
    ``Local``, ``Panshi`` and ``Imported`` columns. Each column is differenced
    by one row to get the per-row change (presumably the columns hold
    cumulative counts — verify against the CSV). Rows before the last seven
    are drawn in pale colours, the last seven rows in stronger colours, and a
    vertical line marks the boundary between them.

    Assumes ``tdf`` has a datetime-like index and at least eight rows, since
    ``tdf.index[-8]`` and ``.date()`` are used — TODO confirm where ``tdf`` is
    loaded.

    Returns:
        None. The chart is drawn on a new matplotlib figure.
    """
    dsource = u'Data Source: Taiwan CDC, UDN & PTT'
    # Source links: https://bit.ly/3dj7zgg, https://bit.ly/33AQlGr & https://reurl.cc/xZgqA5

    def as_day(stamp):
        # Keep only the date part of a timestamp's string form.
        return str(stamp).split(' ')[0]

    ddate = '[ %s ~ %s ]' % (as_day(tdf.index[0]), as_day(tdf.index[-1]))
    to_watch = 'Taiwan'
    window_size = 1

    # Per-row changes, computed once and reused by every bar series below.
    total_new = tdf[to_watch].diff(1)
    r7 = total_new.rolling(7).mean()
    local_new = tdf.Local.diff(1)
    panshi_new = tdf.Panshi.diff(1)
    imported_new = tdf.Imported.diff(1)

    earlier = slice(window_size, -7)  # skips row 0, whose diff is NaN
    recent = slice(-7, None)          # the last seven rows
    cutoff_day = as_day(tdf.index[-8])

    fig, ax = plt.subplots(figsize = (18, 12))

    # Rows before the last seven: Local at the bottom, then Panshi, then Imported.
    ax.bar(tdf.index[earlier], local_new.iloc[earlier],
        label = 'Daily new "Locally-acquired" before %s' % cutoff_day,
        color = 'teal' , alpha = 0.2)

    ax.bar(tdf.index[earlier], panshi_new.iloc[earlier],
        label = 'Daily new "Panshi" before %s' % cutoff_day,
        bottom = local_new.iloc[earlier],
        color = 'darkgray' , alpha = 0.6)

    ax.bar(tdf.index[earlier], imported_new.iloc[earlier],
        bottom = local_new.iloc[earlier] + panshi_new.iloc[earlier],
        label = 'Daily new "Imported" before %s' % cutoff_day,
        alpha = 0.3, color = 'orange')

    # Last seven rows, same stacking order. .iloc makes the positional
    # look-ups explicit instead of relying on integer fallback in Series[...].
    ax.bar(tdf.index[recent], local_new.iloc[recent],
        label = 'Locally-acquired in the last 7 days: %d' % \
        ( tdf.Local.iloc[-1] - tdf.Local.iloc[-8]),
        color = 'navy' , alpha = 0.25)

    ax.bar(tdf.index[recent], panshi_new.iloc[recent],
        label = 'Panshi in the last 7 days: %d' % \
        ( tdf.Panshi.iloc[-1] - tdf.Panshi.iloc[-8]),
        bottom = local_new.iloc[recent],
        color = 'black' , alpha = 0.7)

    ax.bar(tdf.index[recent], imported_new.iloc[recent],
        label = 'Imported in the last 7 days: %d' % \
        ( tdf.Imported.iloc[-1] - tdf.Imported.iloc[-8]),
        bottom = local_new.iloc[recent] + panshi_new.iloc[recent],
        alpha = 0.5, color = 'darkorange')

    ax.plot(r7, color = 'brown', lw = 2, ls = '--', label = '7-days average')
    ax.axvline(x = tdf.index[-8], ls = '-.', color = 'brown', alpha = 0.5)

    ax.legend( loc = 'upper left', frameon = True, shadow = True,
        facecolor = '#f8f8f6',
        edgecolor = 'darkblue',
        title = '{:^50}\n{:^70}\n'.format(dsource, ddate), title_fontsize = 14,
        fontsize = 13)

    # Positional True works whether the keyword is called `b` or `visible`.
    ax.grid (True, which = 'major', alpha = 0.25, ls = '--')

    # Locations and labels come from the same rows, so each label shows the
    # date it sits on and both lists always have the same length.
    tick_rows = range(2, len(tdf.index), 28)
    xticks_locataion = [tdf.index[i] for i in tick_rows]
    xticks_labels = [str(tdf.index[i].date()) for i in tick_rows]
    ax.set_xticks(xticks_locataion)
    ax.set_xticklabels(xticks_labels)

    # Twin axis given the same y-limits as the main axis.
    ax2 = ax.twinx()
    ylim = ax.get_ylim()
    ax2.set_ylim(ylim)
    ax2.text(tdf.index[-2], total_new.iloc[-1]+5, '%s:\n Local %d , Imported %d' %
        (as_day(tdf.index[-1]), local_new.iloc[-1], imported_new.iloc[-1]),
        fontsize = 16, ha = 'right')
    
tw_newcase()