# Visualization of NCOVID-19 Data
One of the reasons Python is so popular among ML developers is its visualization utilities:
- Matplotlib provides the basic functions and utilities for making visualizations;
- seaborn provides a high-level interface for drawing informative statistical graphics;
- plotly, which cannot be left out, creates both online and offline visualizations with hands-on interactivity.

Furthermore, Python never lets you down when animations or dashboards are required; try `moviepy`, `ipywidgets`, `dash`, etc.

**Note**. To install or update a Python package, you can do it as follows:

```
 shell > pip install -U plotly
   or
 shell > conda install plotly
```
**Exercise**: install plotly as above.


## Data Preparation
In last week's practice, we learned how to work with the time series data from [JHU](https://github.com/CSSEGISandData/COVID-19). Now create today's practice notebook, NCov-2.ipynb, as follows:

```
        COVID-19-master/
           csse_covid_19_data/
           ...        
        t/
           NCov-1.ipynb
           NCov-2.ipynb
           ...
           tmp/
``` 
In this practice, the daily report data is used rather than the time-series data.

In [None]:
import pandas as pd
import numpy as np
import re
import os

import pickle
import os.path
from datetime import datetime, date, time 
from time import strftime

import json
from dateutil.parser import parse
import matplotlib.pyplot as plt


In [None]:
# Show today's date as a YYYY-MM-DD string
str(datetime.now().date())

In [None]:
# Directory (relative to this notebook) that holds the daily report CSV files
DATA = '../COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/'

def clean_sheet_names(new_ranges):
    '''
    Keep only the names that contain at least one digit.

    The daily report files are named by date (e.g. ``03-05-2020.csv``), so
    this filters out entries whose name carries no digit at all.

    Parameters
    ----------
    new_ranges : iterable of str
        Candidate file (sheet) names.

    Returns
    -------
    list of str
        The names containing a digit, in their original order.
    '''
    return [name for name in new_ranges if re.search(r'\d', name)]

In [None]:
# Load a single daily report to inspect its layout; the path is built from
# DATA so the report directory is defined in one place only.
df = pd.read_csv(os.path.join(DATA, '03-05-2020.csv'))
df.head(20)

In [None]:
# List every entry found in the daily-report directory
sheets = os.listdir(DATA)

# Keep only the entries accepted by clean_sheet_names, passing them in
# reverse-sorted (descending name) order
cleaned_sheets = clean_sheet_names(sorted(sheets, reverse=True))

In [None]:
'''
Helpers for normalising the "Last Update" timestamps and assigning a date to each record
'''

def clean_last_updates(last_update):
    '''
    Normalise a "Last Update" value to a ``YYYY-MM-DD HH:MM:SS`` string.

    The whole value is parsed in one pass instead of splitting it on a
    space first, so values whose date and time are not separated by a
    single space (e.g. ISO ``2020-02-02T23:43:02``) no longer raise an
    IndexError, and an AM/PM marker is taken into account.

    Parameters
    ----------
    last_update : str or datetime-like
        Timestamp to normalise; it is converted with ``str()`` before parsing.

    Returns
    -------
    str
        The timestamp formatted as ``%Y-%m-%d %H:%M:%S``.
    '''
    return parse(str(last_update)).strftime('%Y-%m-%d %H:%M:%S')

def get_date(last_update):
    '''
    Return the calendar date of a timestamp as a ``YYYY-MM-DD`` string.

    Only the text before the first space of ``str(last_update)`` is parsed.
    '''
    date_part, _, _ = str(last_update).partition(' ')
    return parse(date_part).strftime("%Y-%m-%d")

def drop_duplicates(df_raw):
    '''
    Split the data by date and keep one row per province within each date.

    For every distinct value of the ``date`` column (in order of first
    appearance) the matching rows are sorted by ``Last Update`` and only
    the last row of each ``Province/State`` is retained.

    Returns
    -------
    list of DataFrame
        One de-duplicated frame per distinct date.
    '''
    return [
        df_raw[df_raw.date == day]
        .sort_values(['Last Update'])
        .drop_duplicates('Province/State', keep='last')
        for day in df_raw.date.unique()
    ]

In [None]:
# Columns retained from every daily report
keep_cols = ['Confirmed', 'Country/Region', 'Deaths', 'Last Update', 'Province/State', 'Recovered']
# Count columns: missing values are filled with 0 and the values cast to int
numeric_cols = ['Confirmed', 'Deaths', 'Recovered']

def get_data(cleaned_sheets):
    '''
    Load the daily report CSV files and combine them into a single frame.

    Each file named in ``cleaned_sheets`` is read from ``DATA``, reduced to
    ``keep_cols``, has its counts filled and cast to int, its missing
    ``Province/State`` replaced by the ``Country/Region``, and its
    ``Last Update`` normalised; a ``date`` column is derived from it.
    The frames are concatenated and, per date, only the latest row of each
    province is kept (see ``drop_duplicates``).

    Parameters
    ----------
    cleaned_sheets : iterable of str
        File names (relative to ``DATA``) of the reports to load. Names
        not ending in ``.csv`` are skipped.

    Returns
    -------
    DataFrame
        The combined, de-duplicated records.
    '''
    all_csv = []
    # Use the file list that was passed in rather than a module-level global.
    for fname in sorted(cleaned_sheets):
        if not fname.endswith('.csv'):
            continue
        print('...', fname)
        tmp_df = pd.read_csv(os.path.join(DATA, fname), index_col=None, header=0, parse_dates=['Last Update'])
        # Take an explicit copy so the assignments below act on this frame
        # and not on a view of the frame returned by read_csv.
        tmp_df = tmp_df[keep_cols].copy()
        tmp_df[numeric_cols] = tmp_df[numeric_cols].fillna(0).astype(int)
        # Assign the result back: an in-place fillna on a selected column is
        # not guaranteed to modify the parent frame.
        tmp_df['Province/State'] = tmp_df['Province/State'].fillna(tmp_df['Country/Region'])

        tmp_df['Last Update'] = tmp_df['Last Update'].apply(clean_last_updates)
        tmp_df['date'] = tmp_df['Last Update'].apply(get_date)

        all_csv.append(tmp_df)

    df_raw = pd.concat(all_csv, axis=0, ignore_index=True, sort=True)
    df_raw = df_raw.sort_values(by=['Last Update'])

    # Keep the last entry per province for every date
    frames = drop_duplicates(df_raw)
    return pd.concat(frames, axis=0, ignore_index=True, sort=True)

# Build the combined frame from the daily report files
df = get_data(cleaned_sheets)

In [None]:
# Inspect the last rows of the combined frame
df.tail()

In [None]:
# Total the province rows into one figure per country and date. Summing the
# count columns (instead of taking the max over every column) gives the
# country-wide total rather than the value of its largest province.
df_countries = (
    df.groupby(['Country/Region', 'date'])[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
    .sort_values('date', ascending=False)
)
# Rows are ordered newest first, so this keeps each country's latest date.
df_countries = df_countries.drop_duplicates(subset=['Country/Region'])
df_countries = df_countries[df_countries['Confirmed'] > 0]

df_countries.head()

In [None]:
from plotly.offline import init_notebook_mode, plot, iplot, download_plotlyjs
import plotly.graph_objs as go
import plotly.express as px

In [None]:
import plotly as py

from plotly.subplots import make_subplots

# Initialise plotly's offline notebook mode before drawing any figure
init_notebook_mode(connected=True) 

In [None]:
# World map of the latest confirmed-case count per country.
fig = go.Figure(data=go.Choropleth(
    locations = df_countries['Country/Region'],
    locationmode = 'country names',
    z = df_countries['Confirmed'],
    colorscale = 'Reds',
    marker_line_color = 'black',
    marker_line_width = 0.5,
))

# Take the "as of" date from the data (dates are YYYY-MM-DD strings, so the
# maximum is the most recent one) instead of hard-coding a day that goes stale.
latest_date = df_countries['date'].max()

fig.update_layout(
    title_text = 'Confirmed Cases as of {}'.format(latest_date),
    title_x = 0.5,
    geo=dict(
        showframe = False,
        showcoastlines = False,
        projection_type = 'equirectangular'
    )
)
# Optional explicit rendering: iplot(fig, filename='NCOVID-19.html')

In [None]:
# Same map without Mainland China, so the other countries are not dwarfed
# on the colour scale.
df_countries_no_china = df_countries[df_countries['Country/Region'] != 'Mainland China']
fig = go.Figure(data=go.Choropleth(
    locations = df_countries_no_china['Country/Region'],
    locationmode = 'country names',
    z = df_countries_no_china['Confirmed'],
    colorscale = 'Reds',
    marker_line_color = 'black',
    marker_line_width = 0.5
))

# Take the "as of" date from the data (dates are YYYY-MM-DD strings, so the
# maximum is the most recent one) instead of hard-coding a day that goes stale.
latest_date_no_china = df_countries_no_china['date'].max()

fig.update_layout(
    title_text = 'Confirmed Cases as of {} excl. China'.format(latest_date_no_china),
    title_x = 0.5,
    geo=dict(
        showframe = False,
        showcoastlines = False,
        projection_type = 'equirectangular'
    )
)

In [None]:
# Per-date, per-country totals for the rows with at least one confirmed case.
# Only the count columns are summed; summing the text columns
# ("Last Update", "Province/State") would produce meaningless values.
df_countrydate = df[df['Confirmed'] > 0]
df_countrydate = (
    df_countrydate.groupby(['date', 'Country/Region'])[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
df_countrydate

In [None]:
# Animated world map: one frame per date, coloured by the confirmed count.
fig = px.choropleth(
    df_countrydate,
    locations='Country/Region',
    locationmode='country names',
    color='Confirmed',
    hover_name='Country/Region',
    animation_frame='date',
)

# Centre the title and hide the map frame and coastlines.
fig.update_layout(
    title_text='Spread of Coronavirus',
    title_x=0.5,
    geo={'showframe': False, 'showcoastlines': False},
)
    

In [None]:
# Pie chart of each country's share of the confirmed cases.
fig = px.pie(df_countries, values='Confirmed', names='Country/Region', height=600,
             title='Share of Confirmed Cases by Country/Region')
fig.update_traces(textposition='inside', textinfo='percent+label')

# Centre the title. The map ("geo") settings used for the choropleths are
# left out here because a pie chart has no map to apply them to.
fig.update_layout(title_x=0.5)


In [None]:
# Pie chart of each country's share of the confirmed cases, without China.
fig = px.pie(df_countries_no_china, values='Confirmed', names='Country/Region', height=600,
             title='Share of Confirmed Cases by Country/Region excl. China')
fig.update_traces(textposition='inside', textinfo='percent+label')

# Centre the title. The map ("geo") settings used for the choropleths are
# left out here because a pie chart has no map to apply them to.
fig.update_layout(title_x=0.5)

In [None]:
# Daily totals per country, ordered by date. The columns are selected with a
# list: selecting several groupby columns with a bare tuple is rejected by
# current pandas versions.
bar_data = (
    df.groupby(['Country/Region', 'date'])[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
    .sort_values('date', ascending=True)
)

# One stacked bar chart per metric, coloured by country.
fig = px.bar(bar_data, x="date", y="Confirmed", color='Country/Region', text = 'Confirmed', orientation='v', height=600,
             title='Cases')
fig.show()

fig = px.bar(bar_data, x="date", y="Deaths", color='Country/Region', text = 'Deaths', orientation='v', height=600,
             title='Deaths')
fig.show()

fig = px.bar(bar_data, x="date", y="Recovered", color='Country/Region', text = 'Recovered', orientation='v', height=600,
             title='Recovered')
fig.show()

In [None]:
# Same bar charts with Mainland China removed.
bar_data_no_china = bar_data[bar_data['Country/Region']!='Mainland China']

fig = px.bar(bar_data_no_china, x="date", y="Confirmed", color='Country/Region', text = 'Confirmed', orientation='v', height=600,
             title='Cases')
fig.show()

# The bar labels show the plotted metric itself (Deaths / Recovered), not
# the confirmed count.
fig = px.bar(bar_data_no_china, x="date", y="Deaths", color='Country/Region', text = 'Deaths', orientation='v', height=600,
             title='Deaths')
fig.show()

fig = px.bar(bar_data_no_china, x="date", y="Recovered", color='Country/Region', text = 'Recovered', orientation='v', height=600,
             title='Recovered')
fig.show()