# 6. 數據分析專案
# Demo5 – Covid-19 Statistics
# Visualization

### Data Import

In [2]:
import pandas as pd
import numpy as np

import plotly.express as px

In [3]:
full_table = pd.read_csv('covid_19_clean_complete.csv')
full_table

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
0,,Afghanistan,33.939110,67.709953,2020-01-22,0,0,0,0,Eastern Mediterranean
1,,Albania,41.153300,20.168300,2020-01-22,0,0,0,0,Europe
2,,Algeria,28.033900,1.659600,2020-01-22,0,0,0,0,Africa
3,,Andorra,42.506300,1.521800,2020-01-22,0,0,0,0,Europe
4,,Angola,-11.202700,17.873900,2020-01-22,0,0,0,0,Africa
...,...,...,...,...,...,...,...,...,...,...
49063,,Sao Tome and Principe,0.186400,6.613100,2020-07-27,865,14,734,117,Africa
49064,,Yemen,15.552727,48.516388,2020-07-27,1691,483,833,375,Eastern Mediterranean
49065,,Comoros,-11.645500,43.333300,2020-07-27,354,7,328,19,Africa
49066,,Tajikistan,38.861000,71.276100,2020-07-27,7235,60,6028,1147,Europe


Pd.to_datetime is usually a must for time series data, otherwise Pandas
will treat it as string.

In [4]:
full_grouped = pd.read_csv('full_grouped.csv')
full_grouped['Date'] = pd.to_datetime(full_grouped['Date'])
full_grouped

Unnamed: 0,Date,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,WHO Region
0,2020-01-22,Afghanistan,0,0,0,0,0,0,0,Eastern Mediterranean
1,2020-01-22,Albania,0,0,0,0,0,0,0,Europe
2,2020-01-22,Algeria,0,0,0,0,0,0,0,Africa
3,2020-01-22,Andorra,0,0,0,0,0,0,0,Europe
4,2020-01-22,Angola,0,0,0,0,0,0,0,Africa
...,...,...,...,...,...,...,...,...,...,...
35151,2020-07-27,West Bank and Gaza,10621,78,3752,6791,152,2,0,Eastern Mediterranean
35152,2020-07-27,Western Sahara,10,1,8,1,0,0,0,Africa
35153,2020-07-27,Yemen,1691,483,833,375,10,4,36,Eastern Mediterranean
35154,2020-07-27,Zambia,4552,140,2815,1597,71,1,465,Africa


In [5]:
day_wise = pd.read_csv('day_wise.csv')
day_wise['Date'] = pd.to_datetime(day_wise['Date'])
day_wise

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,No. of countries
0,2020-01-22,555,17,28,510,0,0,0,3.06,5.05,60.71,6
1,2020-01-23,654,18,30,606,99,1,2,2.75,4.59,60.00,8
2,2020-01-24,941,26,36,879,287,8,6,2.76,3.83,72.22,9
3,2020-01-25,1434,42,39,1353,493,16,3,2.93,2.72,107.69,11
4,2020-01-26,2118,56,52,2010,684,14,13,2.64,2.46,107.69,13
...,...,...,...,...,...,...,...,...,...,...,...,...
183,2020-07-23,15510481,633506,8710969,6166006,282756,9966,169714,4.08,56.16,7.27,187
184,2020-07-24,15791645,639650,8939705,6212290,281164,6144,228736,4.05,56.61,7.16,187
185,2020-07-25,16047190,644517,9158743,6243930,255545,4867,219038,4.02,57.07,7.04,187
186,2020-07-26,16251796,648621,9293464,6309711,204606,4104,134721,3.99,57.18,6.98,187


In [6]:
country_wise = pd.read_csv('country_wise_latest.csv')
country_wise = country_wise.replace('',np.nan).fillna(0)
country_wise

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,Confirmed last week,1 week change,1 week % increase,WHO Region
0,Afghanistan,36263,1269,25198,9796,106,10,18,3.50,69.49,5.04,35526,737,2.07,Eastern Mediterranean
1,Albania,4880,144,2745,1991,117,6,63,2.95,56.25,5.25,4171,709,17.00,Europe
2,Algeria,27973,1163,18837,7973,616,8,749,4.16,67.34,6.17,23691,4282,18.07,Africa
3,Andorra,907,52,803,52,10,0,0,5.73,88.53,6.48,884,23,2.60,Europe
4,Angola,950,41,242,667,18,1,0,4.32,25.47,16.94,749,201,26.84,Africa
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,West Bank and Gaza,10621,78,3752,6791,152,2,0,0.73,35.33,2.08,8916,1705,19.12,Eastern Mediterranean
183,Western Sahara,10,1,8,1,0,0,0,10.00,80.00,12.50,10,0,0.00,Africa
184,Yemen,1691,483,833,375,10,4,36,28.56,49.26,57.98,1619,72,4.45,Eastern Mediterranean
185,Zambia,4552,140,2815,1597,71,1,465,3.08,61.84,4.97,3326,1226,36.86,Africa


In [7]:
worldometer_data = pd.read_csv('worldometer_data.csv')
worldometer_data = worldometer_data.replace('', np.nan).fillna(0)
worldometer_data

Unnamed: 0,Country/Region,Continent,Population,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Deaths/1M pop,TotalTests,Tests/1M pop,WHO Region
0,USA,North America,3.311981e+08,5032179,0.0,162804.0,0.0,2576668.0,0.0,2292707.0,18296.0,15194.0,492.0,63139605.0,190640.0,Americas
1,Brazil,South America,2.127107e+08,2917562,0.0,98644.0,0.0,2047660.0,0.0,771258.0,8318.0,13716.0,464.0,13206188.0,62085.0,Americas
2,India,Asia,1.381345e+09,2025409,0.0,41638.0,0.0,1377384.0,0.0,606387.0,8944.0,1466.0,30.0,22149351.0,16035.0,South-EastAsia
3,Russia,Europe,1.459409e+08,871894,0.0,14606.0,0.0,676357.0,0.0,180931.0,2300.0,5974.0,100.0,29716907.0,203623.0,Europe
4,South Africa,Africa,5.938157e+07,538184,0.0,9604.0,0.0,387316.0,0.0,141264.0,539.0,9063.0,162.0,3149807.0,53044.0,Africa
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
204,Montserrat,North America,4.992000e+03,13,0.0,1.0,0.0,10.0,0.0,2.0,0.0,2604.0,200.0,61.0,12220.0,0
205,Caribbean Netherlands,North America,2.624700e+04,13,0.0,0.0,0.0,7.0,0.0,6.0,0.0,495.0,0.0,424.0,16154.0,0
206,Falkland Islands,South America,3.489000e+03,13,0.0,0.0,0.0,13.0,0.0,0.0,0.0,3726.0,0.0,1816.0,520493.0,0
207,Vatican City,Europe,8.010000e+02,12,0.0,0.0,0.0,12.0,0.0,0.0,0.0,14981.0,0.0,0.0,0.0,Europe


### Color Palettes

There are many color palettes (with
rgb#) online resources for you to
pick series of theme color.
https://colorhunt.co/

In our Covid-19 visualization, it is relatively simple. We fix these four
colour on our decision, in case of picking by plotly default.

In [8]:
cnf, dth, rec, act = '#393e46', '#ff2e63', '#fe9801', '#21bf73'

In [9]:
temp = day_wise[['Date','Deaths', 'Recovered', 'Active']].tail(1)
temp = temp.melt(id_vars="Date", value_vars=['Active', 'Deaths', 'Recovered'])
fig = px.treemap(temp, path=["variable"], values="value", height=225,
                 color_discrete_sequence=[act, rec, dth])
fig.data[0].textinfo = 'label+text+value'
fig.show()

### Create a plot function

DRY, in programming norm, Don’t Repeat Yourself. If you need to plot
same kind of plots more than once, you should create a function.

In [10]:
def plot_map(df, col, pal): # col is column in df; pal is color palettes series
    df = df[df[col]>0]
    fig = px.choropleth(df, locations="Country/Region", locationmode='country names',
                        color=col, hover_name="Country/Region",
                        title=col, hover_data=[col], color_continuous_scale=pal)
#       fig.update_layout(coloraxis_showscale=False)
    fig.show()

#### Plotting confirmed case

In [11]:
plot_map(country_wise, 'Confirmed', 'matter')

#### Plotting death cases

In [12]:
plot_map(country_wise, 'Deaths', 'matter')

### Choropleth with time slider

Covid-19 outbreak spread rapidly, we may plot an interactive graph to review
how it did.

#### Over the time

In [13]:
fig = px.choropleth(full_grouped, locations="Country/Region",
                    color=np.log(full_grouped["Confirmed"]),
                    locationmode='country names', hover_name='Country/Region',
                    animation_frame=full_grouped["Date"].dt.strftime('Y-%m-%d'),
                    title='Cases over time', color_continuous_scale=px.colors.sequential.matter)
fig.update(layout_coloraxis_showscale=False)
fig.show()


divide by zero encountered in log



AttributeError: 'Figure' object has no attribute 'set_size_inches'

### Case over time

pd.melt(frame, id_vars=None, value_vars=None, var_name=N
one, value_name='value', col_level=None, ignore_index=True)
Unpivot a DataFrame from wide to long format,
optionally leaving identifiers set.

In [None]:
temp = full_grouped.groupby('Date')[['Recovered','Deaths','Active']].sum().reset_index()
temp = temp.melt(id_vars="Date",value_vars=['Recovered','Deaths','Active'],
                 var_name='Case',value_name='Count')
temp.head()

Unnamed: 0,Date,Case,Count
0,2020-01-22,Recovered,28
1,2020-01-23,Recovered,30
2,2020-01-24,Recovered,36
3,2020-01-25,Recovered,39
4,2020-01-26,Recovered,52


In [None]:
fig = px.area(temp, x="Date", y="Count", color='Case', height=600, width=700,
              title='Cases over time', color_discrete_sequence = [rec, dth, act])
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

### Create function to plot bar and line chart

In [None]:
def plot_daywise(col, hue):
    fig = px.bar(day_wise, x="Date", y=col, width=700, color_discrete_sequence=[hue])
    fig.update_layout(title=col, xaxis_title="",yaxis_title="")
    fig.show()

In [None]:
def plot_daywise_line(col, hue):
    fig = px.line(day_wise, x="Date", y=col, width=700, color_discrete_sequence=[hue])
    fig.update_layout(title=col, xaxis_title="",yaxis_title="")
    fig.show()

In [None]:
plot_daywise('New cases','#333333')

In [None]:
plot_daywise('Recovered', act) #rec?

In [None]:
plot_daywise_line('Deaths / 100 Cases', dth)

In [None]:
plot_daywise_line('Recovered / 100 Cases', act)

### Plotting recover and active case

In [None]:
temp = day_wise[['Date', 'Recovered', 'Active']]
temp = temp.melt(id_vars='Date', value_vars=['Recovered', 'Active'],
                 var_name='Variable', value_name='Count')
px.line(temp, x='Date', y='Count', color='Variable')

### Horizontal Bar

In [None]:
def plot_hbar(df, col, n, hover_data=[]):
    fig = px.bar(df.sort_values(col).tail(n),
                 x=col, y="Country/Region", color='WHO Region',
                 text=col, orientation='h', width=700, hover_data=hover_data,
                 color_discrete_sequence = px.colors.qualitative.Dark2)
    fig.update_layout(title=col, xaxis_title="",yaxis_title="",
                      yaxis_categoryorder = 'total ascending',
                      uniformtext_minsize=8, uniformtext_mode='hide')
    fig.show()

In [None]:
plot_hbar(country_wise, 'Confirmed', 15)

In [None]:
plot_hbar(country_wise, 'Active', 15)

In [None]:
def plot_hbar_wm(col, n, min_pop=1000000, sort='descending'):
    df = worldometer_data[worldometer_data['Population']>min_pop]
    df = df.sort_values(col, ascending=True).tail(n)
    fig = px.bar(df,
                 x=col, y="Country/Region", color='WHO Region',
                 text=col, orientation='h', width=700,
                 color_discrete_sequence = px.colors.qualitative.Dark2)
    fig.update_layout(title=col+' (Only countries with > 1M Pop)',
                      xaxis_title="",yaxis_title="",
                      yaxis_categoryorder = 'total ascending',
                      uniformtext_minsize=8, uniformtext_mode='hide')
    fig.show()

In [None]:
plot_hbar_wm('Tot Cases/1M pop', 15, 1000000)

### Scatter plot on Confirmed vs Deaths

In [None]:
fig = px.scatter(country_wise.sort_values('Deaths', ascending=False).iloc[:20, :],
                 x='Confirmed', y='Deaths', color='Country/Region', size='Confirmed',
                 height=700, text='Country/Region', log_x=True, log_y=True,
                 title='Deaths vs Confirmed (Scale is in log10)')
fig.update_traces(textposition='top center')
fig.update_layout(showlegend=False)
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

### Composition with px.treemap

In [None]:
def plot_treemap(col):
    fig = px.treemap(country_wise, path=["Country/Region"],values=col, height=700,
                     title=col, color_discrete_sequence = px.colors.qualitative.Dark2)
    fig.data[0].textinfo = 'label+text+value'
    fig.show()

In [None]:
plot_treemap('Confirmed')

### Bubble plot a long list

In [None]:
def plot_bubble(col, pal):
    temp = full_grouped[full_grouped[col]>0].sort_values('Country/Region', ascending=False)
    fig = px.scatter(temp, x='Date', y='Country/Region', size=col, color=col, height=3000,
                     color_continuous_scale=pal)
    fig.update_layout(yaxis = dict(dtick = 1))
    fig.update(layout_coloraxis_showscale=False)
    fig.show()

In [None]:
plot_bubble('New cases', 'Viridis')

## Same dot graph different compare object

### Confirmed vs Deaths

In [None]:
temp = worldometer_data[worldometer_data['WHO Region']!=0]

In [None]:
fig = px.scatter(temp, x='TotalCases', y='TotalDeaths', color='WHO Region',
                 height=700, hover_name='Country/Region', log_x=True, log_y=True,
                 title='Confirmed vs Deaths',
                 color_discrete_sequence=px.colors.qualitative.Vivid)
fig.update_traces(textposition='top center')
# fig.update_layout(showlegend=False)
# fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

### Populations vs Confirmed

In [None]:
fig = px.scatter(temp, x='Population', y='TotalCases', color='WHO Region',
                 height=700, hover_name='Country/Region', log_x=True, log_y=True,
                 title='Population vs Confirmed',
                 color_discrete_sequence=px.colors.qualitative.Vivid)
fig.update_traces(textposition='top center')
# fig.update_layout(showlegend=False)
# fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

## New

### Deaths vs Health Life Expectancy

Pd.merge() to combine 2 DFs on key.

In [None]:
happiness_report = pd.read_csv('Happiness2019.csv')
happiness_report = happiness_report[['Country or region', 'Healthy life expectancy']]

temp = country_wise.merge(happiness_report, left_on='Country/Region', right_on='Country or region')
px.scatter(temp, y='Deaths / 100 Cases', x='Healthy life expectancy', color='WHO Region',
                    hover_data=['Country/Region'])

### Chapter Wrap Up
PX could be that easy and helpful. In this chapter, we demonstrate
practical usage of data visualization of Covid-19 data.
Plotting function may be needed for plotting a few same kind of
graph.
Once you clear the coding, everything is at your finger tip.

### Reference & Resources

Official Website:
https://plotly.com/python/
Plotly Graph Objects:
https://plotly.com/python/graph-objects/
WorldBank API:
• https://blogs.worldbank.org/opendata/introducing-wbgapi-new-python-package-accessingworld-
bank-data
• https://nbviewer.org/github/tgherzog/wbgapi/blob/master/examples/wbgapi-cookbook.ipynb
• https://pypi.org/project/wbgapi/
GitHub Open Source Code:
https://github.com/plotly/plotly.py

## old data

In [None]:
# url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv'
# full_table = pd.read_csv(url)
# # path="C:/Users/User/Desktop/project 1/VS/data_folder/5Demo/"
# # file_name="country wise latest.csv"
# # full_table = pd.read_csv(f"{path}{file_name}")
# full_table


# df = pd.DataFrame(full_table)
# len(df)
# def make(a):
#     global df
#     i=0
#     if len(df)>i:
#         a=1
        
        
# a=pd.DataFrame({'Province/State':['NaN']})
# pd.concat([df,a])
# df.insert(0, "Province/State",0)

# # a=make(df)
# print(df)
