# COVID-19 Bar Chart Race - Number of Cases

### Author: Akhil James
### Data Source: https://ourworldindata.org/coronavirus-data

## 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import bar_chart_race as bcr
from IPython.display import HTML
import warnings
warnings.filterwarnings("ignore")
from IPython.display import Video

## 2. Load Data

In [2]:
# Read the raw case/death records from the local Excel workbook and preview
# the first five rows.
covid_data_complete = pd.read_excel(io='covid-data.xlsx')
covid_data_complete.head(n=5)

Unnamed: 0,date,location,total_cases,new_cases,total_deaths,new_deaths
0,2020-03-13,Aruba,2,2,0,0
1,2020-03-20,Aruba,4,2,0,0
2,2020-03-24,Aruba,12,8,0,0
3,2020-03-25,Aruba,17,5,0,0
4,2020-03-26,Aruba,19,2,0,0


## 3. Inspect the dataframe

In [3]:
# Summarise column dtypes and non-null counts to check for missing values.
covid_data_complete.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20551 entries, 0 to 20550
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   date          20551 non-null  object
 1   location      20551 non-null  object
 2   total_cases   20551 non-null  int64 
 3   new_cases     20551 non-null  int64 
 4   total_deaths  20551 non-null  int64 
 5   new_deaths    20551 non-null  int64 
dtypes: int64(4), object(2)
memory usage: 963.5+ KB


## 4. Drop Columns and modify dataframe

In [4]:
# Parse the date strings in a single vectorised call. Series.apply(pd.to_datetime)
# parses one element at a time and is much slower for the same result.
covid_data_complete["date"] = pd.to_datetime(covid_data_complete["date"])

# Keep only date, location and total_cases.
# The labels are passed by name through `columns=` (the previous form handed a
# DataFrame to drop(), which only worked because iterating a DataFrame yields
# its column names). errors="ignore" makes the cell safe to re-run after the
# columns have already been removed.
drop_cols = ['new_cases', 'total_deaths', 'new_deaths']
covid_data_complete = covid_data_complete.drop(columns=drop_cols, errors="ignore")

covid_data_complete.head()

Unnamed: 0,date,location,total_cases
0,2020-03-13,Aruba,2
1,2020-03-20,Aruba,4
2,2020-03-24,Aruba,12
3,2020-03-25,Aruba,17
4,2020-03-26,Aruba,19


In [5]:
# Build the analysis frame from exactly the three columns that are needed and
# rename them by name rather than by position, so extra columns or a different
# upstream column order cannot silently mislabel the data.
# Selecting with a list and renaming both return new objects, so the source
# frame is left untouched.
column_names = {'date': 'Date', 'location': 'Countries', 'total_cases': 'Cases'}
covid_data = covid_data_complete[list(column_names)].rename(columns=column_names)
covid_data.head(10)

Unnamed: 0,Date,Countries,Cases
0,2020-03-13,Aruba,2
1,2020-03-20,Aruba,4
2,2020-03-24,Aruba,12
3,2020-03-25,Aruba,17
4,2020-03-26,Aruba,19
5,2020-03-27,Aruba,28
6,2020-03-28,Aruba,28
7,2020-03-29,Aruba,28
8,2020-03-30,Aruba,50
9,2020-04-01,Aruba,55


In [6]:
# Number of distinct values in the Countries column.
total_countries = covid_data['Countries'].nunique()
total_countries

210

## 5. Arrange the dataframe for creating bar chart race

In [7]:
# Reshape to the wide layout used for the bar chart race: one row per date
# (as the index), one column per country, values = cumulative case count.
covid_data_by_date = (
    covid_data
    .pivot(index='Date', columns='Countries', values='Cases')
    .sort_index()  # chronological order is required for the forward fill below
    # Countries do not have a row for every date. Cases are cumulative, so a
    # missing date must carry the previous total forward; filling it with 0
    # would make the country's bar collapse between reports.
    .ffill()
    # Anything still missing lies before the country's first report.
    .fillna(0)
)
covid_data_by_date

Countries,Afghanistan,Albania,Algeria,Andorra,Angola,Anguilla,Antigua and Barbuda,Argentina,Armenia,Aruba,...,United States Virgin Islands,Uruguay,Uzbekistan,Vatican,Venezuela,Vietnam,Western Sahara,Yemen,Zambia,Zimbabwe
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-12-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-05-26,11173.0,1004.0,8503.0,763.0,69.0,3.0,25.0,12615.0,7113.0,101.0,...,69.0,787.0,3261.0,12.0,1177.0,326.0,6.0,237.0,920.0,56.0
2020-05-27,11831.0,1029.0,8697.0,763.0,71.0,3.0,25.0,13215.0,7402.0,101.0,...,69.0,789.0,3333.0,12.0,1211.0,327.0,6.0,249.0,920.0,56.0
2020-05-28,12456.0,1050.0,8857.0,763.0,71.0,3.0,25.0,13920.0,7774.0,101.0,...,69.0,803.0,3333.0,12.0,1245.0,327.0,6.0,255.0,1057.0,132.0
2020-05-29,13036.0,1076.0,8997.0,763.0,73.0,3.0,25.0,14689.0,8216.0,101.0,...,69.0,811.0,3444.0,12.0,1327.0,327.0,6.0,278.0,1057.0,149.0


## 6. Create the bar chart race and save it as an mp4 file

In [8]:
# Render the bar chart race from the wide date-by-country frame and write it
# to an mp4 file.
df = covid_data_by_date


def _blank_period_summary(values, ranks):
    """Return the text properties for the per-period summary label.

    The label text ('s') is deliberately empty, so only the placement and
    font settings are supplied; both arguments are ignored.
    """
    return {
        'x': .99,
        'y': .18,
        's': '',
        'ha': 'right',
        'size': 8,
        'family': 'Courier New',
    }


bcr.bar_chart_race(
    # input and output
    df=df,
    filename='Covid19_BarChartRace.mp4',
    writer=None,
    fig=None,
    # layout and ordering of the bars
    orientation='h',
    sort='desc',
    n_bars=10,
    fixed_order=False,
    fixed_max=False,
    scale='linear',
    # animation timing
    steps_per_period=10,
    period_length=1000,
    interpolate_period=False,
    # labels and annotations
    title='Developer: Akhil James\n  COVID-19 Cases by Country\n ',
    title_size=10,
    label_bars=True,
    bar_label_size=7,
    tick_label_size=5,
    period_label={'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},
    period_fmt='%B %d, %Y',
    period_summary_func=_blank_period_summary,
    perpendicular_bar_func='median',
    shared_fontdict={'color': '.1'},
    # figure and bar appearance
    figsize=(5, 3),
    dpi=500,
    cmap='dark24',
    bar_size=.95,
    bar_kwargs={'alpha': .7},
    filter_column_colors=True,
)

In [9]:
# Show the rendered mp4 in the notebook output.
Video("Covid19_BarChartRace.mp4", width=900, height=600)