In [1]:
# imports
import pandas as pd
import bar_chart_race as bcr

# Load the Johns Hopkins CSSE global confirmed-cases time series.
# The file is wide: one row per location, one column per date.
raw = pd.read_csv('time_series_covid19_confirmed_global.csv')

# Build a single 'Location' label: "Province/State, Country/Region" when a
# province is present, otherwise just the country/region.
# str.cat yields NaN whenever either part is missing, so the fillna chain puts
# back whichever single part exists (or '' if both are missing) -- the same
# result as joining the non-null parts row by row, without a Python-level loop.
province = raw['Province/State']
country = raw['Country/Region']
location = (
    province.str.cat(country, sep=', ')
    .fillna(country)
    .fillna(province)
    .fillna('')
)

# Drop the coordinates (Lat/Long) and the two columns that were merged into
# Location, then put Location at the front of the frame.
df = raw.drop(columns=['Lat', 'Long', 'Province/State', 'Country/Region'])
df.insert(0, 'Location', location)

# For bcr, the index has to be the dates and the columns have to be the places
# to plot, so flip the frame: dates go down the side, locations across the top.
# Moving Location into the index *before* transposing means the label strings
# never share a column with the counts, so the date columns keep their dtype
# instead of being upcast to object.
df_transposed = df.set_index('Location').T.rename_axis(columns=None)
df_transposed


Unnamed: 0,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,"Australian Capital Territory, Australia","New South Wales, Australia",...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
1/22/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/23/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/24/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/25/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/26/20,0,0,0,0,0,0,0,0,0,3,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1/14/21,53775,65994,103127,8868,18613,184,1770715,163576,118,5045,...,3260258,29989,77777,1,118415,1531,150505,2110,32800,25368
1/15/21,53831,66635,103381,8946,18679,184,1783047,163972,118,5057,...,3316019,30946,77904,1,118856,1536,151142,2111,34278,26109
1/16/21,53938,67216,103611,9038,18765,187,1791979,164235,118,5066,...,3357361,31669,77968,1,119306,1537,151569,2112,36074,26881
1/17/21,53984,67690,103833,9083,18875,189,1799243,164586,118,5074,...,3395959,32378,77968,1,119803,1537,152031,2112,37605,27203


In [2]:
# Locations to include in the race. Each label must match a column of
# df_transposed exactly (province-level entries use "Province, Country").
cols = ['United Kingdom', 'Germany', 'Spain', 'US', 'India', 'Brazil', 'Guatemala', 'Italy', 'France', 'Hubei, China']
subset = df_transposed[cols]

# Make sure every cell is numeric and label the index as 'Date'.
# rename_axis works on the new frame, so df_transposed itself is left untouched
# and this cell gives the same result however many times it is re-run.
#
# NOTE: there is deliberately no cumsum here. The confirmed-cases series is
# already a running total (consecutive rows in the table above only ever grow
# by a day's worth of cases), so accumulating it again would plot the sum of
# running totals rather than the number of cases.
csdf = subset.apply(pd.to_numeric).rename_axis(index='Date')

# Turn the 'm/d/yy' date labels into datetime objects. The explicit format
# removes any day/month ambiguity and avoids per-value format inference.
csdf.index = pd.to_datetime(csdf.index, format='%m/%d/%y')


In [3]:
# Render the bar chart race for the prepared frame and save it as a video.
# The options are collected in one mapping so they can be read (and tweaked)
# separately from the call itself.
race_options = dict(
    df=csdf,
    title="COVID-19 Case by Country",
    filename='covid-19-viz.mp4',
    # NOTE(review): "%-d" (day of month without zero padding) is a
    # platform-specific strftime extension -- confirm it is supported on the
    # machine that renders the video.
    period_fmt="%b %-d, %Y",
    n_bars=8,
    steps_per_period=100,
    interpolate_period=True,
)
bcr.bar_chart_race(**race_options)