In [194]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import bar_chart_race as bcr

In [195]:
df = pd.read_csv("owid-covid-data.csv")

In [196]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54554 entries, 0 to 54553
Data columns (total 52 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   iso_code                            54240 non-null  object 
 1   continent                           53910 non-null  object 
 2   location                            54554 non-null  object 
 3   date                                54554 non-null  object 
 4   total_cases                         53997 non-null  float64
 5   new_cases                           53807 non-null  float64
 6   new_cases_smoothed                  52851 non-null  float64
 7   total_deaths                        45957 non-null  float64
 8   new_deaths                          45783 non-null  float64
 9   new_deaths_smoothed                 52851 non-null  float64
 10  total_cases_per_million             53683 non-null  float64
 11  new_cases_per_million               53494

In [197]:
df[['location','total_deaths_per_million','population']][df['date'] == '2020-12-15'].sort_values(by=['population'], ascending=True).head(20).location

52818                             Vatican
41867                          San Marino
28744                       Liechtenstein
32592                              Monaco
40988               Saint Kitts and Nevis
31113                    Marshall Islands
14133                            Dominica
1164                              Andorra
1715                  Antigua and Barbuda
43278                          Seychelles
41544    Saint Vincent and the Grenadines
19688                             Grenada
41266                         Saint Lucia
41573                               Samoa
42122               Sao Tome and Principe
4375                             Barbados
52532                             Vanuatu
22032                             Iceland
3514                              Bahamas
5254                               Belize
Name: location, dtype: object

In [198]:
df['total_deaths_per_million'] = df['total_deaths_per_million'].replace(np.NaN,0)

In [199]:
df['total_deaths_per_million'] = df['total_deaths_per_million'].fillna(0).astype(np.int64)

In [200]:
df.drop(df[df['population'] < 1000000].index, inplace=True)

In [216]:
df_pivoted = df[df['continent'] == 'South America'].pivot(index='date', columns='location', values='total_deaths_per_million')
#df_pivoted = df.pivot(index='date', columns='location', values='total_deaths_per_million')

In [217]:
df_pivoted = df_pivoted.fillna(0).astype(np.int64)

In [218]:
df_pivoted = df_pivoted.replace(np.NaN,0)

In [219]:
df_pivoted.head(15)

location,Argentina,Bolivia,Brazil,Chile,Colombia,Ecuador,Paraguay,Peru,Uruguay,Venezuela
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-01,0,0,0,0,0,0,0,0,0,0
2020-02-03,0,0,0,0,0,0,0,0,0,0
2020-02-04,0,0,0,0,0,0,0,0,0,0
2020-02-05,0,0,0,0,0,0,0,0,0,0
2020-02-06,0,0,0,0,0,0,0,0,0,0
2020-02-07,0,0,0,0,0,0,0,0,0,0
2020-02-08,0,0,0,0,0,0,0,0,0,0
2020-02-09,0,0,0,0,0,0,0,0,0,0
2020-02-10,0,0,0,0,0,0,0,0,0,0
2020-02-11,0,0,0,0,0,0,0,0,0,0


In [220]:
df_pivoted.index[:130]

Index(['2020-01-01', '2020-02-03', '2020-02-04', '2020-02-05', '2020-02-06',
       '2020-02-07', '2020-02-08', '2020-02-09', '2020-02-10', '2020-02-11',
       ...
       '2020-06-01', '2020-06-02', '2020-06-03', '2020-06-04', '2020-06-05',
       '2020-06-06', '2020-06-07', '2020-06-08', '2020-06-09', '2020-06-10'],
      dtype='object', name='date', length=130)

In [221]:
df_pivoted.drop(df_pivoted.index[:130],inplace=True) #onde o Brazil someça a aparecer

In [244]:
clrs = ['green' if (x == 'Brazil') else 'grey' for x in df.location.unique()]
clrs = ['green' if (x == 'Brazil') else 'grey' for x in df[df['continent'] == 'South America'].location.unique()]

In [245]:
def summary(values, ranks):
    total_deaths = int(round(values.sum(), -2))
    s = f'Total Deaths - {total_deaths:,.0f}'
    return {'x': .99, 'y': .05, 's': s, 'ha': 'right', 'size': 8}

In [250]:
def rank(values, ranks):
    top2 = values.nlargest(2)
    leader = top2.index[0]
    lead = top2.iloc[0] - top2.iloc[1]
    s = f'{leader} by {lead:.0f}'
    return {'x': .99, 'y': .05, 's': s, 'ha': 'right', 'size': 8}

In [251]:
df[(df['location'] == 'Brazil') & (df['date'] == '2020-08-01')].head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
6805,BRA,South America,Brazil,2020-08-01,2707877.0,45392.0,44766.286,93563.0,1088.0,1016.286,...,14103.452,3.4,177.961,8.11,10.1,17.9,,2.2,75.88,0.759


In [254]:
bcr.bar_chart_race(df=df_pivoted, 
                   filename=None, 
                   n_bars=20,
                   cmap=clrs,
                   bar_size=.90,
                   steps_per_period=20, 
                   period_length=200,
                   period_fmt='Season {x:,.0}',
                   period_summary_func=rank,
                   title='COVID-19 - Óbtios por milhões de habitantes')

Exception: You do not have ffmpeg installed on your machine. Download
                            ffmpeg from here: https://www.ffmpeg.org/download.html.
                            
                            Matplotlib's original error message below:

                            Cannot specify ',' with 's'.
                            