## Covid-19 - Bar Chart Race

In [2]:
#importing required libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import bar_chart_race as bcr
import seaborn as sns
from IPython.display import HTML
import warnings
warnings.filterwarnings("ignore")
from IPython.display import Video

In [4]:
# loading the dataset
df = pd.read_csv("covid_19_india.csv")
df.sample(5)

Unnamed: 0,Sno,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed
2712,2713,2020-06-06,8:00 AM,Gujarat,-,-,13003,1190,19094
581,582,2020-04-02,6:00 PM,Rajasthan,-,-,3,0,108
11572,11573,2021-02-11,8:00 AM,Jharkhand,-,-,117616,1081,119161
3911,3912,2020-07-09,8:00 AM,Manipur,-,-,793,0,1435
1490,1491,2020-05-01,5:00 PM,Jammu and Kashmir,-,-,216,8,614


In [5]:
#checking basic information about data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14654 entries, 0 to 14653
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Sno                       14654 non-null  int64 
 1   Date                      14654 non-null  object
 2   Time                      14654 non-null  object
 3   State/UnionTerritory      14654 non-null  object
 4   ConfirmedIndianNational   14654 non-null  object
 5   ConfirmedForeignNational  14654 non-null  object
 6   Cured                     14654 non-null  int64 
 7   Deaths                    14654 non-null  int64 
 8   Confirmed                 14654 non-null  int64 
dtypes: int64(4), object(5)
memory usage: 1.0+ MB


***We need to pre-process the data before creating a bar_chart_race***

In [6]:
# creating a copy of dataframe to avoid any changes in the orignal dataset.
covid_df = df.copy()

In [7]:
# Droping unwanted columns 
covid_df.drop(["Sno","Time","ConfirmedIndianNational","ConfirmedForeignNational"],axis=1,inplace=True)
# change datatype of date to a pandas datetime format
covid_df['Date'] = pd.to_datetime(covid_df['Date'], dayfirst=True)
print(covid_df['Date'].dtypes)   

datetime64[ns]


***Now lets take a look at distinct states in the dataset***

In [8]:
covid_df['State/UnionTerritory'].unique()

array(['Kerala', 'Telengana', 'Delhi', 'Rajasthan', 'Uttar Pradesh',
       'Haryana', 'Ladakh', 'Tamil Nadu', 'Karnataka', 'Maharashtra',
       'Punjab', 'Jammu and Kashmir', 'Andhra Pradesh', 'Uttarakhand',
       'Odisha', 'Puducherry', 'West Bengal', 'Chhattisgarh',
       'Chandigarh', 'Gujarat', 'Himachal Pradesh', 'Madhya Pradesh',
       'Bihar', 'Manipur', 'Mizoram', 'Andaman and Nicobar Islands',
       'Goa', 'Unassigned', 'Assam', 'Jharkhand', 'Arunachal Pradesh',
       'Tripura', 'Nagaland', 'Meghalaya',
       'Dadra and Nagar Haveli and Daman and Diu',
       'Cases being reassigned to states', 'Sikkim', 'Daman & Diu',
       'Lakshadweep', 'Telangana'], dtype=object)

***There are some rows where the 'States' is not defined properly. So, we are not considering those states for our analysis and thus removing them. Also there is some reduntant state so will drop them.***

In [12]:
covid_df[(covid_df['State/UnionTerritory'] != 'Unassigned') &
                    (covid_df['State/UnionTerritory'] != 'Cases being reassigned to states')]
covid_df.drop(covid_df[covid_df['State/UnionTerritory'] == 'Telengana'].index,inplace = True)
covid_df['State/UnionTerritory'].unique()

array(['Kerala', 'Delhi', 'Rajasthan', 'Uttar Pradesh', 'Haryana',
       'Ladakh', 'Tamil Nadu', 'Karnataka', 'Maharashtra', 'Punjab',
       'Jammu and Kashmir', 'Andhra Pradesh', 'Uttarakhand', 'Odisha',
       'Puducherry', 'West Bengal', 'Chhattisgarh', 'Chandigarh',
       'Gujarat', 'Himachal Pradesh', 'Madhya Pradesh', 'Bihar',
       'Manipur', 'Mizoram', 'Andaman and Nicobar Islands', 'Goa',
       'Unassigned', 'Assam', 'Jharkhand', 'Arunachal Pradesh', 'Tripura',
       'Nagaland', 'Meghalaya',
       'Dadra and Nagar Haveli and Daman and Diu',
       'Cases being reassigned to states', 'Sikkim', 'Daman & Diu',
       'Lakshadweep', 'Telangana'], dtype=object)

In [13]:
covid_df.head()

Unnamed: 0,Date,State/UnionTerritory,Cured,Deaths,Confirmed
0,2020-01-30,Kerala,0,0,1
1,2020-01-31,Kerala,0,0,1
2,2020-02-01,Kerala,0,0,2
3,2020-02-02,Kerala,0,0,3
4,2020-02-03,Kerala,0,0,3


### Covid-19 Rise of Confirmed Cases

In [16]:
# arranging the dataframes for creating a bar chart race 
# set countries and date as index and find cases
# transpose the dataframe to have countries as columns and dates as rows

covid_df_by_date = covid_df.set_index(['State/UnionTerritory','Date']).unstack()['Confirmed'].T.reset_index()

covid_df_by_date = covid_df_by_date.set_index('Date') #make date as index - desired by barchartrace

covid_df_by_date = covid_df_by_date.fillna(0) #fill na with 0

covid_df_by_date.sample(5) 

State/UnionTerritory,Andaman and Nicobar Islands,Andhra Pradesh,Arunachal Pradesh,Assam,Bihar,Cases being reassigned to states,Chandigarh,Chhattisgarh,Dadra and Nagar Haveli and Daman and Diu,Daman & Diu,...,Punjab,Rajasthan,Sikkim,Tamil Nadu,Telangana,Tripura,Unassigned,Uttar Pradesh,Uttarakhand,West Bengal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-06-30,90.0,13891.0,187.0,7752.0,9640.0,7004.0,435.0,2761.0,203.0,0.0,...,5418.0,17660.0,88.0,86224.0,0.0,1380.0,0.0,22828.0,2831.0,17907.0
2020-11-15,4527.0,852955.0,15792.0,210175.0,225595.0,0.0,15636.0,210004.0,3285.0,0.0,...,140605.0,223633.0,4456.0,756372.0,0.0,31945.0,0.0,509903.0,68002.0,428498.0
2021-03-23,5039.0,894044.0,16842.0,218006.0,263659.0,0.0,24667.0,325678.0,3485.0,0.0,...,215409.0,326026.0,6206.0,868367.0,0.0,33470.0,0.0,608076.0,98552.0,580999.0
2020-03-21,0.0,3.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,13.0,17.0,0.0,3.0,0.0,0.0,0.0,24.0,3.0,3.0
2021-04-19,5390.0,962037.0,16987.0,224455.0,324117.0,0.0,33934.0,544840.0,4803.0,0.0,...,300038.0,414617.0,6696.0,991451.0,0.0,34034.0,0.0,851620.0,124033.0,659927.0


In [19]:
#creating bar chart race
df = covid_df_by_date
bcr.bar_chart_race(
    df=df,
    filename='covid-19-confirmed-cases-india.mp4',
    orientation='h',
    sort='desc',
    n_bars=10,
    fixed_order=False,
    fixed_max=True,
    steps_per_period=10,
    interpolate_period=False,
    label_bars=True,
    bar_size=.95,
    period_label={'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},
    period_fmt='%B %d, %Y',
    period_summary_func=lambda v, r: {'x': .99, 'y': .18,
                                      's': f'Total Confirmed Cases: {v.nlargest(6).sum():,.0f}',
                                      'ha': 'right', 'size': 8, 'family': 'Courier New'},
    period_length=500,
    figsize=(5, 3),
    dpi=144,
    cmap='set2',
    title='COVID-19 Rise of Confirmed Cases in India',
    title_size='10',
    bar_label_size=7,
    tick_label_size=7,
    shared_fontdict={'family' : 'Helvetica', 'color' : '.1'},
    scale='linear',
    writer=None,
    fig=None,
    bar_kwargs={'alpha': .7},
    filter_column_colors=False)  

### Covid-19 Number of Deaths in India

In [20]:
# arranging the dataframes for creating a bar chart race 
# set countries and date as index and find cases
# transpose the dataframe to have countries as columns and dates as rows

covid_death_by_date = covid_df.set_index(['State/UnionTerritory','Date']).unstack()['Deaths'].T.reset_index()

covid_death_by_date = covid_death_by_date.set_index('Date') #make date as index - desired by barchartrace

covid_death_by_date = covid_death_by_date.fillna(0) #fill na with 0

covid_death_by_date.sample(5) 

State/UnionTerritory,Andaman and Nicobar Islands,Andhra Pradesh,Arunachal Pradesh,Assam,Bihar,Cases being reassigned to states,Chandigarh,Chhattisgarh,Dadra and Nagar Haveli and Daman and Diu,Daman & Diu,...,Punjab,Rajasthan,Sikkim,Tamil Nadu,Telangana,Tripura,Unassigned,Uttar Pradesh,Uttarakhand,West Bengal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-11-04,60.0,6734.0,42.0,934.0,1108.0,0.0,228.0,2266.0,2.0,0.0,...,4245.0,1936.0,73.0,11214.0,0.0,353.0,0.0,7089.0,1033.0,7013.0
2021-01-09,62.0,7127.0,56.0,1059.0,1428.0,0.0,326.0,3469.0,2.0,0.0,...,5437.0,2727.0,129.0,12208.0,0.0,388.0,0.0,8469.0,1562.0,9902.0
2020-11-11,60.0,6814.0,46.0,952.0,1156.0,0.0,241.0,2482.0,2.0,0.0,...,4358.0,2008.0,80.0,11387.0,0.0,359.0,0.0,7261.0,1080.0,7403.0
2020-11-28,61.0,6976.0,50.0,980.0,1248.0,0.0,270.0,2813.0,2.0,0.0,...,4737.0,2255.0,103.0,11681.0,0.0,370.0,0.0,7697.0,1201.0,8270.0
2020-06-07,0.0,73.0,0.0,4.0,30.0,0.0,5.0,4.0,0.0,0.0,...,50.0,231.0,0.0,251.0,0.0,0.0,0.0,257.0,11.0,383.0


In [21]:
# creating bar chart race
df = covid_death_by_date
bcr.bar_chart_race(
    df=df,
    filename='covid-19-deaths-in-india.mp4',
    orientation='h',
    sort='desc',
    n_bars=10,
    fixed_order=False,
    fixed_max=True,
    steps_per_period=10,
    interpolate_period=False,
    label_bars=True,
    bar_size=.95,
    period_label={'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},
    period_fmt='%B %d, %Y',
    period_summary_func=lambda v, r: {'x': .99, 'y': .18,
                                      's': f'Total Deaths: {v.nlargest(6).sum():,.0f}',
                                      'ha': 'right', 'size': 8, 'family': 'Courier New'},
    period_length=500,
    figsize=(5, 3),
    dpi=144,
    cmap='Dark2',
    title='COVID-19-Deaths-in-India',
    title_size='10',
    bar_label_size=7,
    tick_label_size=7,
    shared_fontdict={'family' : 'Helvetica', 'color' : '.1'},
    scale='linear',
    writer=None,
    fig=None,
    bar_kwargs={'alpha': .7},
    filter_column_colors=False) 

### Covid-19 Number of Recovered Cases

In [23]:
# arranging the dataframes for creating a bar chart race 
# set countries and date as index and find cases
# transpose the dataframe to have countries as columns and dates as rows

covid_recovery_by_date = covid_df.set_index(['State/UnionTerritory','Date']).unstack()['Cured'].T.reset_index()

covid_recovery_by_date = covid_recovery_by_date.set_index('Date') #make date as index - desired by barchartrace

covid_recovery_by_date = covid_recovery_by_date.fillna(0) #fill na with 0

covid_recovery_by_date.sample(5) 

State/UnionTerritory,Andaman and Nicobar Islands,Andhra Pradesh,Arunachal Pradesh,Assam,Bihar,Cases being reassigned to states,Chandigarh,Chhattisgarh,Dadra and Nagar Haveli and Daman and Diu,Daman & Diu,...,Punjab,Rajasthan,Sikkim,Tamil Nadu,Telangana,Tripura,Unassigned,Uttar Pradesh,Uttarakhand,West Bengal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-09,4866.0,874531.0,16631.0,212430.0,249198.0,0.0,19637.0,275042.0,3373.0,0.0,...,160042.0,301962.0,5457.0,805136.0,0.0,32854.0,0.0,571606.0,88370.0,540952.0
2020-11-03,4132.0,798625.0,13238.0,197569.0,209582.0,0.0,13708.0,166391.0,3221.0,0.0,...,125961.0,182680.0,3655.0,698820.0,0.0,29318.0,0.0,455498.0,58050.0,338075.0
2020-04-27,11.0,235.0,1.0,27.0,56.0,0.0,17.0,32.0,0.0,0.0,...,71.0,518.0,0.0,1020.0,0.0,2.0,0.0,335.0,33.0,105.0
2020-07-15,109.0,17467.0,153.0,11416.0,12849.0,0.0,446.0,3275.0,310.0,0.0,...,5663.0,19161.0,87.0,97310.0,0.0,1538.0,0.0,24983.0,2867.0,19931.0
2021-03-21,4969.0,884094.0,16783.0,215201.0,261413.0,0.0,22142.0,311520.0,3425.0,0.0,...,187198.0,318842.0,6016.0,845812.0,0.0,33043.0,0.0,595518.0,95851.0,566526.0


In [24]:
# creating bar chart race
df = covid_recovery_by_date
bcr.bar_chart_race(
    df=df,
    filename='covid-19-recovered-cases-india.mp4',
    orientation='h',
    sort='desc',
    n_bars=10,
    fixed_order=False,
    fixed_max=True,
    steps_per_period=10,
    interpolate_period=False,
    label_bars=True,
    bar_size=.95,
    period_label={'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},
    period_fmt='%B %d, %Y',
    period_summary_func=lambda v, r: {'x': .99, 'y': .18,
                                      's': f'Total Recovered Cases: {v.nlargest(6).sum():,.0f}',
                                      'ha': 'right', 'size': 8, 'family': 'Courier New'},
    period_length=500,
    figsize=(5, 3),
    dpi=144,
    cmap='Dark2',
    title='COVID-19 Recovered Cases in India',
    title_size='10',
    bar_label_size=7,
    tick_label_size=7,
    shared_fontdict={'family' : 'Helvetica', 'color' : '.1'},
    scale='linear',
    writer=None,
    fig=None,
    bar_kwargs={'alpha': .7},
    filter_column_colors=False) 

### Vaccination Total Doses Administered in India

In [25]:
df1 = pd.read_csv("covid_vaccine_statewise.csv")
df1.sample(5)

Unnamed: 0,Updated On,State,Total Individuals Vaccinated,Total Sessions Conducted,Total Sites,First Dose Administered,Second Dose Administered,Male(Individuals Vaccinated),Female(Individuals Vaccinated),Transgender(Individuals Vaccinated),Total Covaxin Administered,Total CoviShield Administered,AEFI,18-30 years (Age),30-45 years (Age),45-60 years (Age),60+ years (Age),Total Doses Administered
1697,13/04/2021,Jammu and Kashmir,1217666.0,144250.0,1256.0,1217666.0,202914.0,755761.0,461685.0,220.0,0.0,1420580.0,90.0,11153.0,155845.0,581627.0,468906.0,1420580.0
3848,10/03/2021,Tripura,120654.0,7144.0,410.0,120654.0,38049.0,73190.0,47459.0,5.0,73.0,120581.0,,,,,,158703.0
3500,07/03/2021,Sikkim,25221.0,963.0,77.0,25221.0,4256.0,14924.0,10295.0,2.0,201.0,25020.0,,,,,,29477.0
3023,18/02/2021,Odisha,373908.0,6103.0,1315.0,373908.0,34080.0,172003.0,201859.0,46.0,30707.0,343201.0,,,,,,407988.0
24,09/02/2021,India,6002474.0,175556.0,38131.0,6002474.0,0.0,2458329.0,3543208.0,937.0,363692.0,5638782.0,,,,,,6002474.0


In [36]:
# creating a copy of dataframe to avoid any changes in the orignal dataset.
vaccine_df = df1.copy()

In [37]:
vaccine_df.drop(['AEFI','18-30 years (Age)','30-45 years (Age)','45-60 years (Age)','60+ years (Age)'],axis=1,inplace=True)
# renaming column
vaccine_df.rename(columns={'Updated On':'Date'}, inplace=True)
# coverting into datetimeformat
vaccine_df['Date'] = pd.to_datetime(vaccine_df['Date'])

In [39]:
# dropping 'India' value from state column 
vaccine_df.drop(vaccine_df[vaccine_df['State'] == 'India'].index,inplace = True)
# filling NAN with zeros
vaccine_df= vaccine_df.fillna(0) 

In [41]:
# arranging the dataframes for creating a bar chart race 
# set countries and date as index and find cases
# transpose the dataframe to have countries as columns and dates as rows

vaccine_df_by_date = vaccine_df.set_index(['State','Date']).unstack()['Total Doses Administered'].T.reset_index()

vaccine_df_by_date = vaccine_df_by_date.set_index('Date') #make date as index - desired by barchartrace

vaccine_df_by_date = vaccine_df_by_date.fillna(0) #fill na with 0

vaccine_df_by_date.sample(5) 

State,Andaman and Nicobar Islands,Andhra Pradesh,Arunachal Pradesh,Assam,Bihar,Chandigarh,Chhattisgarh,Dadra and Nagar Haveli and Daman and Diu,Delhi,Goa,...,Puducherry,Punjab,Rajasthan,Sikkim,Tamil Nadu,Telangana,Tripura,Uttar Pradesh,Uttarakhand,West Bengal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-21,239.0,45040.0,1154.0,2163.0,25254.0,281.0,5752.0,78.0,5308.0,72.0,...,249.0,4780.0,13605.0,186.0,15654.0,17837.0,1812.0,1578.0,1874.0,20206.0
2021-01-22,552.0,55697.0,1818.0,3947.0,25773.0,507.0,5807.0,113.0,5331.0,300.0,...,440.0,9676.0,24545.0,296.0,23659.0,24830.0,3321.0,21325.0,3163.0,41970.0
2021-04-30,104374.0,6579034.0,241423.0,2480734.0,6967492.0,208489.0,5570909.0,95496.0,3271950.0,349718.0,...,197864.0,3394080.0,13021948.0,203966.0,5947056.0,4780184.0,1236761.0,12557806.0,2079214.0,10971948.0
2021-02-16,3467.0,370880.0,17130.0,132946.0,485733.0,8018.0,294241.0,4754.0,214273.0,14463.0,...,6502.0,102258.0,626481.0,9006.0,215964.0,290997.0,68681.0,975907.0,126204.0,528075.0
2021-03-13,14133.0,1085975.0,58665.0,553725.0,1365157.0,44001.0,892387.0,17574.0,619039.0,67503.0,...,27025.0,359181.0,2796674.0,46701.0,1204223.0,671203.0,180199.0,2651620.0,342819.0,2350319.0


In [43]:
# creating bar Chart Race 
df = vaccine_df_by_date
bcr.bar_chart_race(
    df=df,
    filename='covid-19_vaccine_in_india.mp4',
    orientation='h',
    sort='desc',
    n_bars=10,
    fixed_order=False,
    fixed_max=True,
    steps_per_period=7,
    interpolate_period=False,
    label_bars=True,
    bar_size=.95,
    period_label={'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},
    period_fmt='%B %d, %Y',
    period_summary_func=lambda v, r: {'x': .99, 'y': .18,
                                      's': f'Total Doses Administered: {v.nlargest(6).sum():,.0f}',
                                      'ha': 'right', 'size': 8, 'family': 'Courier New'},
    period_length=500,
    figsize=(5, 3),
    dpi=144,
    cmap='Dark2',
    title='COVID-19 Vaccine Total Doses Administered',
    title_size='10',
    bar_label_size=7,
    tick_label_size=7,
    shared_fontdict={'family' : 'Helvetica', 'color' : '.1'},
    scale='linear',
    writer=None,
    fig=None,
    bar_kwargs={'alpha': .7},
    filter_column_colors=False)  