# A Deep Dive into the Covid-19 Data of India

In [267]:
import pandas as pd

In [268]:
# Lift pandas' display limits so rendered frames are never truncated,
# neither by row count nor by column count.
for option_name in ('display.max_rows', 'display.max_columns'):
    pd.set_option(option_name, None)

In [269]:
# Load the Covid-19 India CSV; the 'Sno' column becomes the row index.
df = pd.read_csv(
    '../datasets/covid_19_india.csv',
    index_col='Sno',
)

In [270]:
# (rows, columns) of the loaded frame.
df.shape

(18110, 8)

In [271]:
# Per-column dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 18110 entries, 1 to 18110
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Date                      18110 non-null  object
 1   Time                      18110 non-null  object
 2   State/UnionTerritory      18110 non-null  object
 3   ConfirmedIndianNational   18110 non-null  object
 4   ConfirmedForeignNational  18110 non-null  object
 5   Cured                     18110 non-null  int64 
 6   Deaths                    18110 non-null  int64 
 7   Confirmed                 18110 non-null  int64 
dtypes: int64(3), object(5)
memory usage: 1.2+ MB


In [272]:
# Column labels available after loading ('Sno' is the index, so it is not listed).
df.columns

Index(['Date', 'Time', 'State/UnionTerritory', 'ConfirmedIndianNational',
       'ConfirmedForeignNational', 'Cured', 'Deaths', 'Confirmed'],
      dtype='object')

In [273]:
# Earliest records in the file (first five rows).
df.head()

Unnamed: 0_level_0,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,2020-01-30,6:00 PM,Kerala,1,0,0,0,1
2,2020-01-31,6:00 PM,Kerala,1,0,0,0,1
3,2020-02-01,6:00 PM,Kerala,2,0,0,0,2
4,2020-02-02,6:00 PM,Kerala,3,0,0,0,3
5,2020-02-03,6:00 PM,Kerala,3,0,0,0,3


In [274]:
# Latest records in the file (last five rows).
df.tail()

Unnamed: 0_level_0,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
18106,2021-08-11,8:00 AM,Telangana,-,-,638410,3831,650353
18107,2021-08-11,8:00 AM,Tripura,-,-,77811,773,80660
18108,2021-08-11,8:00 AM,Uttarakhand,-,-,334650,7368,342462
18109,2021-08-11,8:00 AM,Uttar Pradesh,-,-,1685492,22775,1708812
18110,2021-08-11,8:00 AM,West Bengal,-,-,1506532,18252,1534999


### Working with date

In [275]:
# Peek at the Date column before conversion; bracket access avoids any clash
# between a column label and a DataFrame attribute.
df['Date'].head()

Sno
1    2020-01-30
2    2020-01-31
3    2020-02-01
4    2020-02-02
5    2020-02-03
Name: Date, dtype: object

In [276]:
# Parse the Date column into pandas datetime values so that calendar parts
# (year, month, weekday) can be extracted from it afterwards.
df['Date'] = pd.to_datetime(df['Date'])

In [277]:
# Re-inspect the Date column to confirm the dtype after conversion.
df['Date'].head()

Sno
1   2020-01-30
2   2020-01-31
3   2020-02-01
4   2020-02-02
5   2020-02-03
Name: Date, dtype: datetime64[ns]

In [278]:
# Derive calendar features from the Date column, which the conversion cell
# above has already turned into datetime64. The .dt accessor reads the parts
# directly instead of constructing a new DatetimeIndex for every attribute.
df['year'] = df['Date'].dt.year
df['month'] = df['Date'].dt.month
# weekday is the day of the week as an integer (pandas: Monday=0 ... Sunday=6).
df['weekday'] = df['Date'].dt.weekday

## Task 1: Grouping Total Confirmed, Cured, and Death Covid cases by Year and Month.

In [279]:
# Preview each (year, month) group as its own table.
# Iterating the groupby is used instead of .apply(display): apply is meant for
# computing a result per group, not for side effects. .head(10) bounds the
# output so the notebook does not render every row of every group.
for _, month_rows in df.groupby(['year', 'month']):
    display(month_rows[['year', 'month', 'Cured', 'Deaths', 'Confirmed']].head(10))

Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2020,1,0,0,1
2,2020,1,0,0,1


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,2020,2,0,0,2
4,2020,2,0,0,3
5,2020,2,0,0,3
6,2020,2,0,0,3
7,2020,2,0,0,3
8,2020,2,0,0,3
9,2020,2,0,0,3
10,2020,2,0,0,3
11,2020,2,0,0,3
12,2020,2,0,0,3


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
32,2020,3,0,0,3
33,2020,3,0,0,1
34,2020,3,0,0,3
35,2020,3,0,0,1
36,2020,3,0,0,1
37,2020,3,0,0,1
38,2020,3,3,0,3
39,2020,3,0,0,1
40,2020,3,0,0,6
41,2020,3,3,0,3


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
530,2020,4,1,0,83
531,2020,4,0,0,10
532,2020,4,0,0,1
533,2020,4,0,1,23
534,2020,4,0,0,16
535,2020,4,2,0,9
536,2020,4,6,2,152
537,2020,4,0,0,5
538,2020,4,5,6,82
539,2020,4,21,0,43


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1479,2020,5,16,0,33
1480,2020,5,403,33,1463
1481,2020,5,1,0,1
1482,2020,5,29,1,42
1483,2020,5,82,2,426
1484,2020,5,17,0,56
1485,2020,5,36,0,40
1486,2020,5,1094,59,3515
1487,2020,5,7,0,7
1488,2020,5,613,214,4395


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2523,2020,6,33,0,33
2524,2020,6,2349,62,3679
2525,2020,6,1,0,4
2526,2020,6,185,4,1272
2527,2020,6,1710,21,3815
2528,2020,6,199,4,293
2529,2020,6,115,1,498
2530,2020,6,0,0,2
2531,2020,6,8478,473,19844
2532,2020,6,42,0,70


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3604,2020,7,50,0,97
3605,2020,7,6511,187,14595
3606,2020,7,62,1,191
3607,2020,7,5647,12,8227
3608,2020,7,7687,67,10043
3609,2020,7,364,6,440
3610,2020,7,2250,13,2860
3611,2020,7,82,0,213
3612,2020,7,58348,2742,87360
3613,2020,7,596,3,1315


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4707,2020,8,214,5,548
4708,2020,8,63864,1349,140933
4709,2020,8,918,3,1591
4710,2020,8,30357,98,40269
4711,2020,8,33358,296,51233
4712,2020,8,667,15,1051
4713,2020,8,6230,53,9086
4714,2020,8,686,2,1100
4715,2020,8,120930,3963,135598
4716,2020,8,4211,45,5913


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
5792,2020,9,2647,46,3132
5793,2020,9,330526,3969,434771
5794,2020,9,2885,7,4112
5795,2020,9,85461,306,109040
5796,2020,9,119540,582,136457
5797,2020,9,2431,56,4346
5798,2020,9,16989,277,31503
5799,2020,9,2081,2,2367
5800,2020,9,155678,4444,174748
5801,2020,9,13577,192,17418


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
6842,2020,10,3608,53,3835
6843,2020,10,629211,5828,693484
6844,2020,10,6890,16,9796
6845,2020,10,145618,697,180811
6846,2020,10,169732,904,182728
6847,2020,10,9813,162,11938
6848,2020,10,81718,957,113602
6849,2020,10,2929,2,3040
6850,2020,10,247446,5361,279715
6851,2020,10,28125,428,33418


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7927,2020,11,4100,59,4332
7928,2020,11,792083,6690,823348
7929,2020,11,12959,37,14852
7930,2020,11,196054,930,206351
7931,2020,11,207480,1090,216086
7932,2020,11,13551,226,14418
7933,2020,11,163079,2101,187270
7934,2020,11,3210,2,3251
7935,2020,11,347476,6511,386706
7936,2020,11,40678,604,43626


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
8977,2020,12,4550,61,4710
8978,2020,12,853232,6992,868064
8979,2020,12,15411,54,16282
8980,2020,12,208396,981,212776
8981,2020,12,227987,1264,234610
8982,2020,12,16070,277,17409
8983,2020,12,214826,2861,237322
8984,2020,12,3314,2,3332
8985,2020,12,528315,9174,570374
8986,2020,12,45940,688,47963


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10083,2021,1,871916,7108,882286
10084,2021,1,4826,62,4945
10085,2021,1,16564,56,16719
10086,2021,1,211910,1045,216211
10087,2021,1,245476,1397,251743
10088,2021,1,19045,317,19748
10089,2021,1,264769,3371,279575
10090,2021,1,3365,2,3378
10091,2021,1,609322,10536,625369
10092,2021,1,49388,739,51066


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
11199,2021,2,879405,7153,887836
11200,2021,2,4928,62,4994
11201,2021,2,16759,56,16828
11202,2021,2,214178,1082,217141
11203,2021,2,257169,1501,259563
11204,2021,2,20426,334,20925
11205,2021,2,297339,3701,305367
11206,2021,2,3390,2,3398
11207,2021,2,622882,10853,635096
11208,2021,2,51891,768,53409


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
12207,2021,3,882029,7169,889916
12208,2021,3,4952,62,5020
12209,2021,3,16780,56,16836
12210,2021,3,214830,1092,216445
12211,2021,3,260594,1541,262534
12212,2021,3,21037,352,21770
12213,2021,3,305951,3835,312560
12214,2021,3,3400,2,3406
12215,2021,3,627044,10910,639289
12216,2021,3,53585,795,54986


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
13323,2021,4,887434,7217,901989
13324,2021,4,4976,62,5083
13325,2021,4,16785,56,16845
13326,2021,4,215445,1105,218412
13327,2021,4,262371,1576,265527
13328,2021,4,23702,379,26999
13329,2021,4,319488,4170,349187
13330,2021,4,3482,2,3661
13331,2021,4,642565,11027,662430
13332,2021,4,55653,830,58039


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
14403,2021,5,5701,67,5949
14404,2021,5,970718,7992,1101690
14405,2021,5,17134,59,18419
14406,2021,5,226643,1307,253123
14407,2021,5,362356,2560,470317
14408,2021,5,35263,478,42647
14409,2021,5,601161,8581,728700
14410,2021,5,5567,4,7504
14411,2021,5,1033825,16147,1149333
14412,2021,5,66939,1168,91052


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
15519,2021,6,6719,115,7005
15520,2021,6,1528360,10930,1693085
15521,2021,6,23402,115,27272
15522,2021,6,354810,3365,411216
15523,2021,6,685362,5163,706761
15524,2021,6,57526,753,60046
15525,2021,6,922674,13048,971463
15526,2021,6,9957,4,10286
15527,2021,6,1390963,24237,1426240
15528,2021,6,140254,2649,155666


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
16599,2021,7,7308,128,7467
16600,2021,7,1838469,12706,1889513
16601,2021,7,32923,172,35857
16602,2021,7,477510,4539,508484
16603,2021,7,710569,9588,721914
16604,2021,7,60708,808,61670
16605,2021,7,975077,13439,994480
16606,2021,7,10511,4,10560
16607,2021,7,1407832,24977,1434188
16608,2021,7,161361,3054,166689


Unnamed: 0_level_0,year,month,Cured,Deaths,Confirmed
Sno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
17715,2021,8,7400,129,7537
17716,2021,8,1931618,13377,1966175
17717,2021,8,43939,229,48122
17718,2021,8,547616,5260,566198
17719,2021,8,714735,9643,724835
17720,2021,8,61111,811,61953
17721,2021,8,986621,13524,1002008
17722,2021,8,10617,4,10650
17723,2021,8,1410631,25053,1436265
17724,2021,8,166941,3147,171146


In [280]:
# Group rows by calendar year and month, keeping only the three case-count
# columns. Nothing is aggregated yet; this is a groupby object.
df_year_month_metrics = (
    df
    .groupby(by=['year', 'month'])
    [['Cured', 'Deaths', 'Confirmed']]
)

In [281]:
# Add up Cured/Deaths/Confirmed over every row in each (year, month) group.
# NOTE(review): these columns look like running totals per state (Kerala's
# Confirmed value repeats on consecutive days in df.head()), so summing the
# daily rows would count each case once per reporting day. Confirm; if so,
# take the last value per state and month before totalling.
df_year_month_metrics.sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Cured,Deaths,Confirmed
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,1,0,0,2
2020,2,0,0,86
2020,3,808,202,9687
2020,4,75443,13270,422442
2020,5,1133341,89834,2938234
2020,6,5668946,319690,10558374
2020,7,19980130,793511,31726501
2020,8,58580895,1553468,80749620
2020,9,118592934,2443374,149113758
2020,10,198824412,3457615,226770312


## Task 2: Grouping Total Confirmed, Cured, and Death Covid cases by Year, Month, and Day of the Week.

In [282]:
# Group rows by year, month and day-of-week number, keeping only the three
# case-count columns. Nothing is aggregated yet; this is a groupby object.
df_year_month_weekday_metrics = (
    df
    .groupby(by=['year', 'month', 'weekday'])
    [['Cured', 'Deaths', 'Confirmed']]
)

In [283]:
# Add up Cured/Deaths/Confirmed per (year, month, weekday); weekday is the
# day-of-week number derived from Date, not a week-of-year index.
# NOTE(review): the count columns look like running totals per state, so
# summing daily rows probably over-counts — confirm before relying on these.
df_year_month_weekday_metrics.sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cured,Deaths,Confirmed
year,month,weekday,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020,1,3,0,0,1
2020,1,4,0,0,1
2020,2,0,0,0,12
2020,2,1,0,0,12
2020,2,2,0,0,12
2020,2,3,0,0,12
2020,2,4,0,0,12
2020,2,5,0,0,14
2020,2,6,0,0,12
2020,3,0,142,40,1849


## Task 3: Grouping Total Confirmed, Cured, and Death Covid cases by State and Year.

In [284]:
# Group rows by state/union territory and year, keeping only the three
# case-count columns. Nothing is aggregated yet; this is a groupby object.
df_state_year_metrics = (
    df
    .groupby(by=['State/UnionTerritory', 'year'])
    [['Cured', 'Deaths', 'Confirmed']]
)

In [285]:
# Add up Cured/Deaths/Confirmed over every row per (state, year).
# NOTE(review): the count columns look like running totals per state, so
# summing daily rows probably over-counts — confirm; the last value per
# state and year may be the intended figure.
df_state_year_metrics.sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Cured,Deaths,Confirmed
State/UnionTerritory,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Andaman and Nicobar Islands,2020,534731,7772,590838
Andaman and Nicobar Islands,2021,1313555,19364,1347660
Andhra Pradesh,2020,95094768,871178,104134066
Andhra Pradesh,2021,275331762,2068189,288298687
Arunachal Pradesh,2020,1442769,4488,1679508
Arunachal Pradesh,2021,5145380,22311,5497399
Assam,2020,23558817,108050,26440101
Assam,2021,69119863,530273,73396910
Bihar,2020,26953520,151678,29298924
Bihar,2021,99571850,960669,104363151


## Task 4: Grouping Total Confirmed, Cured, and Death Covid cases by State, Year and Month.

In [286]:
# Group rows by state/union territory, year and month, keeping only the three
# case-count columns. Nothing is aggregated yet; this is a groupby object.
df_state_year_month_metrics = (
    df
    .groupby(by=['State/UnionTerritory', 'year', 'month'])
    [['Cured', 'Deaths', 'Confirmed']]
)

In [287]:
# Add up Cured/Deaths/Confirmed over every row per (state, year, month).
# NOTE(review): the count columns look like running totals per state, so
# summing daily rows probably over-counts — confirm; the last value per
# state and month may be the intended figure.
df_state_year_month_metrics.sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cured,Deaths,Confirmed
State/UnionTerritory,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Andaman and Nicobar Islands,2020,3,0,0,36
Andaman and Nicobar Islands,2020,4,211,0,485
Andaman and Nicobar Islands,2020,5,969,0,1023
Andaman and Nicobar Islands,2020,6,1075,0,1352
Andaman and Nicobar Islands,2020,7,3809,8,6372
Andaman and Nicobar Islands,2020,8,38101,794,63866
Andaman and Nicobar Islands,2020,9,96860,1526,105810
Andaman and Nicobar Islands,2020,10,118562,1726,126128
Andaman and Nicobar Islands,2020,11,129659,1817,136011
Andaman and Nicobar Islands,2020,12,145485,1901,149755


## Task 5: Year-wise Covid report for the state of Kerala in the years 2020 and 2021.

In [288]:
# Keep only Kerala's rows, then group them by state and year over the three
# case-count columns. Nothing is aggregated yet; this is a groupby object.
df_kerala_year_metrics = (
    df.loc[df['State/UnionTerritory'].eq('Kerala')]
    .groupby(['State/UnionTerritory', 'year'])
    [['Cured', 'Deaths', 'Confirmed']]
)

In [289]:
# Add up Cured/Deaths/Confirmed over Kerala's rows for each year.
# NOTE(review): the count columns look like running totals (Kerala's Confirmed
# value repeats on consecutive days in df.head()), so summing daily rows
# probably over-counts — confirm before reporting these as yearly totals.
df_kerala_year_metrics.sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Cured,Deaths,Confirmed
State/UnionTerritory,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Kerala,2020,42951434,193376,51854118
Kerala,2021,377222801,1694801,407051905


## Task 6: Month-wise Covid report for the state of Kerala in the years 2020 and 2021.

In [290]:
# Keep only Kerala's rows, then group them by state, year and month over the
# three case-count columns. Nothing is aggregated yet; this is a groupby object.
df_kerala_year_month_metrics = (
    df.loc[df['State/UnionTerritory'].eq('Kerala')]
    .groupby(['State/UnionTerritory', 'year', 'month'])
    [['Cured', 'Deaths', 'Confirmed']]
)

In [291]:
# Add up Cured/Deaths/Confirmed over Kerala's rows for each (year, month).
# NOTE(review): the count columns look like running totals (Kerala's Confirmed
# value repeats on consecutive days in df.head()), so summing daily rows
# probably over-counts — confirm before reporting these as monthly totals.
df_kerala_year_month_metrics.sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cured,Deaths,Confirmed
State/UnionTerritory,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Kerala,2020,1,0,0,2
Kerala,2020,2,0,0,86
Kerala,2020,3,152,3,1690
Kerala,2020,4,5981,84,11496
Kerala,2020,5,15219,142,20734
Kerala,2020,6,37829,543,78399
Kerala,2020,7,170170,1226,345425
Kerala,2020,8,897214,5049,1399347
Kerala,2020,9,2528807,14165,3570633
Kerala,2020,10,6918689,33918,9763929
