# COVID-19 Data Analysis

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# Load the COVID-19 snapshot CSV from the working directory into `data`.
try:
    data = pd.read_csv(r"4. covid_19_data.csv")
except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as load_error:
    # Only handle "file missing / unreadable / malformed" failures. A bare
    # `except:` would also swallow KeyboardInterrupt and genuine coding errors
    # and hide the real cause from the reader.
    # NOTE: `data` is not defined on this path, so the cells below will raise
    # NameError until the file is available.
    print("Dataset could not be loaded. Is the dataset missing?")
    print("Reason: {}".format(load_error))
else:
    # Report the shape only after a successful load, outside the guarded region.
    print("Covid Data dataset has {} samples with {} features each.".format(*data.shape))

Covid Data dataset has 321 samples with 6 features each.


In [3]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
0,4/29/2020,,Afghanistan,1939,60,252
1,4/29/2020,,Albania,766,30,455
2,4/29/2020,,Algeria,3848,444,1702
3,4/29/2020,,Andorra,743,42,423
4,4/29/2020,,Angola,27,2,7


In [4]:
# (rows, columns) of the loaded frame
data.shape

(321, 6)

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns; kept in `stats` and displayed as the cell output.
stats = data.describe()
stats

Unnamed: 0,Confirmed,Deaths,Recovered
count,321.0,321.0,321.0
mean,9949.800623,709.152648,3030.277259
std,31923.853086,3236.162817,14364.870365
min,0.0,0.0,0.0
25%,104.0,2.0,2.0
50%,653.0,12.0,73.0
75%,4655.0,144.0,587.0
max,299691.0,27682.0,132929.0


In [6]:
# Retrieve the column labels of the frame
data.columns

Index(['Date', 'State', 'Region', 'Confirmed', 'Deaths', 'Recovered'], dtype='object')

In [7]:
# Per-column dtype and non-null count, plus overall memory usage
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 321 entries, 0 to 320
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Date       321 non-null    object
 1   State      140 non-null    object
 2   Region     321 non-null    object
 3   Confirmed  321 non-null    int64 
 4   Deaths     321 non-null    int64 
 5   Recovered  321 non-null    int64 
dtypes: int64(3), object(3)
memory usage: 15.2+ KB


In [8]:
# Row index of the frame
data.index

RangeIndex(start=0, stop=321, step=1)

In [9]:
# Number of missing values in each column
data.isna().sum()

Date           0
State        181
Region         0
Confirmed      0
Deaths         0
Recovered      0
dtype: int64

# 1. Show the confirmed, death, and recovered counts for each region

In [10]:
# Total confirmed, death, and recovered counts per region.
# The numeric columns are selected explicitly: a bare `.sum()` on the grouped
# frame relies on pandas silently dropping the string columns (Date, State),
# which newer pandas versions (numeric_only=False by default) no longer do.
data.groupby('Region')[['Confirmed', 'Deaths', 'Recovered']].sum()

Unnamed: 0_level_0,Confirmed,Deaths,Recovered
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,1939,60,252
Albania,766,30,455
Algeria,3848,444,1702
Andorra,743,42,423
Angola,27,2,7
...,...,...,...
West Bank and Gaza,344,2,71
Western Sahara,6,0,5
Yemen,6,0,1
Zambia,97,3,54


In [11]:
# Confirmed cases summed per region, largest first; show the ten highest.
(
    data.groupby('Region')['Confirmed']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

Region
US                1039909
Spain              236899
Italy              203591
France             166543
UK                 166441
Germany            161539
Turkey             117589
Russia              99399
Iran                93657
Mainland China      82862
Name: Confirmed, dtype: int64

# 2. Remove all records where confirmed cases are fewer than 10

In [12]:
# Keep every row except those reporting fewer than 10 confirmed cases.
newdf = data.loc[~data['Confirmed'].lt(10)]
newdf

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
0,4/29/2020,,Afghanistan,1939,60,252
1,4/29/2020,,Albania,766,30,455
2,4/29/2020,,Algeria,3848,444,1702
3,4/29/2020,,Andorra,743,42,423
4,4/29/2020,,Angola,27,2,7
...,...,...,...,...,...,...
316,4/29/2020,Wyoming,US,545,7,0
317,4/29/2020,Xinjiang,Mainland China,76,3,73
318,4/29/2020,Yukon,Canada,11,0,0
319,4/29/2020,Yunnan,Mainland China,185,2,181


# 2. In which region is the maximum number of confirmed cases recorded?

In [13]:
# Rank regions by total confirmed cases (descending); the first row is the maximum.
(
    data.groupby('Region')['Confirmed']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

Region
US                1039909
Spain              236899
Italy              203591
France             166543
UK                 166441
Germany            161539
Turkey             117589
Russia              99399
Iran                93657
Mainland China      82862
Name: Confirmed, dtype: int64

# 3. In which region is the minimum number of deaths recorded?

In [16]:
# Deaths summed per region, smallest first; show the 50 lowest totals.
(
    data.groupby('Region')['Deaths']
    .sum()
    .sort_values(ascending=True)
    .head(50)
)

Region
Laos                                0
Mongolia                            0
Mozambique                          0
Cambodia                            0
Fiji                                0
Namibia                             0
Nepal                               0
Madagascar                          0
Macau                               0
Papua New Guinea                    0
Rwanda                              0
Saint Kitts and Nevis               0
Bhutan                              0
Dominica                            0
Central African Republic            0
Saint Lucia                         0
Holy See                            0
Sao Tome and Principe               0
Yemen                               0
Western Sahara                      0
Eritrea                             0
Vietnam                             0
Saint Vincent and the Grenadines    0
Timor-Leste                         0
Uganda                              0
Grenada                             0
South

# 4. How many confirmed, death, and recovered cases were reported from India through April 2020?

In [18]:
# Select the rows whose Region is exactly 'India'.
data.loc[data['Region'].eq('India')]

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
74,4/29/2020,,India,33062,1079,8437


# 5. Sort the entire dataset by number of confirmed cases in ascending order

In [19]:
# Rows ordered from fewest to most confirmed cases (ascending is the default).
data.sort_values('Confirmed')

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
285,4/29/2020,Recovered,US,0,0,120720
284,4/29/2020,Recovered,Canada,0,0,20327
203,4/29/2020,Diamond Princess cruise ship,Canada,0,1,0
305,4/29/2020,Tibet,Mainland China,1,0,1
289,4/29/2020,Saint Pierre and Miquelon,France,1,0,0
...,...,...,...,...,...,...
57,4/29/2020,,France,165093,24087,48228
168,4/29/2020,,UK,165221,26097,0
80,4/29/2020,,Italy,203591,27682,71252
153,4/29/2020,,Spain,236899,24275,132929


# 6. Sort the entire dataset by number of recovered cases in descending order

In [20]:
# Rows ordered from most to fewest recovered cases.
data.sort_values('Recovered', ascending=False)

Unnamed: 0,Date,State,Region,Confirmed,Deaths,Recovered
153,4/29/2020,,Spain,236899,24275,132929
285,4/29/2020,Recovered,US,0,0,120720
61,4/29/2020,,Germany,161539,6467,120400
76,4/29/2020,,Iran,93657,5957,73791
80,4/29/2020,,Italy,203591,27682,71252
...,...,...,...,...,...,...
248,4/29/2020,Maryland,US,20849,1078,0
246,4/29/2020,Manitoba,Canada,275,6,0
243,4/29/2020,Louisiana,US,27660,1845,0
241,4/29/2020,Kentucky,US,4537,234,0
