In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Read the COVID-19 CSV into a DataFrame (path is relative to the working directory).
dataset = pd.read_csv("covid_dataset_final.csv")

In [5]:
# Shape of the raw frame as (rows, columns): 183316 rows and 71 columns at the time of this run.
# More feature columns may be derived from these later on.
dataset.shape

(183316, 71)

# Features 

## 1. iso_code

In [7]:
# iso_code: ISO country codes, the internationally recognised identifiers for countries.
# The summary below reports 243 distinct codes.
dataset["iso_code"].describe()

count     183316
unique       243
top          ARG
freq        1279
Name: iso_code, dtype: object

## 2. Continent

In [13]:
# The data covers countries from 6 continents (every continent except Antarctica).
# Continent is kept so the analysis can also be rolled up one level, continent by continent.
# Note: unique() also returns nan, i.e. some rows carry no continent value.

'''Oceania is a geographic region that includes Australasia, Melanesia, Micronesia and Polynesia.
Spanning the Eastern and Western Hemispheres'''

print("Continents : \n\n", list(dataset["continent"].unique()), "\n")
dataset["continent"].describe()

Continents : 

 ['Asia', nan, 'Europe', 'Africa', 'North America', 'South America', 'Oceania'] 



count     163394
unique         6
top       Africa
freq       40957
Name: continent, dtype: object

## 3. Location

In [36]:
# location: the name of each country/place.
# There are 239 distinct countries/places, which allows analysis at a much finer level.

# Optional: list every location name.
# print("\n\nCountries : \n\n ",list(dataset.location.unique()),"\n")

dataset["location"].describe()

count     173714
unique       239
top       Mexico
freq         823
Name: location, dtype: object

## 4. Data related to positive COVID-19 Cases

### 4.1 Total_Cases

In [26]:
# total_cases: cumulative confirmed COVID-19 cases reported up to that day for the
# location/country. This is a running total, not the number of currently active cases.
# NOTE(review): definition taken from the OWID codebook, whose column names this file
# matches - confirm against the actual data source.
dataset["total_cases"].describe()

count    1.696770e+05
mean     2.769817e+06
std      1.711460e+07
min      1.000000e+00
25%      2.215000e+03
50%      2.930600e+04
75%      3.231600e+05
max      4.906608e+08
Name: total_cases, dtype: float64

### 4.2 new cases

In [27]:
# new_cases: new positive COVID-19 cases reported on a given date at a given location.
dataset["new_cases"].describe()

count    1.694770e+05
mean     1.230845e+04
std      8.846091e+04
min      0.000000e+00
25%      1.000000e+00
50%      7.900000e+01
75%      1.077000e+03
max      4.089078e+06
Name: new_cases, dtype: float64

### 4.3 Total cases per million

In [28]:
# total_cases_per_million: total positive COVID-19 cases per million people,
# as of a given day in a given country.
dataset["total_cases_per_million"].describe()

count    168891.000000
mean      33126.088231
std       60314.140105
min           0.001000
25%         659.337500
50%        5152.496000
75%       41583.150500
max      706541.904000
Name: total_cases_per_million, dtype: float64

### 4.4 Total new cases per million

In [29]:
# new_cases_per_million: new cases reported per million people
# on a given day in a given country.
dataset["new_cases_per_million"].describe()

count    168691.000000
mean        179.550771
std         739.696280
min           0.000000
25%           0.033000
50%          11.373000
75%         103.596000
max       51427.491000
Name: new_cases_per_million, dtype: float64

# 5. Death related data of people due to COVID-19

### 5.1 Total Death

In [35]:
# total_deaths: total deaths due to COVID-19 up to a particular date in a particular country.
dataset["total_deaths"].describe()

count    1.516070e+05
mean     6.000259e+04
std      3.139668e+05
min      1.000000e+00
25%      8.400000e+01
50%      8.220000e+02
75%      7.670500e+03
max      6.151170e+06
Name: total_deaths, dtype: float64

### 5.2 New Deaths

In [37]:
# new_deaths: deaths due to COVID-19 newly reported on a particular date in a particular country.
dataset["new_deaths"].describe()

count    151618.000000
mean        168.316242
std         821.579593
min           0.000000
25%           0.000000
50%           2.000000
75%          19.000000
max       18156.000000
Name: new_deaths, dtype: float64

### 5.3 Total deaths per million

In [39]:
# total_deaths_per_million: total COVID-19 deaths per million people,
# up to a particular date in a particular country.
dataset["total_deaths_per_million"].describe()

count    150834.000000
mean        534.914547
std         814.732215
min           0.000000
25%          19.992000
50%         141.907500
75%         764.432250
max        6363.990000
Name: total_deaths_per_million, dtype: float64

### 5.4 Total new deaths per million

In [40]:
# new_deaths_per_million: NEW COVID-19 deaths per million people reported on a particular
# date in a particular country (the daily figure, not the cumulative total).
dataset["new_deaths_per_million"].describe()

count    150845.000000
mean          1.669386
std           5.137567
min           0.000000
25%           0.000000
50%           0.118000
75%           1.358000
max         453.772000
Name: new_deaths_per_million, dtype: float64

# 6. Hospital data

## 6.1 Covid Patients' Data

In [46]:
# 6.1.1
# hosp_patients: number of COVID-19 patients in hospital on a particular date
# at a particular location.
dataset["hosp_patients"].describe()

count     24960.000000
mean       4287.762460
std       11612.848804
min           0.000000
25%         140.000000
50%         773.500000
75%        2967.500000
max      154540.000000
Name: hosp_patients, dtype: float64

In [47]:
# 6.1.2
# weekly_hosp_admissions: number of COVID-19 patients admitted to hospital on a weekly basis,
# recorded per date and location.
dataset["weekly_hosp_admissions"].describe()

count     11394.000000
mean       5914.675180
std       14498.751913
min           0.000000
25%         345.000000
50%        1393.000000
75%        5343.500000
max      154002.000000
Name: weekly_hosp_admissions, dtype: float64

In [49]:
# 6.1.3
# hosp_patients_per_million: COVID-19 hospital patients per million people
# on a particular date at a particular location.
dataset["hosp_patients_per_million"].describe()

count    24960.000000
mean       170.759845
std        208.581058
min          0.000000
25%         28.800500
50%         91.597000
75%        232.307500
max       1544.082000
Name: hosp_patients_per_million, dtype: float64

In [50]:
# 6.1.4
# weekly_hosp_admissions_per_million: weekly COVID-19 hospital admissions per million people
# at a particular location.
dataset["weekly_hosp_admissions_per_million"].describe()

count    11394.000000
mean       105.112701
std        107.271152
min          0.000000
25%         24.925000
50%         75.397000
75%        144.141000
max        645.808000
Name: weekly_hosp_admissions_per_million, dtype: float64

## 6.2 Critical patients data

In [53]:
'''This data will help us see how the number of critical patients increased or decreased after
   vaccination in that country'''

### 6.2.1 ICU Patients Data

In [54]:
# icu_patients: patients in ICU on a particular date in a particular country/place.

dataset["icu_patients"].describe()

count    23548.000000
mean       910.592152
std       2677.688140
min          0.000000
25%         30.000000
50%        155.000000
75%        600.000000
max      28891.000000
Name: icu_patients, dtype: float64

In [56]:
# 6.2.2
# icu_patients_per_million: ICU patients per million people on a particular date
# in a particular place.

dataset["icu_patients_per_million"].describe()

count    23548.000000
mean        24.048817
std         27.686017
min          0.000000
25%          4.495000
50%         13.712000
75%         34.678750
max        177.282000
Name: icu_patients_per_million, dtype: float64

In [57]:
# 6.2.3
# weekly_icu_admissions: weekly ICU admissions.

dataset["weekly_icu_admissions"].describe()

count    5716.000000
mean      466.564206
std       624.017787
min         0.000000
25%        50.750000
50%       218.000000
75%       659.000000
max      4838.000000
Name: weekly_icu_admissions, dtype: float64

In [59]:
# 6.2.4
# weekly_icu_admissions_per_million: weekly ICU admissions per million people.

dataset["weekly_icu_admissions_per_million"].describe()

count    5716.000000
mean       15.260753
std        16.288929
min         0.000000
25%         4.090000
50%        10.939500
75%        20.112250
max       221.212000
Name: weekly_icu_admissions_per_million, dtype: float64

# 7. Covid-19 Test Data

In [None]:
'''Reported COVID-19 cases may drop later simply because people stopped getting themselves tested,
   so this attribute is included to allow a more detailed analysis'''

### 7.1 Total Tests till date

In [66]:
# total_tests: total COVID-19 tests to date; described by the author as the number of
# people who got themselves tested at a centre.
# NOTE(review): confirm whether the unit is tests performed or people tested.

dataset["total_tests"].describe()

count    7.150100e+04
mean     1.840513e+07
std      6.796902e+07
min      0.000000e+00
25%      3.940530e+05
50%      1.981651e+06
75%      9.364213e+06
max      8.464898e+08
Name: total_tests, dtype: float64

### 7.2 Total new tests 

In [67]:
# new_tests: COVID-19 tests on a particular date at a particular location; described by
# the author as the number of people tested at a centre that day.
# NOTE(review): confirm whether the unit is tests performed or people tested.

dataset["new_tests"].describe()

count    6.904300e+04
mean     6.880317e+04
std      2.164677e+05
min      1.000000e+00
25%      2.461000e+03
50%      9.428000e+03
75%      3.825700e+04
max      3.740296e+06
Name: new_tests, dtype: float64

In [70]:
# total_tests_per_thousand: TOTAL tests per thousand people (the running total, not the
# daily "new tests" figure).
#
# Why it is included: officials may have stopped testing, or people may have stopped coming
# forward to be tested, in which case the reported number of new cases would decrease
# even without a real drop in infections.

dataset["total_tests_per_thousand"].describe()

count    71501.000000
mean       799.874967
std       1961.913392
min          0.000000
25%         37.735000
50%        193.923000
75%        759.113000
max      31688.973000
Name: total_tests_per_thousand, dtype: float64

In [75]:
# tests_per_case: number of tests conducted per confirmed case, i.e. the inverse of
# positive_rate (the medians agree: 1 / 0.0584 is about 17, and the median here is 16.9).
# It is NOT the number of times one person was tested before being declared healthy.
# NOTE(review): definition follows the OWID codebook (rolling 7-day average) - confirm
# against the actual data source.

dataset["tests_per_case"].describe()

count     81886.000000
mean        149.514327
std        2188.714056
min           1.000000
25%           7.000000
50%          16.900000
75%          50.400000
max      189932.000000
Name: tests_per_case, dtype: float64

## 8. Positive rate

In [77]:
# positive_rate: share of COVID-19 tests that came back positive, stored as a fraction
# between 0 and 1 (e.g. 0.0584 means 5.84 %), not as a percentage.
#
# This column is used to detect the trend in the positive rate over time, before and
# after the vaccination drive.

dataset["positive_rate"].describe()

count    82465.000000
mean         0.099483
std          0.112780
min          0.000000
25%          0.019100
50%          0.058400
75%          0.141000
max          0.991200
Name: positive_rate, dtype: float64

# 9. Vaccination Data

In [84]:
# Column: total_vaccinations

'''Total number of COVID-19 vaccination doses administered'''

dataset["total_vaccinations"].describe()

count    5.712600e+04
mean     1.631143e+08
std      7.663326e+08
min      0.000000e+00
25%      8.893698e+05
50%      6.435484e+06
75%      3.498642e+07
max      1.129202e+10
Name: total_vaccinations, dtype: float64

In [85]:
# Column: people_vaccinated

'''Total number of people who received at least one vaccine dose'''

dataset["people_vaccinated"].describe()

count    5.477800e+04
mean     8.167984e+07
std      3.847020e+08
min      0.000000e+00
25%      5.521165e+05
50%      3.871285e+06
75%      1.996361e+07
max      5.078752e+09
Name: people_vaccinated, dtype: float64

In [87]:
# Column: people_fully_vaccinated

'''Total number of people who received all doses prescribed by the initial vaccination protocol'''

dataset["people_fully_vaccinated"].describe()

count    5.212500e+04
mean     6.511669e+07
std      3.207568e+08
min      1.000000e+00
25%      4.012940e+05
50%      2.932292e+06
75%      1.562565e+07
max      4.565219e+09
Name: people_fully_vaccinated, dtype: float64

In [89]:
# Column: total_boosters

'''Total number of COVID-19 vaccination booster doses administered (doses administered beyond the
                        number prescribed by the vaccination protocol)'''

dataset["total_boosters"].describe()

count    2.009300e+04
mean     2.408341e+07
std      1.137042e+08
min      1.000000e+00
25%      3.269000e+03
50%      5.557620e+05
75%      5.045856e+06
max      1.651903e+09
Name: total_boosters, dtype: float64

In [93]:
# Column: new_vaccinations

'''New COVID-19 vaccination doses administered (only calculated for consecutive days)'''

dataset["new_vaccinations"].describe()

count    3.931400e+04
mean     1.158698e+06
std      4.306442e+06
min      0.000000e+00
25%      6.005250e+03
50%      4.066300e+04
75%      2.764615e+05
max      5.449888e+07
Name: new_vaccinations, dtype: float64

# 10. Data related to People and Population of that area

In [None]:
# Notes only - nothing is computed in this cell. The notes were previously written as
# indented string literals, which made the cell fail with IndentationError.
#
# population
#     Population of that region/country/location.
#
# population_density
#     Number of people divided by land area, measured in square kilometers, most recent
#     year available; in other words, a measurement of population per unit area.
#
# We can use this data to track how vaccination performed in densely and sparsely
# populated areas, and what the condition was before vaccination.

In [98]:
# Notes only - nothing is computed in this cell. The descriptions were previously indented
# string literals, which made the cell fail with IndentationError.
#
# median_age
#     Median age of the population, UN projection for 2020.
# aged_65_older
#     Share of the population that is 65 years and older, most recent year available.
# aged_70_older
#     Share of the population that is 70 years and older in 2015.

# 11. Data related to financial condition of country and people of that area

In [100]:
'''This will help in analysing how the vaccines performed in developed, under-developed and developing
    countries, as well as in poor vs rich countries'''



In [None]:
# Notes only: descriptions of the gdp_per_capita and extreme_poverty columns.
# The cell contains just comments and string literals; no data is read or computed.
#gdp_per_capita
'''Gross domestic product at purchasing power parity (constant 2011 international dollars),
    most recent year available'''

#extreme_poverty

'''Share of the population living in extreme poverty, most recent year available since 2010'''

# 12. Vaccines

In [None]:
# Names of the different vaccines being supplied to different countries.
# Kept as a real list: the earlier ''''...'''' form left a stray quote after the closing
# triple quote, so the cell could not be parsed (unterminated string literal).
vaccine_names = [
    'Moderna', 'Oxford/AstraZeneca', 'Sinopharm/Beijing', 'Sputnik V',
    'CanSino', 'Pfizer/BioNTech', 'Johnson&Johnson', 'Novavax', 'Sinovac',
]
vaccine_names