In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
from sklearn.linear_model import LinearRegression

In [2]:
import warnings
# Silence every warning for the rest of the session so library notices do not
# clutter cell output.
# NOTE(review): this also hides any convergence or deprecation warnings the
# later cells might emit — confirm that is intended.
warnings.filterwarnings('ignore')

In [3]:
# Load the country-level dataset; the path is relative to the notebook's working directory.
df=pd.read_excel("77countries.xlsx")


In [4]:
df.head()

Unnamed: 0,continent,location,total_deaths,R0,total_cases,new_cases,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,hospital_beds_per_thousand,life_expectancy
0,North America,United States,185744,2.465409,6114406,38754,331002647,35.608,38.3,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,2.77,78.86
1,South America,Brazil,123780,2.000668,3997865,46934,212559409,25.04,33.5,8.552,5.06,14103.452,3.4,177.961,8.11,10.1,17.9,2.2,75.88
2,Asia,India,67376,1.731947,3853406,83883,1380004385,450.419,28.2,5.989,3.414,6426.674,21.2,282.28,10.39,1.9,20.6,0.53,69.66
3,North America,Mexico,65816,1.800513,610957,4921,128932753,66.444,29.3,6.857,4.321,17336.469,2.5,152.783,13.06,6.9,21.4,1.38,75.05
4,Europe,United Kingdom,41514,2.042228,338676,1508,67886004,272.898,40.8,18.517,12.527,39753.244,0.2,122.137,4.28,20.0,24.7,2.54,81.32


In [5]:
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77 entries, 0 to 76
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   continent                   77 non-null     object 
 1   location                    77 non-null     object 
 2   total_deaths                77 non-null     int64  
 3   R0                          77 non-null     float64
 4   total_cases                 77 non-null     int64  
 5   new_cases                   77 non-null     int64  
 6   population                  77 non-null     int64  
 7   population_density          77 non-null     float64
 8   median_age                  77 non-null     float64
 9   aged_65_older               77 non-null     float64
 10  aged_70_older               76 non-null     float64
 11  gdp_per_capita              77 non-null     float64
 12  extreme_poverty             58 non-null     float64
 13  cardiovasc_death_rate       77 non-nu

In [6]:
# Keep only the rows that have no missing value in any column.
df = df.dropna()

In [7]:
df.shape

(51, 19)

In [8]:
# Skip the two leading label columns (continent, location) by position and
# keep every column from the third one onward.
df_numerical = df[df.columns[2:]]

In [9]:
df_numerical.shape

(51, 17)

### Using only Elastic Net regression

Elastic net linear regression combines the penalties of the lasso and ridge techniques to regularize regression models. Ridge regression decreases the complexity of a model but does not reduce the number of variables; it only shrinks their effect. Lasso, on the other hand, can eliminate some features entirely, giving a subset of predictors that helps mitigate multicollinearity and model complexity. Elastic net shrinks some coefficients and sets others to 0 for sparse selection, thereby using the features of both lasso and ridge to produce a more balanced model.

* The elastic net method performs variable selection and regularization simultaneously.
* Grouping and variable selection are the key roles of the elastic net technique.


In [10]:
df_numerical.head()

Unnamed: 0,total_deaths,R0,total_cases,new_cases,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,hospital_beds_per_thousand,life_expectancy
0,185744,2.465409,6114406,38754,331002647,35.608,38.3,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,2.77,78.86
1,123780,2.000668,3997865,46934,212559409,25.04,33.5,8.552,5.06,14103.452,3.4,177.961,8.11,10.1,17.9,2.2,75.88
2,67376,1.731947,3853406,83883,1380004385,450.419,28.2,5.989,3.414,6426.674,21.2,282.28,10.39,1.9,20.6,0.53,69.66
3,65816,1.800513,610957,4921,128932753,66.444,29.3,6.857,4.321,17336.469,2.5,152.783,13.06,6.9,21.4,1.38,75.05
4,41514,2.042228,338676,1508,67886004,272.898,40.8,18.517,12.527,39753.244,0.2,122.137,4.28,20.0,24.7,2.54,81.32


In [11]:

# Regression target: the reported death counts.
y_train_reg = df_numerical.loc[:, 'total_deaths']
# Predictors: every column after the first (the target) column.
x_train_reg = df_numerical.iloc[:, 1:]
# Single-row frame (third row by position) used as the prediction input;
# note that this row is also part of the training data.
x_test = x_train_reg.iloc[[2]]

In [12]:
x_train_reg.head()

Unnamed: 0,R0,total_cases,new_cases,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,hospital_beds_per_thousand,life_expectancy
0,2.465409,6114406,38754,331002647,35.608,38.3,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,2.77,78.86
1,2.000668,3997865,46934,212559409,25.04,33.5,8.552,5.06,14103.452,3.4,177.961,8.11,10.1,17.9,2.2,75.88
2,1.731947,3853406,83883,1380004385,450.419,28.2,5.989,3.414,6426.674,21.2,282.28,10.39,1.9,20.6,0.53,69.66
3,1.800513,610957,4921,128932753,66.444,29.3,6.857,4.321,17336.469,2.5,152.783,13.06,6.9,21.4,1.38,75.05
4,2.042228,338676,1508,67886004,272.898,40.8,18.517,12.527,39753.244,0.2,122.137,4.28,20.0,24.7,2.54,81.32


In [13]:
x_test

Unnamed: 0,R0,total_cases,new_cases,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,hospital_beds_per_thousand,life_expectancy
2,1.731947,3853406,83883,1380004385,450.419,28.2,5.989,3.414,6426.674,21.2,282.28,10.39,1.9,20.6,0.53,69.66


In [14]:
from sklearn.linear_model import ElasticNet

# fit() returns the estimator itself, so construction and training are chained.
# Only random_state is set; all penalty settings are left at their defaults.
en_reg = ElasticNet(random_state=0).fit(x_train_reg, y_train_reg)

# Prediction for the single-row frame held in x_test.
en_reg.predict(x_test)

array([72393.51184758])

In [18]:
def dictionary(df,location):
    """Return the feature values of every row whose ``location`` matches.

    The first three columns of ``df`` are dropped by position before the
    conversion, so each returned dict holds only the remaining columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``location`` column.
    location : str
        Value to match against the ``location`` column.

    Returns
    -------
    list of dict
        One ``{column: value}`` record per matching row; empty when nothing
        matches.
    """
    matches = df['location'] == location
    feature_rows = df.loc[matches].iloc[:, 3:]
    return feature_rows.to_dict('records')

In [19]:
# Country whose feature values feed the manual prediction and the what-if simulation.
location="India"
# List of record dicts, one per row of df whose location matches.
df_location_dict=dictionary(df,location)

In [20]:
df_location_dict

[{'R0': 1.73194668151632,
  'total_cases': 3853406,
  'new_cases': 83883,
  'population': 1380004385,
  'population_density': 450.419,
  'median_age': 28.2,
  'aged_65_older': 5.989,
  'aged_70_older': 3.414,
  'gdp_per_capita': 6426.674,
  'extreme_poverty': 21.2,
  'cardiovasc_death_rate': 282.28,
  'diabetes_prevalence': 10.39,
  'female_smokers': 1.9,
  'male_smokers': 20.6,
  'hospital_beds_per_thousand': 0.53,
  'life_expectancy': 69.66}]

In [21]:
# Rebuild the model's prediction by hand: start at the intercept and add
# value * coefficient for each feature, pairing the two by position.
death_old = en_reg.intercept_
for feature_value, coefficient in zip(df_location_dict[0].values(), en_reg.coef_):
    death_old = death_old + (feature_value * coefficient)

In [22]:
death_old

72393.5118475761

In [None]:
# Reported total_deaths for India, as shown in the dataset preview (row 2).
total_death_original=67376

In [27]:
# Gap between the model's (truncated) prediction and the reported count,
# computed from the variables instead of retyped literals so it cannot go stale.
int(death_old) - total_death_original

5017

In [79]:
def simulation(parameter,change,by,df_location_dict,death_old,model=None):
    """Print how the predicted death count moves when selected features are scaled.

    Parameters
    ----------
    parameter : sequence of str
        Feature names (keys of ``df_location_dict``) to modify.
    change : sequence of str
        One entry per parameter. ``'increase'`` scales the value up; any
        other string is treated as a decrease and is echoed verbatim in the
        printed sentence.
    by : sequence of numbers
        Percentage change to apply to each parameter.
    df_location_dict : dict
        Baseline feature values. Its iteration order must match the order of
        the model's coefficients, because values and coefficients are paired
        by position. The dict is not modified.
    death_old : number
        Baseline prediction that the new prediction is compared against.
    model : object, optional
        Fitted linear model exposing ``intercept_`` and ``coef_``. Defaults
        to the notebook-level ``en_reg``.

    Raises
    ------
    ValueError
        If ``change`` or ``by`` is shorter than ``parameter``, or if the
        number of feature values differs from the number of coefficients.
    KeyError
        If a name in ``parameter`` is not a key of ``df_location_dict``.
    """
    if model is None:
        model = en_reg

    if len(change) < len(parameter) or len(by) < len(parameter):
        raise ValueError("change and by need one entry for every parameter")

    # Work on a copy so the caller's baseline stays untouched.
    modified = dict(df_location_dict)
    for name, direction, percent in zip(parameter, change, by):
        baseline_value = df_location_dict[name]
        delta = (percent/100)*baseline_value
        if direction == 'increase':
            modified[name] = baseline_value + delta
        else:
            modified[name] = baseline_value - delta

    coefficients = model.coef_
    if len(coefficients) != len(modified):
        # A silent mismatch would pair values with the wrong coefficients.
        raise ValueError("number of feature values does not match number of model coefficients")

    # Accumulate term by term, in coefficient order, starting from the intercept.
    death_new = model.intercept_
    for value, coefficient in zip(modified.values(), coefficients):
        death_new = death_new + (value*coefficient)

    c = "increased" if death_new > death_old else "decreased"
    difference = abs(death_new-death_old)
    # Divide by the magnitude of the baseline so a negative baseline
    # prediction cannot flip the sign of the reported percentage.
    death_change = round(((difference/abs(death_old))*100),2)

    clauses = [
        name+" "+direction+" by "+str(percent)+"% i.e. from "+str(df_location_dict[name])+" to "+str(modified[name])
        for name, direction, percent in zip(parameter, change, by)
    ]
    # A single change reads "X, then ..."; several are joined as "X ,Y , then ...".
    if len(clauses) == 1:
        lead = clauses[0]+", then"
    else:
        lead = "".join(clause+" ," for clause in clauses)+" then"

    print("If "+lead+" death will be "+c+" by "+str(death_change)+"% i.e. from "+str(int(death_old))+" to "+str(int(death_new))+" (count="+str(int(difference))+")")

In [80]:
# Features to perturb, the direction for each, and the percentage to apply.
parameter=['hospital_beds_per_thousand','diabetes_prevalence','median_age','total_cases','aged_70_older','extreme_poverty','female_smokers','male_smokers','R0']
# 'decrease' for female_smokers was previously misspelled; it only worked
# because simulation() treats every string other than 'increase' as a decrease.
change=['decrease','increase','increase','increase','increase','decrease','decrease','decrease','decrease']
by=[30,20,10,10,20,30,10,20,10]
simulation(parameter,change,by,df_location_dict[0],death_old)

If hospital_beds_per_thousand decrease by 30% i.e. from 0.53 to 0.371 ,diabetes_prevalence increase by 20% i.e. from 10.39 to 12.468 ,median_age increase by 10% i.e. from 28.2 to 31.02 ,total_cases increase by 10% i.e. from 3853406 to 4238746.6 ,aged_70_older increase by 20% i.e. from 3.414 to 4.0968 ,extreme_poverty decrease by 30% i.e. from 21.2 to 14.84 ,female_smokers decreaase by 10% i.e. from 1.9 to 1.71 ,male_smokers decrease by 20% i.e. from 20.6 to 16.48 ,R0 decrease by 10% i.e. from 1.73194668151632 to 1.558752013364688 , then death will be increased by 23.42% i.e. from 72393 to 89347 (count=16953)


Important features:
* diabetes_prevalence
* hospital_beds_per_thousand
* median_age
* total_cases
* aged_70_older
* extreme_poverty
* female_smokers
* male_smokers
* R0

In [32]:
# Hand check of the 30% decrease applied to hospital_beds_per_thousand (0.53).
0.53-(0.53*.3)

0.371

In [81]:
# Give the intercept and each coefficient a feature-specific name; the order
# follows the predictor columns the model was trained on.
intercept = en_reg.intercept_
(
    R0_coef,
    total_cases_coef,
    new_cases_coef,
    population_coef,
    population_density_coef,
    median_age_coef,
    aged_65_older_coef,
    aged_70_older_coef,
    gdp_per_capita_coef,
    extreme_poverty_coef,
    cardiovasc_death_rate_coef,
    diabetes_prevalence_coef,
    female_smokers_coef,
    male_smokers_coef,
    hospital_beds_per_thousand_coef,
    life_expectancy_coef,
) = en_reg.coef_

In [85]:
# Feature values for the simulated scenario, copied from the simulation output
# so the prediction can be recomputed by hand. Unchanged features keep their
# baseline value.
R0=1.558752013364688
total_cases=4238746.6
new_cases=83883
population=1380004385
population_density=450.419
median_age=31.02
aged_65_older=5.989
aged_70_older=4.0968
gdp_per_capita=6426.674
extreme_poverty=14.84
cardiovasc_death_rate=282.28
diabetes_prevalence=12.468
female_smokers=1.71
male_smokers=16.48
# 30% below 0.53 is 0.371; the previously typed 0.37 did not match the
# value the simulation actually used.
hospital_beds_per_thousand=0.371
life_expectancy=69.66

In [86]:
# Linear prediction written out term by term: the intercept plus
# coefficient * value for every feature, added in training-column order.
death = (
    intercept
    + (R0_coef * R0)
    + (total_cases_coef * total_cases)
    + (new_cases_coef * new_cases)
    + (population_coef * population)
    + (population_density_coef * population_density)
    + (median_age_coef * median_age)
    + (aged_65_older_coef * aged_65_older)
    + (aged_70_older_coef * aged_70_older)
    + (gdp_per_capita_coef * gdp_per_capita)
    + (extreme_poverty_coef * extreme_poverty)
    + (cardiovasc_death_rate_coef * cardiovasc_death_rate)
    + (diabetes_prevalence_coef * diabetes_prevalence)
    + (female_smokers_coef * female_smokers)
    + (male_smokers_coef * male_smokers)
    + (hospital_beds_per_thousand_coef * hospital_beds_per_thousand)
    + (life_expectancy_coef * life_expectancy)
)

In [87]:
death

89347.81314527182

In [None]:
72393

In [88]:
# Relative change between the truncated baseline and simulated predictions,
# taken from the variables rather than retyped literals.
(int(death) - int(death_old)) / int(death_old)

0.23419391377619383