# ML Work

## Predicting total vaccinations administered per hundred and total people vaccinated per hundred based on happiness factors 

### Linear Regression

In [40]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# import and prepare data
happiness = pd.read_csv('happiness.csv')
vaccinations = pd.read_csv('country_vaccinations.csv')
vaccinations.head()
total_vaccinations = vaccinations.groupby('country').max()[['total_vaccinations_per_hundred', 'people_vaccinated_per_hundred']]
merged = pd.merge(happiness, total_vaccinations, left_on='location', right_on='country').dropna()
data = merged[['Ladder score','Logged GDP per capita',  'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity', 'Perceptions of corruption', 'Ladder score in Dystopia', 'Explained by: Log GDP per capita', 'Explained by: Social support',
       'Explained by: Healthy life expectancy',
       'Explained by: Freedom to make life choices',
       'Explained by: Generosity', 'Explained by: Perceptions of corruption']].values
data = StandardScaler().fit_transform(data)
target = merged[['total_vaccinations_per_hundred',
       'people_vaccinated_per_hundred']].values
train_data, test_data, train_target, test_target = train_test_split(data, target, test_size=.5, random_state=216)
print(merged.columns)

Index(['location', 'Regional indicator', 'Ladder score',
       'Standard error of ladder score', 'upperwhisker', 'lowerwhisker',
       'Logged GDP per capita', 'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity',
       'Perceptions of corruption', 'Ladder score in Dystopia',
       'Explained by: Log GDP per capita', 'Explained by: Social support',
       'Explained by: Healthy life expectancy',
       'Explained by: Freedom to make life choices',
       'Explained by: Generosity', 'Explained by: Perceptions of corruption',
       'Dystopia + residual', 'total_vaccinations_per_hundred',
       'people_vaccinated_per_hundred'],
      dtype='object')


In [41]:
# Training and Predicting model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression().fit(X=train_data, y=train_target)
predicted = model.predict(test_data)
print('MSE:', mean_squared_error(test_target, predicted))
print('r2:', r2_score(test_target, predicted))

MSE: 315.27963420569324
r2: -0.13579171458294548


### K Nearest Neighbors

In [45]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
knn = GridSearchCV(estimator=KNeighborsRegressor(), param_grid={'n_neighbors': range(1,15)})
knn.fit(X=train_data, y=train_target)
knn_predicted = knn.predict(test_data)
print(knn.best_params_)
print('MSE:', mean_squared_error(test_target, knn_predicted))
print('r2:', r2_score(test_target, knn_predicted))

{'n_neighbors': 3}
MSE: 235.7241603174603
r2: 0.16054880797772025


### Polynomial Features

In [43]:
from sklearn.preprocessing import PolynomialFeatures 
for i in range(2,10):
    deg = i
    poly_train_data = PolynomialFeatures(deg, include_bias=False).fit_transform(train_data)
    poly_model = LinearRegression()
    poly_model.fit(X=poly_train_data, y=train_target)
    poly_test_data=PolynomialFeatures(deg, include_bias=False).fit_transform(test_data)
    poly_predicted = poly_model.predict(poly_test_data)
    print('For Degree = ', deg)
    print('MSE:', mean_squared_error(test_target, poly_predicted))
    print('r2:', r2_score(test_target, poly_predicted))
    print('\n')

For Degree =  2
MSE: 7189.473134979971
r2: -26.710660085786323


For Degree =  3
MSE: 10186.588166849699
r2: -39.68558834049345


For Degree =  4
MSE: 17277.88573069532
r2: -71.59135689966418


For Degree =  5
MSE: 127284.86183685268
r2: -425.8150254406695


For Degree =  6
MSE: 2319166.7700975817
r2: -9704.237106106655


For Degree =  7
MSE: 12474987.207826631
r2: -37248.025420807564


For Degree =  8
MSE: 293889209.93946934
r2: -1244883.8800865812


For Degree =  9
MSE: 434725776.4302221
r2: -1043311.1381028489


