# ML Work

## Predicting total vaccinations administered per hundred and total people vaccinated per hundred based on happiness factors 

### Splitting Data

In [1]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# import and prepare data
happiness = pd.read_csv('happiness.csv')
vaccinations = pd.read_csv('country_vaccinations.csv')
vaccinations.head()
total_vaccinations = vaccinations.groupby('country').max()[['total_vaccinations_per_hundred', 'people_vaccinated_per_hundred']]
merged = pd.merge(happiness, total_vaccinations, left_on='location', right_on='country').dropna()
data = merged[['Ladder score','Logged GDP per capita',  'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity', 'Perceptions of corruption', 'Ladder score in Dystopia', 'Explained by: Log GDP per capita', 'Explained by: Social support',
       'Explained by: Healthy life expectancy',
       'Explained by: Freedom to make life choices',
       'Explained by: Generosity', 'Explained by: Perceptions of corruption']].values
data = StandardScaler().fit_transform(data)
target = merged[['total_vaccinations_per_hundred',
       'people_vaccinated_per_hundred']].values
train_data, test_data, train_target, test_target = train_test_split(data, target, test_size=.2, random_state=216)


### Linear Regression

In [2]:
# Training and Predicting model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression().fit(X=train_data, y=train_target)
predicted = model.predict(test_data)
print('MSE:', mean_squared_error(test_target, predicted))
print('r2:', r2_score(test_target, predicted))

MSE: 520.5692285323674
r2: 3.4892832205479785e-05


### K Nearest Neighbors

In [3]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
knn = GridSearchCV(estimator=KNeighborsRegressor(), param_grid={'n_neighbors': range(1,30)})
knn.fit(X=train_data, y=train_target)
knn_predicted = knn.predict(test_data)
print(knn.best_params_)
print('MSE:', mean_squared_error(test_target, knn_predicted))
print('r2:', r2_score(test_target, knn_predicted))

{'n_neighbors': 2}
MSE: 466.31633124999985
r2: 0.0853212717459052


### Polynomial Features

In [4]:
from sklearn.preprocessing import PolynomialFeatures 
for i in range(2,7):
    deg = i
    poly_train_data = PolynomialFeatures(deg, include_bias=False).fit_transform(train_data)
    poly_model = LinearRegression()
    poly_model.fit(X=poly_train_data, y=train_target)
    poly_test_data=PolynomialFeatures(deg, include_bias=False).fit_transform(test_data)
    poly_predicted = poly_model.predict(poly_test_data)
    print('For Degree = ', deg)
    print('MSE:', mean_squared_error(test_target, poly_predicted))
    print('r2:', r2_score(test_target, poly_predicted))
    print('\n')

For Degree =  2
MSE: 5011.949209431219
r2: -9.613458026593811


For Degree =  3
MSE: 4479.891749552626
r2: -8.455276556870581


For Degree =  4
MSE: 911.1023092229955
r2: -0.8704910390827177


For Degree =  5
MSE: 4115.653387715832
r2: -7.305441369510589


For Degree =  6
MSE: 8062.783731128486
r2: -16.009018052741585




### Convolutional Neural Network

In [5]:
#import
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

ModuleNotFoundError: No module named 'torch'

## Predicting happiness factors based on total vaccinations administered per hundred and total people vaccinated per hundred

### Splitting Data

In [None]:
target = merged[['Ladder score','Logged GDP per capita',  'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity', 'Perceptions of corruption', 'Ladder score in Dystopia', 'Explained by: Log GDP per capita', 'Explained by: Social support',
       'Explained by: Healthy life expectancy',
       'Explained by: Freedom to make life choices',
       'Explained by: Generosity', 'Explained by: Perceptions of corruption']].values
target = StandardScaler().fit_transform(data)
data = merged[['total_vaccinations_per_hundred',
       'people_vaccinated_per_hundred']].values
train_data, test_data, train_target, test_target = train_test_split(data, target, test_size=.2, random_state=216)

### Linear Regression

In [None]:
# Training and Predicting model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression().fit(X=train_data, y=train_target)
predicted = model.predict(test_data)
print('MSE:', mean_squared_error(test_target, predicted))
print('r2:', r2_score(test_target, predicted))

### K Nearest Neighbors

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
knn = GridSearchCV(estimator=KNeighborsRegressor(), param_grid={'n_neighbors': range(1,30)})
knn.fit(X=train_data, y=train_target)
knn_predicted = knn.predict(test_data)
print(knn.best_params_)
print('MSE:', mean_squared_error(test_target, knn_predicted))
print('r2:', r2_score(test_target, knn_predicted))

### Polynomial Features

In [None]:
from sklearn.preprocessing import PolynomialFeatures 
for i in range(2,10):
    deg = i
    poly_train_data = PolynomialFeatures(deg, include_bias=False).fit_transform(train_data)
    poly_model = LinearRegression()
    poly_model.fit(X=poly_train_data, y=train_target)
    poly_test_data=PolynomialFeatures(deg, include_bias=False).fit_transform(test_data)
    poly_predicted = poly_model.predict(poly_test_data)
    print('For Degree = ', deg)
    print('MSE:', mean_squared_error(test_target, poly_predicted))
    print('r2:', r2_score(test_target, poly_predicted))
    print('\n')