# ML Work

## Predicting total vaccinations administered per hundred and total people vaccinated per hundred based on happiness factors 

### Splitting Data

In [1]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# import and prepare data
happiness = pd.read_csv('happiness.csv')
vaccinations = pd.read_csv('country_vaccinations.csv')
vaccinations.head()
total_vaccinations = vaccinations.groupby('country').max()[['total_vaccinations_per_hundred', 'people_vaccinated_per_hundred']]
merged = pd.merge(happiness, total_vaccinations, left_on='location', right_on='country').dropna()
data = merged[['Ladder score','Logged GDP per capita',  'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity', 'Perceptions of corruption', 'Ladder score in Dystopia', 'Explained by: Log GDP per capita', 'Explained by: Social support',
       'Explained by: Healthy life expectancy',
       'Explained by: Freedom to make life choices',
       'Explained by: Generosity', 'Explained by: Perceptions of corruption']].values
data = StandardScaler().fit_transform(data)
target = merged[['total_vaccinations_per_hundred',
       'people_vaccinated_per_hundred']].values
train_data, test_data, train_target, test_target = train_test_split(data, target, test_size=.2, random_state=216)


### Linear Regression

In [2]:
# Training and Predicting model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression().fit(X=train_data, y=train_target)
predicted = model.predict(test_data)
print('MSE:', mean_squared_error(test_target, predicted))
print('r2:', r2_score(test_target, predicted))

MSE: 520.5692281662408
r2: 3.489358855152114e-05


### K Nearest Neighbors

In [3]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
knn = GridSearchCV(estimator=KNeighborsRegressor(), param_grid={'n_neighbors': range(1,30)})
knn.fit(X=train_data, y=train_target)
knn_predicted = knn.predict(test_data)
print(knn.best_params_)
print('MSE:', mean_squared_error(test_target, knn_predicted))
print('r2:', r2_score(test_target, knn_predicted))

{'n_neighbors': 2}
MSE: 466.31633124999985
r2: 0.0853212717459052


### Polynomial Features

In [4]:
from sklearn.preprocessing import PolynomialFeatures 
for i in range(2,7):
    deg = i
    poly_train_data = PolynomialFeatures(deg, include_bias=False).fit_transform(train_data)
    poly_model = LinearRegression()
    poly_model.fit(X=poly_train_data, y=train_target)
    poly_test_data=PolynomialFeatures(deg, include_bias=False).fit_transform(test_data)
    poly_predicted = poly_model.predict(poly_test_data)
    print('For Degree = ', deg)
    print('MSE:', mean_squared_error(test_target, poly_predicted))
    print('r2:', r2_score(test_target, poly_predicted))
    print('\n')

For Degree =  2
MSE: 5361.750600052845
r2: -10.329005458555335


For Degree =  3
MSE: 5651.850740386821
r2: -10.961210674851952


For Degree =  4
MSE: 1188.4733105486152
r2: -1.4482175375822213


For Degree =  5
MSE: 8445.681888037576
r2: -16.95819019592493


For Degree =  6
MSE: 15946.990801320071
r2: -33.36706197631512




## Predicting happiness factors based on total vaccinations administered per hundred and total people vaccinated per hundred

### Splitting Data

In [5]:
target = merged[['Ladder score','Logged GDP per capita',  'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity', 'Perceptions of corruption', 'Ladder score in Dystopia', 'Explained by: Log GDP per capita', 'Explained by: Social support',
       'Explained by: Healthy life expectancy',
       'Explained by: Freedom to make life choices',
       'Explained by: Generosity', 'Explained by: Perceptions of corruption']].values
target = StandardScaler().fit_transform(data)
data = merged[['total_vaccinations_per_hundred',
       'people_vaccinated_per_hundred']].values
train_data, test_data, train_target, test_target = train_test_split(data, target, test_size=.2, random_state=216)

### Linear Regression

In [6]:
# Training and Predicting model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression().fit(X=train_data, y=train_target)
predicted = model.predict(test_data)
print('MSE:', mean_squared_error(test_target, predicted))
print('r2:', r2_score(test_target, predicted))

MSE: 1.4764039062843943
r2: -0.5262761143823431


### K Nearest Neighbors

In [7]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
knn = GridSearchCV(estimator=KNeighborsRegressor(), param_grid={'n_neighbors': range(1,30)})
knn.fit(X=train_data, y=train_target)
knn_predicted = knn.predict(test_data)
print(knn.best_params_)
print('MSE:', mean_squared_error(test_target, knn_predicted))
print('r2:', r2_score(test_target, knn_predicted))

{'n_neighbors': 18}
MSE: 0.5960565996609892
r2: 0.3699128041077057


### Polynomial Features

In [8]:
from sklearn.preprocessing import PolynomialFeatures 
for i in range(2,10):
    deg = i
    poly_train_data = PolynomialFeatures(deg, include_bias=False).fit_transform(train_data)
    poly_model = LinearRegression()
    poly_model.fit(X=poly_train_data, y=train_target)
    poly_test_data=PolynomialFeatures(deg, include_bias=False).fit_transform(test_data)
    poly_predicted = poly_model.predict(poly_test_data)
    print('For Degree = ', deg)
    print('MSE:', mean_squared_error(test_target, poly_predicted))
    print('r2:', r2_score(test_target, poly_predicted))
    print('\n')

For Degree =  2
MSE: 81.94002429456545
r2: -82.99236974709474


For Degree =  3
MSE: 6071.183247909148
r2: -7336.014118836958


For Degree =  4
MSE: 919433.6248285314
r2: -1039326.1138012578


For Degree =  5
MSE: 1767180752.1918921
r2: -2460787756.5286145


For Degree =  6
MSE: 16893327698838.531
r2: -17531046343655.457


For Degree =  7
MSE: 2.2037783754269754e+17
r2: -3.142626305124253e+17


For Degree =  8
MSE: 8.290509123698993e+19
r2: -9.755071338228997e+19


For Degree =  9
MSE: 2.9015136085097243e+21
r2: -3.151478268862857e+21


