# ML Work

## Predicting total vaccinations administered per hundred and total people vaccinated per hundred based on happiness factors 

### Linear Regression

In [76]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# import and prepare data
happiness = pd.read_csv('happiness.csv')
vaccinations = pd.read_csv('country_vaccinations.csv')
vaccinations.head()
total_vaccinations = vaccinations.groupby('country').max()[['total_vaccinations_per_hundred', 'people_vaccinated_per_hundred']]
merged = pd.merge(happiness, total_vaccinations, left_on='location', right_on='country').dropna()
data = merged[['Ladder score','Logged GDP per capita',  'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity', 'Perceptions of corruption', 'Ladder score in Dystopia']].values
data = StandardScaler().fit_transform(data)
target = merged[['total_vaccinations_per_hundred',
       'people_vaccinated_per_hundred']].values
train_data, test_data, train_target, test_target = train_test_split(data, target, test_size=.5, random_state=216)
print(merged.columns)

Index(['location', 'Regional indicator', 'Ladder score',
       'Standard error of ladder score', 'upperwhisker', 'lowerwhisker',
       'Logged GDP per capita', 'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity',
       'Perceptions of corruption', 'Ladder score in Dystopia',
       'Explained by: Log GDP per capita', 'Explained by: Social support',
       'Explained by: Healthy life expectancy',
       'Explained by: Freedom to make life choices',
       'Explained by: Generosity', 'Explained by: Perceptions of corruption',
       'Dystopia + residual', 'total_vaccinations_per_hundred',
       'people_vaccinated_per_hundred'],
      dtype='object')


In [77]:
# Training and Predicting model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression().fit(X=train_data, y=train_target)
predicted = model.predict(test_data)
print('MSE:', mean_squared_error(test_target, predicted))
print('r2:', r2_score(test_target, predicted))

MSE: 245.87088875973546
r2: 0.12148253000903869


### Using K Nearest Neighbors

In [90]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
knn = GridSearchCV(estimator=KNeighborsRegressor(), param_grid={'n_neighbors': range(1,15)})
knn.fit(X=train_data, y=train_target)
knn_predicted = knn.predict(test_data)
print('MSE:', mean_squared_error(test_target, knn_predicted))
print('r2:', r2_score(test_target, knn_predicted))

MSE: 282.95733852040814
r2: -0.009867530691755277
