# ML Work

## Predicting total vaccinations administered per hundred and total people vaccinated per hundred based on happiness factors 

### Splitting Data

In [1]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# import and prepare data
happiness = pd.read_csv('happiness.csv')
vaccinations = pd.read_csv('country_vaccinations.csv')
vaccinations.head()
total_vaccinations = vaccinations.groupby('country').max()[['total_vaccinations_per_hundred', 'people_vaccinated_per_hundred']]
merged = pd.merge(happiness, total_vaccinations, left_on='location', right_on='country').dropna()
data = merged[['Ladder score','Logged GDP per capita',  'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity', 'Perceptions of corruption', 'Ladder score in Dystopia', 'Explained by: Log GDP per capita', 'Explained by: Social support',
       'Explained by: Healthy life expectancy',
       'Explained by: Freedom to make life choices',
       'Explained by: Generosity', 'Explained by: Perceptions of corruption']].values
data = StandardScaler().fit_transform(data)
target = merged[['total_vaccinations_per_hundred',
       'people_vaccinated_per_hundred']].values
train_data, test_data, train_target, test_target = train_test_split(data, target, test_size=.2, random_state=216)


### Linear Regression

In [2]:
# Training and Predicting model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
lin_model = LinearRegression().fit(X=train_data, y=train_target)
predicted = lin_model.predict(test_data)
print('MSE:', mean_squared_error(test_target, predicted))
print('r2:', r2_score(test_target, predicted))

MSE: 520.5692285323674
r2: 3.4892832205479785e-05


### K Nearest Neighbors

In [3]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
knn = GridSearchCV(estimator=KNeighborsRegressor(), param_grid={'n_neighbors': range(1,30)})
knn.fit(X=train_data, y=train_target)
knn_predicted = knn.predict(test_data)
print(knn.best_params_)
print('MSE:', mean_squared_error(test_target, knn_predicted))
print('r2:', r2_score(test_target, knn_predicted))

{'n_neighbors': 2}
MSE: 466.31633124999985
r2: 0.0853212717459052


### Polynomial Features

In [4]:
from sklearn.preprocessing import PolynomialFeatures 
for i in range(2,7):
    deg = i
    poly_train_data = PolynomialFeatures(deg, include_bias=False).fit_transform(train_data)
    poly_model = LinearRegression()
    poly_model.fit(X=poly_train_data, y=train_target)
    poly_test_data=PolynomialFeatures(deg, include_bias=False).fit_transform(test_data)
    poly_predicted = poly_model.predict(poly_test_data)
    print('For Degree = ', deg)
    print('MSE:', mean_squared_error(test_target, poly_predicted))
    print('r2:', r2_score(test_target, poly_predicted))
    print('\n')

For Degree =  2
MSE: 5011.949209431219
r2: -9.613458026593811


For Degree =  3
MSE: 4479.891749552626
r2: -8.455276556870581


For Degree =  4
MSE: 911.1023092229955
r2: -0.8704910390827177


For Degree =  5
MSE: 4115.653387715832
r2: -7.305441369510589


For Degree =  6
MSE: 8062.783731128486
r2: -16.009018052741585




### Single Layer Neural Network

In [5]:
#import
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import numpy as np
num_loops = 2

In [6]:
#convert to tensors
X_train = torch.from_numpy(train_data.astype(np.float32))
y_train = torch.from_numpy(train_target.astype(np.float32))


In [7]:
#define network

class NeuralNetwork(torch.nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.linear = torch.nn.Linear(14, 2)  
        
    def forward(self, x):
        y_pred = self.linear(x)
        return y_pred

learning_rate = 0.0001
l = nn.MSELoss()
model = NeuralNetwork()
optimizer = torch.optim.SGD(model.parameters(), lr =learning_rate )


In [8]:
#training
for i in range(num_loops):
    num_epochs = 10000 
    for epoch in range(num_epochs):
        #forward feed
        y_pred = model(X_train.requires_grad_())

        #calculate the loss
        loss= l(y_pred, y_train)

        #backward propagation: calculate gradients
        loss.backward()

        #update the weights
        optimizer.step()

        #clear out the gradients from the last step loss.backward()
        optimizer.zero_grad()

        #print('epoch {}, loss {}'.format(epoch, loss.item()))    

In [9]:
# Evalutation
nn_predicted = model(torch.from_numpy(test_data.astype(np.float32))).detach().numpy()
print('MSE:', mean_squared_error(test_target, nn_predicted))
print('r2:', r2_score(test_target, nn_predicted))

MSE: 485.0179357074728
r2: 0.07741192611109121


### Multilayer Network

In [10]:
# define network
class MultiLayerNeuralNetwork(torch.nn.Module):
    def __init__(self):
        super(MultiLayerNeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(14, 100),
            nn.ReLU(),
            nn.Linear(100, 25),
            nn.ReLU(),
            nn.Linear(25, 2),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits     
    
learning_rate = 0.0001
l = nn.MSELoss()
model = MultiLayerNeuralNetwork()
optimizer = torch.optim.SGD(model.parameters(), lr =learning_rate )


In [11]:
#training
for i in range(num_loops):
    num_epochs = 10000
    for epoch in range(num_epochs):
        #forward feed
        y_pred = model(X_train.requires_grad_())

        #calculate the loss
        loss= l(y_pred, y_train)

        #backward propagation: calculate gradients
        loss.backward()

        #update the weights
        optimizer.step()

        #clear out the gradients from the last step loss.backward()
        optimizer.zero_grad()

        #print('epoch {}, loss {}'.format(epoch, loss.item()))

In [12]:
nn_predicted = model(torch.from_numpy(test_data.astype(np.float32))).detach().numpy()
print('MSE:', mean_squared_error(test_target, nn_predicted))
print('r2:', r2_score(test_target, nn_predicted))

MSE: 481.504151051936
r2: 0.04213037788914076


## Predicting happiness factors based on total vaccinations administered per hundred and total people vaccinated per hundred

### Splitting Data

In [13]:
target = merged[['Ladder score','Logged GDP per capita',  'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity', 'Perceptions of corruption', 'Ladder score in Dystopia', 'Explained by: Log GDP per capita', 'Explained by: Social support',
       'Explained by: Healthy life expectancy',
       'Explained by: Freedom to make life choices',
       'Explained by: Generosity', 'Explained by: Perceptions of corruption']].values
target = StandardScaler().fit_transform(data)
data = merged[['total_vaccinations_per_hundred',
       'people_vaccinated_per_hundred']].values
train_data, test_data, train_target, test_target = train_test_split(data, target, test_size=.2, random_state=216)

### Linear Regression

In [14]:
# Training and Predicting model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression().fit(X=train_data, y=train_target)
predicted = model.predict(test_data)
print('MSE:', mean_squared_error(test_target, predicted))
print('r2:', r2_score(test_target, predicted))

MSE: 1.4764039062843939
r2: -0.526276114382343


### K Nearest Neighbors

In [15]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
knn = GridSearchCV(estimator=KNeighborsRegressor(), param_grid={'n_neighbors': range(1,30)})
knn.fit(X=train_data, y=train_target)
knn_predicted = knn.predict(test_data)
print(knn.best_params_)
print('MSE:', mean_squared_error(test_target, knn_predicted))
print('r2:', r2_score(test_target, knn_predicted))

{'n_neighbors': 18}
MSE: 0.5960565996609892
r2: 0.3699128041077059


### Polynomial Features

In [16]:
from sklearn.preprocessing import PolynomialFeatures 
for i in range(2,10):
    deg = i
    poly_train_data = PolynomialFeatures(deg, include_bias=False).fit_transform(train_data)
    poly_model = LinearRegression()
    poly_model.fit(X=poly_train_data, y=train_target)
    poly_test_data=PolynomialFeatures(deg, include_bias=False).fit_transform(test_data)
    poly_predicted = poly_model.predict(poly_test_data)
    print('For Degree = ', deg)
    print('MSE:', mean_squared_error(test_target, poly_predicted))
    print('r2:', r2_score(test_target, poly_predicted))
    print('\n')

For Degree =  2
MSE: 81.9400242945651
r2: -82.99236974709423


For Degree =  3
MSE: 6071.183247913994
r2: -7336.014118845635


For Degree =  4
MSE: 919433.6248495538
r2: -1039326.1138365762


For Degree =  5
MSE: 1767180753.19922
r2: -2460787757.7068987


For Degree =  6
MSE: 16893327929231.135
r2: -17531045611489.676


For Degree =  7
MSE: 2.2033770883780054e+17
r2: -3.1420755999491776e+17


For Degree =  8
MSE: 8.290509126079196e+19
r2: -9.755071340627401e+19


For Degree =  9
MSE: 2.9015136068934026e+21
r2: -3.1514782677558116e+21




### Single Layer Neural Network

In [17]:
#import
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [18]:
#convert to tensors
X_train = torch.from_numpy(train_data.astype(np.float32))
y_train = torch.from_numpy(train_target.astype(np.float32))

In [19]:
#define network
class ReverseNeuralNetwork(torch.nn.Module):
    def __init__(self):
        super(ReverseNeuralNetwork, self).__init__()
        self.linear = torch.nn.Linear(2, 14)  
        
    def forward(self, x):
        y_pred = self.linear(x)
        return y_pred

learning_rate = 0.0001
l = nn.MSELoss()
model = ReverseNeuralNetwork()
optimizer = torch.optim.SGD(model.parameters(), lr =learning_rate )


In [20]:
#training
for i in range(num_loops):
    num_epochs = 10000
    for epoch in range(num_epochs):
        #forward feed
        y_pred = model(X_train.requires_grad_())

        #calculate the loss
        loss= l(y_pred, y_train)

        #backward propagation: calculate gradients
        loss.backward()

        #update the weights
        optimizer.step()

        #clear out the gradients from the last step loss.backward()
        optimizer.zero_grad()

        #print('epoch {}, loss {}'.format(epoch, loss.item()))

In [21]:
# Evalutation
nn_predicted = model(torch.from_numpy(test_data.astype(np.float32))).detach().numpy()
print('MSE:', mean_squared_error(test_target, nn_predicted))
print('r2:', r2_score(test_target, nn_predicted))

MSE: 5.3594893123016805
r2: -3.9794481294065664


### Multilayer Neural Network

In [22]:
# define network
class ReverseMultiLayerNeuralNetwork(torch.nn.Module):
    def __init__(self):
        super(ReverseMultiLayerNeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(2, 100),
            nn.ReLU(),
            nn.Linear(100, 25),
            nn.ReLU(),
            nn.Linear(25, 14),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits     
    
learning_rate = 0.0001
l = nn.MSELoss()
model = ReverseMultiLayerNeuralNetwork()
optimizer = torch.optim.SGD(model.parameters(), lr =learning_rate )


In [23]:
#training
for i in range(num_loops):
    num_epochs = 10000
    for epoch in range(num_epochs):
        #forward feed
        y_pred = model(X_train.requires_grad_())

        #calculate the loss
        loss= l(y_pred, y_train)

        #backward propagation: calculate gradients
        loss.backward()

        #update the weights
        optimizer.step()

        #clear out the gradients from the last step loss.backward()
        optimizer.zero_grad()

        #print('epoch {}, loss {}'.format(epoch, loss.item()))

In [24]:
# Evalutation
nn_predicted = model(torch.from_numpy(test_data.astype(np.float32))).detach().numpy()
print('MSE:', mean_squared_error(test_target, nn_predicted))
print('r2:', r2_score(test_target, nn_predicted))

MSE: 0.9013502976730045
r2: 0.027806417552400136
