In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
# Import the Lasso class from the scikit-learn library to train the Lasso linear regression model.
from sklearn.linear_model import Lasso
# Import the Ridge class from the scikit-learn library to train the Ridge linear regression model.
from sklearn.linear_model import Ridge
# Import the mean_squared_error function from the scikit-learn library to calculate the mean squared error (MSE).
from sklearn.metrics import mean_squared_error
# Import the r2_score function from the scikit-learn library to calculate the R2 score.
from sklearn.metrics import r2_score
# Import the GridSearchCV class from the scikit-learn library to search for the best value of hyperparameters.
from sklearn.model_selection import GridSearchCV
# Import the train_test_split function from the scikit-learn library to split the data into training set and test set.
from sklearn.model_selection import train_test_split
# Import the StandardScaler class from the scikit-learn library to standardize features (zero mean, unit variance).
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
# Colab-only setup: mount Google Drive and point at the dataset stored there.
from google.colab import drive
import csv  # not referenced by the cells shown here; kept in case later cells rely on it

drive.mount('/content/drive', force_remount=True)
filename = r'/content/drive/MyDrive/Data.csv'

# Fail fast if the CSV is missing or unreadable. The context manager closes the
# handle immediately; a bare open(filename) would leave it open for the whole
# kernel session.
with open(filename):
    pass

Mounted at /content/drive


<_io.TextIOWrapper name='/content/drive/MyDrive/Data.csv' mode='r' encoding='UTF-8'>

In [3]:
# Load the CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=filename)
df.head(n=5)

Unnamed: 0,Density,BodyFat,Age,Weight,Height,Neck,Chest,Abdomen,Hip,Thigh,Knee,Ankle,Biceps,Forearm,Wrist
0,1.0708,12.3,23,154.25,67.75,36.2,93.1,85.2,94.5,59.0,37.3,21.9,32.0,27.4,17.1
1,1.0853,6.1,22,173.25,72.25,38.5,93.6,83.0,98.7,58.7,37.3,23.4,30.5,28.9,18.2
2,1.0414,25.3,22,154.0,66.25,34.0,95.8,87.9,99.2,59.6,38.9,24.0,28.8,25.2,16.6
3,1.0751,10.4,26,184.75,72.25,37.4,101.8,86.4,101.2,60.1,37.3,22.8,32.4,29.4,18.2
4,1.034,28.7,24,184.25,71.25,34.4,97.3,100.0,101.9,63.2,42.2,24.0,32.2,27.7,17.7


In [4]:
# Column the models are trained to predict.
target_col = 'Weight'

# The loaded frame carries an 'Unnamed: 0' column that appears to be the row
# index written out when the CSV was saved (0, 1, 2, ... in the preview).
# A row counter carries no signal about the target, so keep it out of the
# feature matrix together with the target itself.
index_like_cols = [col for col in df.columns if str(col).startswith('Unnamed')]

# Feature matrix: every remaining column.
X = df.drop(columns=[target_col, *index_like_cols])
# Target vector.
y = df[target_col]

In [5]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [6]:
# --- Lasso (L1-regularized linear regression) ---
# Base estimator with the library's default alpha written out explicitly;
# the grid search below overrides alpha for each candidate.
lasso = Lasso(alpha=1.0)

In [7]:
# Candidate regularization strengths: 100 values spaced evenly on a log scale
# between 1e-3 and 1e3.
alphas_lasso = np.logspace(start=-3, stop=3, num=100)
# Parameter grid keyed by the estimator argument to tune.
lasso_params = dict(alpha=alphas_lasso)

In [8]:
# Exhaustive search over the candidate alphas, scored with 5-fold cross-validation
# on the training split only.
# NOTE(review): the features are passed in unscaled even though StandardScaler is
# imported; the L1 penalty is sensitive to feature scale, so consider wrapping the
# estimator in a scaling pipeline — confirm whether this was intentional.
grid_lasso = GridSearchCV(estimator=lasso, param_grid=lasso_params, cv=5)
grid_lasso.fit(X_train, y_train)

# Read the winning alpha off the fitted search and report it.
best_alpha_lasso = grid_lasso.best_params_['alpha']
print("Best alpha: {0:.2f}".format(best_alpha_lasso))

Best alpha: 7.56


In [9]:
# Build a fresh Lasso with the selected alpha and fit it on the training split.
lasso_best = Lasso(alpha=best_alpha_lasso).fit(X_train, y_train)
# fit() returns the estimator itself, so this shows the same cell output as before.
lasso_best

In [10]:
# Predictions of the tuned Lasso for the training split and the test split.
y_train_pred_lasso, y_test_pred_lasso = (
    lasso_best.predict(features) for features in (X_train, X_test)
)

# Mean squared error on each split, rounded to four decimals for display.
train_mse_lasso = np.round(mean_squared_error(y_train, y_train_pred_lasso), 4)
test_mse_lasso = np.round(mean_squared_error(y_test, y_test_pred_lasso), 4)

print('Train set MSE of Lasso:', train_mse_lasso)
print('Test set MSE of Lasso:', test_mse_lasso)

Train set MSE of Lasso: 42.8316
Test set MSE of Lasso: 60.5623


In [11]:
# Coefficient of determination of the tuned Lasso on the held-out test split.
lasso_r2 = r2_score(y_true=y_test, y_pred=y_test_pred_lasso)
print('R2 score for Lasso: {0:.4f}'.format(lasso_r2))

R2 score for Lasso: 0.9409


In [12]:
# --- Ridge (L2-regularized linear regression) ---
# Base estimator with the library's default alpha written out explicitly;
# the grid search below overrides alpha for each candidate.
ridge = Ridge(alpha=1.0)

In [13]:
# Candidate regularization strengths: 100 values spaced evenly on a log scale
# between 1e-3 and 1e3.
alphas_ridge = np.logspace(start=-3, stop=3, num=100)
# Parameter grid keyed by the estimator argument to tune.
ridge_params = dict(alpha=alphas_ridge)

In [14]:
# Exhaustive search over the candidate alphas, scored with 5-fold cross-validation
# on the training split only.
# NOTE(review): the recorded best alpha (869.75) sits close to the upper end of the
# searched range (1e3), which may mean the optimum lies outside the grid or that the
# unscaled features are distorting the penalty — worth confirming.
grid_ridge = GridSearchCV(estimator=ridge, param_grid=ridge_params, cv=5)
grid_ridge.fit(X_train, y_train)

# Read the winning alpha off the fitted search and report it.
best_alpha_ridge = grid_ridge.best_params_['alpha']
print("Best alpha: {0:.2f}".format(best_alpha_ridge))

Best alpha: 869.75


In [15]:
# Build a fresh Ridge with the selected alpha and fit it on the training split.
ridge_best = Ridge(alpha=best_alpha_ridge).fit(X_train, y_train)
# fit() returns the estimator itself, so this shows the same cell output as before.
ridge_best

In [16]:
# Predictions of the tuned Ridge for the training split and the test split.
y_train_pred_ridge, y_test_pred_ridge = (
    ridge_best.predict(features) for features in (X_train, X_test)
)

# Mean squared error on each split, rounded to four decimals for display.
train_mse_ridge = np.round(mean_squared_error(y_train, y_train_pred_ridge), 4)
test_mse_ridge = np.round(mean_squared_error(y_test, y_test_pred_ridge), 4)

print('Train set MSE of Ridge:', train_mse_ridge)
print('Test set MSE of Ridge:', test_mse_ridge)

Train set MSE of Ridge: 27.1519
Test set MSE of Ridge: 47.2027


In [17]:
# Coefficient of determination of the tuned Ridge on the held-out test split.
ridge_r2 = r2_score(y_true=y_test, y_pred=y_test_pred_ridge)
print('R2 score for Ridge: {0:.4f}'.format(ridge_r2))

R2 score for Ridge: 0.9540
