# Libraries

In [19]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Loading data

In [20]:
data = pd.read_csv('bodyfat.csv')

# printing the first 5 rows of the DataFrame to check if the data is loaded correctly
print(data.head())

   Density  BodyFat  Age  Weight  Height  Neck  Chest  Abdomen    Hip  Thigh  \
0   1.0708     12.3   23  154.25   67.75  36.2   93.1     85.2   94.5   59.0   
1   1.0853      6.1   22  173.25   72.25  38.5   93.6     83.0   98.7   58.7   
2   1.0414     25.3   22  154.00   66.25  34.0   95.8     87.9   99.2   59.6   
3   1.0751     10.4   26  184.75   72.25  37.4  101.8     86.4  101.2   60.1   
4   1.0340     28.7   24  184.25   71.25  34.4   97.3    100.0  101.9   63.2   

   Knee  Ankle  Biceps  Forearm  Wrist  
0  37.3   21.9    32.0     27.4   17.1  
1  37.3   23.4    30.5     28.9   18.2  
2  38.9   24.0    28.8     25.2   16.6  
3  37.3   22.8    32.4     29.4   18.2  
4  42.2   24.0    32.2     27.7   17.7  


#  Lasso vs Ridge

In [21]:
# Choosing an arbitrary column as the target
target_column = 'Hip'
target = data[target_column]
features = data.drop(target_column, axis=1)

In [22]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

## Lasso

In [23]:
# Creating the Lasso model
lasso = Lasso()

In [24]:
lasso_params = {'alpha': [0.1, 1.0, 10.0]}

In [25]:
lasso_cv = GridSearchCV(lasso, lasso_params, cv=5)
lasso_cv.fit(X_train, y_train)

In [26]:
best_alpha_lasso = lasso_cv.best_params_['alpha']
print("Best alpha: {0:.2f}".format(best_alpha_lasso))

Best alpha: 0.10


In [27]:
lasso_best = Lasso(alpha = best_alpha_lasso)
lasso_best.fit(X_train, y_train)

In [28]:
y_train_pred_lasso = lasso_best.predict(X_train)
y_test_pred_lasso = lasso_best.predict(X_test)

train_mse_lasso = np.round(mean_squared_error(y_train, y_train_pred_lasso), decimals = 4)
test_mse_lasso = np.round(mean_squared_error(y_test, y_test_pred_lasso), decimals = 4)

print('Train set MSE of Lasso:', train_mse_lasso)
print('Test set MSE of Lasso:', test_mse_lasso)

Train set MSE of Lasso: 3.6032
Test set MSE of Lasso: 3.1011


In [29]:
lasso_r2 = r2_score(y_test, y_test_pred_lasso)
print('R2 score for Lasso: {0:.4f}'.format(lasso_r2))

R2 score for Lasso: 0.9548


## Ridge

In [30]:
# Creating the Ridge model
ridge = Ridge()

In [31]:
ridge_params = {'alpha': [0.1, 1.0, 10.0]}

In [32]:
ridge_cv = GridSearchCV(ridge, ridge_params, cv=5)
ridge_cv.fit(X_train, y_train)

In [33]:
best_alpha_ridge = ridge_cv.best_params_['alpha']
print("Best alpha: {0:.2f}".format(best_alpha_ridge))

Best alpha: 10.00


In [34]:
ridge_best = Ridge(alpha = best_alpha_ridge)
ridge_best.fit(X_train, y_train)

In [35]:
y_train_pred_ridge = ridge_best.predict(X_train)
y_test_pred_ridge = ridge_best.predict(X_test)

train_mse_ridge = np.round(mean_squared_error(y_train, y_train_pred_ridge), decimals = 4)
test_mse_ridge = np.round(mean_squared_error(y_test, y_test_pred_ridge), decimals = 4)

print('Train set MSE of Ridge:', train_mse_ridge)
print('Test set MSE of Ridge:', test_mse_ridge)

Train set MSE of Ridge: 3.557
Test set MSE of Ridge: 3.1477


In [36]:
ridge_r2 = r2_score(y_test, y_test_pred_ridge)
print('R2 score for Ridge: {0:.4f}'.format(ridge_r2))

R2 score for Ridge: 0.9541
