In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.linear_model import LassoCV
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split
from sklearn import metrics

from sklearn.model_selection import GridSearchCV

data = pd.read_csv('/Users/yunjuha/Desktop/SROP/DXA_BIS_Project/narrowed down list/narrowed_transformed_data.csv')
df = pd.DataFrame(data)
df = df.dropna()

# Lasso Models

In [3]:
#DXA model, handgrip strength (TB)

print("Lasso: DXA Model, Handgrip Strength (TB)")

print("UNTRANSFORMED")
columns = ['RA4IALM', 'RA4DTBBM', 'RA4DTBFM', 'RA1PRSEX', 'RA1PF7A', 'Age_40_50', 'Age_51_61', 'Age_61plus', 'RA4P1A', 'RA4DLR3MD', 'RA4DLFNMD', 'RA4DLSMD']
X = df[columns]
y = df['RA4IMaxGrip']

#dividing into train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the Lasso regression model
lasso_model = Lasso()

# Define a parameter grid for hyperparameter tuning
alphas = np.logspace(-4, 0, 100)
max_iters = [10000, 20000, 30000, 50000]
param_grid = {'alpha': alphas, 'max_iter': max_iters}

# Perform grid search with cross-validation to find the best hyperparameters
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters from the grid search results
best_alpha = grid_search.best_params_['alpha']
best_max_iter = grid_search.best_params_['max_iter']

print("Best Alpha:", best_alpha)
print("Best Max Iterations:", best_max_iter)

# Retrain the model with the entire training set using the best hyperparameters
best_lasso_model = Lasso(alpha=best_alpha, max_iter=best_max_iter)
best_lasso_model.fit(X_train_scaled, y_train)

# Evaluate the model on the test set
y_pred = best_lasso_model.predict(X_test_scaled)
mse = metrics.mean_squared_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)

print("\nTEST Set Metrics:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)

# Evaluate the model on the train set
y_pred_train = best_lasso_model.predict(X_train_scaled)
mse_train = metrics.mean_squared_error(y_train, y_pred_train)
r2_train = metrics.r2_score(y_train, y_pred_train)

print("\nTRAINING Set Metrics:")
print("Mean Squared Error:", mse_train)
print("R-squared:", r2_train)

# Get the feature importance (coefficients) from the Lasso model
feature_importance = best_lasso_model.coef_

# Create a DataFrame to associate each feature with its importance value
importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': feature_importance})

# Sort the features based on their importance (absolute value of coefficients) in descending order
importance_df = importance_df.reindex(importance_df['Importance'].abs().sort_values(ascending=False).index)

print(importance_df)

Lasso: DXA Model, Handgrip Strength (TB)
UNTRANSFORMED
Best Alpha: 0.2718588242732943
Best Max Iterations: 10000

TEST Set Metrics:
Mean Squared Error: 89.45720683190115
R-squared: 0.38822530646620035

TRAINING Set Metrics:
Mean Squared Error: 64.56560817992101
R-squared: 0.5343858122950351
       Feature  Importance
3     RA1PRSEX   -5.002418
0      RA4IALM    1.936033
8       RA4P1A    1.904675
2     RA4DTBFM   -1.186081
9    RA4DLR3MD    0.636407
5    Age_40_50    0.253389
4      RA1PF7A   -0.202050
1     RA4DTBBM    0.000000
6    Age_51_61   -0.000000
7   Age_61plus   -0.000000
10   RA4DLFNMD   -0.000000
11    RA4DLSMD   -0.000000


In [4]:
#DXA model, jump power (TB)

print("Lasso: DXA Model, Jump Power (TB)")

print("UNTRANSFORMED")
columns = ['RA4IALM', 'RA4DTBBM', 'RA4DTBFM', 'RA1PRSEX', 'RA1PF7A', 'Age_40_50', 'Age_51_61', 'Age_61plus', 'RA4P1A', 'RA4DLR3MD', 'RA4DLFNMD', 'RA4DLSMD']
X = df[columns]
y = df['jumppownums']

#dividing into train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the Lasso regression model
lasso_model = Lasso()

# Define a parameter grid for hyperparameter tuning
alphas = np.logspace(-4, 0, 100)
max_iters = [10000, 20000, 30000, 50000]
param_grid = {'alpha': alphas, 'max_iter': max_iters}

# Perform grid search with cross-validation to find the best hyperparameters
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters from the grid search results
best_alpha = grid_search.best_params_['alpha']
best_max_iter = grid_search.best_params_['max_iter']

print("Best Alpha:", best_alpha)
print("Best Max Iterations:", best_max_iter)

# Retrain the model with the entire training set using the best hyperparameters
best_lasso_model = Lasso(alpha=best_alpha, max_iter=best_max_iter)
best_lasso_model.fit(X_train_scaled, y_train)

# Evaluate the model on the test set
y_pred = best_lasso_model.predict(X_test_scaled)
mse = metrics.mean_squared_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)

print("\nTEST Set Metrics:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)

# Evaluate the model on the train set
y_pred_train = best_lasso_model.predict(X_train_scaled)
mse_train = metrics.mean_squared_error(y_train, y_pred_train)
r2_train = metrics.r2_score(y_train, y_pred_train)

print("\nTRAINING Set Metrics:")
print("Mean Squared Error:", mse_train)
print("R-squared:", r2_train)

# Get the feature importance (coefficients) from the Lasso model
feature_importance = best_lasso_model.coef_

# Create a DataFrame to associate each feature with its importance value
importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': feature_importance})

# Sort the features based on their importance (absolute value of coefficients) in descending order
importance_df = importance_df.reindex(importance_df['Importance'].abs().sort_values(ascending=False).index)

print(importance_df)

Lasso: DXA Model, Jump Power (TB)
UNTRANSFORMED
Best Alpha: 0.015199110829529346
Best Max Iterations: 10000

TEST Set Metrics:
Mean Squared Error: 0.2185138644836685
R-squared: 0.7916696252816687

TRAINING Set Metrics:
Mean Squared Error: 0.20670451726520583
R-squared: 0.7663702180845554
       Feature  Importance
0      RA4IALM    0.794747
2     RA4DTBFM   -0.329328
7   Age_61plus   -0.219441
10   RA4DLFNMD    0.112295
11    RA4DLSMD   -0.101377
6    Age_51_61   -0.051046
4      RA1PF7A   -0.038660
9    RA4DLR3MD    0.032047
1     RA4DTBBM    0.022989
3     RA1PRSEX   -0.000000
5    Age_40_50   -0.000000
8       RA4P1A   -0.000000


In [5]:
#BIS Model, handgrip strength

print("Lasso: BIS Model, Handgrip Strength")

print("UNTRANSFORMED")
columns = ['RA4IMECF', 'RA4IMICF', 'RA4IMFFM', 'RA4DTBFM', 'RA4IRES0', 'RA4IRESINF', 'RA4IRESEXC', 'RA4IRESINC', 'RA4IFCHAR', 'RA4IMCAP', 'RA1PRSEX', 'RA1PF7A', 'Age_40_50', 'Age_51_61', 'Age_61plus', 'RA4P1A']
X = df[columns]
y = df['RA4IMaxGrip']

#dividing into train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the Lasso regression model
lasso_model = Lasso()

# Define a parameter grid for hyperparameter tuning
alphas = np.logspace(-4, 0, 100)
max_iters = [10000, 20000, 30000, 50000]
param_grid = {'alpha': alphas, 'max_iter': max_iters}

# Perform grid search with cross-validation to find the best hyperparameters
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters from the grid search results
best_alpha = grid_search.best_params_['alpha']
best_max_iter = grid_search.best_params_['max_iter']

print("Best Alpha:", best_alpha)
print("Best Max Iterations:", best_max_iter)

# Retrain the model with the entire training set using the best hyperparameters
best_lasso_model = Lasso(alpha=best_alpha, max_iter=best_max_iter)
best_lasso_model.fit(X_train_scaled, y_train)

# Evaluate the model on the test set
y_pred = best_lasso_model.predict(X_test_scaled)
mse = metrics.mean_squared_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)

print("\nTEST Set Metrics:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)

# Evaluate the model on the train set
y_pred_train = best_lasso_model.predict(X_train_scaled)
mse_train = metrics.mean_squared_error(y_train, y_pred_train)
r2_train = metrics.r2_score(y_train, y_pred_train)

print("\nTRAINING Set Metrics:")
print("Mean Squared Error:", mse_train)
print("R-squared:", r2_train)

# Get the feature importance (coefficients) from the Lasso model
feature_importance = best_lasso_model.coef_

# Create a DataFrame to associate each feature with its importance value
importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': feature_importance})

# Sort the features based on their importance (absolute value of coefficients) in descending order
importance_df = importance_df.reindex(importance_df['Importance'].abs().sort_values(ascending=False).index)

print(importance_df)

Lasso: BIS Model, Handgrip Strength
UNTRANSFORMED


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Best Alpha: 0.8302175681319752
Best Max Iterations: 10000

TEST Set Metrics:
Mean Squared Error: 88.25571047563173
R-squared: 0.3964420292006796

TRAINING Set Metrics:
Mean Squared Error: 65.66310107081388
R-squared: 0.5264712541376788
       Feature  Importance
10    RA1PRSEX   -5.102797
15      RA4P1A    2.425354
7   RA4IRESINC   -1.278556
0     RA4IMECF    0.000000
1     RA4IMICF    0.000000
2     RA4IMFFM    0.000000
3     RA4DTBFM   -0.000000
4     RA4IRES0   -0.000000
5   RA4IRESINF   -0.000000
6   RA4IRESEXC   -0.000000
8    RA4IFCHAR   -0.000000
9     RA4IMCAP    0.000000
11     RA1PF7A   -0.000000
12   Age_40_50    0.000000
13   Age_51_61   -0.000000
14  Age_61plus   -0.000000


In [6]:
#BIS Model, jumppower

print("Lasso: BIS Model, Jump Power")

print("UNTRANSFORMED")
columns = ['RA4IMECF', 'RA4IMICF', 'RA4IMFFM', 'RA4DTBFM', 'RA4IRES0', 'RA4IRESINF', 'RA4IRESEXC', 'RA4IRESINC', 'RA4IFCHAR', 'RA4IMCAP', 'RA1PRSEX', 'RA1PF7A', 'Age_40_50', 'Age_51_61', 'Age_61plus', 'RA4P1A']
X = df[columns]
y = df['jumppownums']

#dividing into train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the Lasso regression model
lasso_model = Lasso()

# Define a parameter grid for hyperparameter tuning
alphas = np.logspace(-4, 0, 100)
max_iters = [10000, 20000, 30000, 50000]
param_grid = {'alpha': alphas, 'max_iter': max_iters}

# Perform grid search with cross-validation to find the best hyperparameters
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters from the grid search results
best_alpha = grid_search.best_params_['alpha']
best_max_iter = grid_search.best_params_['max_iter']

print("Best Alpha:", best_alpha)
print("Best Max Iterations:", best_max_iter)

# Retrain the model with the entire training set using the best hyperparameters
best_lasso_model = Lasso(alpha=best_alpha, max_iter=best_max_iter)
best_lasso_model.fit(X_train_scaled, y_train)

# Evaluate the model on the test set
y_pred = best_lasso_model.predict(X_test_scaled)
mse = metrics.mean_squared_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)

print("\nTEST Set Metrics:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)

# Evaluate the model on the train set
y_pred_train = best_lasso_model.predict(X_train_scaled)
mse_train = metrics.mean_squared_error(y_train, y_pred_train)
r2_train = metrics.r2_score(y_train, y_pred_train)

print("\nTRAINING Set Metrics:")
print("Mean Squared Error:", mse_train)
print("R-squared:", r2_train)

# Get the feature importance (coefficients) from the Lasso model
feature_importance = best_lasso_model.coef_

# Create a DataFrame to associate each feature with its importance value
importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': feature_importance})

# Sort the features based on their importance (absolute value of coefficients) in descending order
importance_df = importance_df.reindex(importance_df['Importance'].abs().sort_values(ascending=False).index)

print(importance_df)

Lasso: BIS Model, Jump Power
UNTRANSFORMED


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Best Alpha: 0.0006428073117284319
Best Max Iterations: 10000

TEST Set Metrics:
Mean Squared Error: 0.2052315106095807
R-squared: 0.8043329762606497

TRAINING Set Metrics:
Mean Squared Error: 0.215609234790088
R-squared: 0.7563055748881629
       Feature    Importance
1     RA4IMICF  2.307181e+00
0     RA4IMECF -1.280115e+00
10    RA1PRSEX -6.258711e-01
9     RA4IMCAP -6.033410e-01
3     RA4DTBFM -3.255004e-01
7   RA4IRESINC  3.054453e-01
8    RA4IFCHAR -2.198793e-01
14  Age_61plus -2.196246e-01
4     RA4IRES0 -1.447473e-01
15      RA4P1A -9.160178e-02
13   Age_51_61 -9.017248e-02
12   Age_40_50 -7.126959e-02
11     RA1PF7A  2.295090e-02
6   RA4IRESEXC -1.249371e-14
2     RA4IMFFM  0.000000e+00
5   RA4IRESINF  0.000000e+00


In [7]:
#Combo Models, handgrip strength (TB)

print("Lasso: Combo Models, Handgrip Strength (TB)")

print("UNTRANSFORMED")
columns = ['RA4IALM', 'RA4DTBBM', 'RA1PRSEX', 'RA1PF7A', 'Age_40_50', 'Age_51_61', 'Age_61plus', 'RA4P1A', 'RA4DLR3MD', 'RA4DLFNMD', 'RA4DLSMD', 'RA4IMECF', 'RA4IMICF','RA4IMFFM', 'RA4DTBFM', 'RA4IRES0', 'RA4IRESINF', 'RA4IRESEXC', 'RA4IRESINC', 'RA4IFCHAR', 'RA4IMCAP'] 
X = df[columns]
y = df['RA4IMaxGrip']

#dividing into train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the Lasso regression model
lasso_model = Lasso()

# Define a parameter grid for hyperparameter tuning
alphas = np.logspace(-4, 0, 100)
max_iters = [10000, 20000, 30000, 50000]
param_grid = {'alpha': alphas, 'max_iter': max_iters}

# Perform grid search with cross-validation to find the best hyperparameters
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters from the grid search results
best_alpha = grid_search.best_params_['alpha']
best_max_iter = grid_search.best_params_['max_iter']

print("Best Alpha:", best_alpha)
print("Best Max Iterations:", best_max_iter)

# Retrain the model with the entire training set using the best hyperparameters
best_lasso_model = Lasso(alpha=best_alpha, max_iter=best_max_iter)
best_lasso_model.fit(X_train_scaled, y_train)

# Evaluate the model on the test set
y_pred = best_lasso_model.predict(X_test_scaled)
mse = metrics.mean_squared_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)

print("\nTEST Set Metrics:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)

# Evaluate the model on the train set
y_pred_train = best_lasso_model.predict(X_train_scaled)
mse_train = metrics.mean_squared_error(y_train, y_pred_train)
r2_train = metrics.r2_score(y_train, y_pred_train)

print("\nTRAINING Set Metrics:")
print("Mean Squared Error:", mse_train)
print("R-squared:", r2_train)

# Get the feature importance (coefficients) from the Lasso model
feature_importance = best_lasso_model.coef_

# Create a DataFrame to associate each feature with its importance value
importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': feature_importance})

# Sort the features based on their importance (absolute value of coefficients) in descending order
importance_df = importance_df.reindex(importance_df['Importance'].abs().sort_values(ascending=False).index)

print(importance_df)

Lasso: Combo Models, Handgrip Strength (TB)
UNTRANSFORMED


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Best Alpha: 0.8302175681319752
Best Max Iterations: 10000

TEST Set Metrics:
Mean Squared Error: 88.60391111189746
R-squared: 0.39406077513425175

TRAINING Set Metrics:
Mean Squared Error: 65.59536117693423
R-squared: 0.526959759652505
       Feature  Importance
2     RA1PRSEX   -5.093846
7       RA4P1A    2.382436
18  RA4IRESINC   -1.242113
8    RA4DLR3MD    0.123095
0      RA4IALM    0.000000
12    RA4IMICF    0.000000
19   RA4IFCHAR   -0.000000
17  RA4IRESEXC   -0.000000
16  RA4IRESINF   -0.000000
15    RA4IRES0   -0.000000
14    RA4DTBFM   -0.000000
13    RA4IMFFM    0.000000
10    RA4DLSMD    0.000000
11    RA4IMECF    0.000000
1     RA4DTBBM    0.000000
9    RA4DLFNMD    0.000000
6   Age_61plus   -0.000000
5    Age_51_61   -0.000000
4    Age_40_50    0.000000
3      RA1PF7A   -0.000000
20    RA4IMCAP    0.000000


In [8]:
#Combo Models, jumppower (TB)

print("Lasso: Combo Models, Jump Power (TB)")

print("UNTRANSFORMED")
columns = ['RA4IALM', 'RA4DTBBM', 'RA1PRSEX', 'RA1PF7A', 'Age_40_50', 'Age_51_61', 'Age_61plus', 'RA4P1A', 'RA4DLR3MD', 'RA4DLFNMD', 'RA4DLSMD', 'RA4IMECF', 'RA4IMICF','RA4IMFFM', 'RA4DTBFM', 'RA4IRES0', 'RA4IRESINF', 'RA4IRESEXC', 'RA4IRESINC', 'RA4IFCHAR', 'RA4IMCAP'] 
X = df[columns]
y = df['jumppownums']

#dividing into train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the Lasso regression model
lasso_model = Lasso()

# Define a parameter grid for hyperparameter tuning
alphas = np.logspace(-4, 0, 100)
max_iters = [100000, 200000, 300000, 500000]
param_grid = {'alpha': alphas, 'max_iter': max_iters}

# Perform grid search with cross-validation to find the best hyperparameters
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters from the grid search results
best_alpha = grid_search.best_params_['alpha']
best_max_iter = grid_search.best_params_['max_iter']

print("Best Alpha:", best_alpha)
print("Best Max Iterations:", best_max_iter)

# Retrain the model with the entire training set using the best hyperparameters
best_lasso_model = Lasso(alpha=best_alpha, max_iter=best_max_iter)
best_lasso_model.fit(X_train_scaled, y_train)

# Evaluate the model on the test set
y_pred = best_lasso_model.predict(X_test_scaled)
mse = metrics.mean_squared_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)

print("\nTEST Set Metrics:")
print("Mean Squared Error:", mse)
print("R-squared:", r2)

# Evaluate the model on the train set
y_pred_train = best_lasso_model.predict(X_train_scaled)
mse_train = metrics.mean_squared_error(y_train, y_pred_train)
r2_train = metrics.r2_score(y_train, y_pred_train)

print("\nTRAINING Set Metrics:")
print("Mean Squared Error:", mse_train)
print("R-squared:", r2_train)

# Get the feature importance (coefficients) from the Lasso model
feature_importance = best_lasso_model.coef_

# Create a DataFrame to associate each feature with its importance value
importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': feature_importance})

# Sort the features based on their importance (absolute value of coefficients) in descending order
importance_df = importance_df.reindex(importance_df['Importance'].abs().sort_values(ascending=False).index)

print(importance_df)

Lasso: Combo Models, Jump Power (TB)
UNTRANSFORMED
Best Alpha: 0.00021049041445120198
Best Max Iterations: 100000

TEST Set Metrics:
Mean Squared Error: 0.18762164844574564
R-squared: 0.8211221589150259

TRAINING Set Metrics:
Mean Squared Error: 0.16737542325113347
R-squared: 0.8108223073712679
       Feature  Importance
12    RA4IMICF    1.211929
11    RA4IMECF   -1.188510
0      RA4IALM    0.979184
2     RA1PRSEX   -0.348518
16  RA4IRESINF    0.338631
14    RA4DTBFM   -0.336645
20    RA4IMCAP   -0.312962
17  RA4IRESEXC   -0.247626
6   Age_61plus   -0.139664
3      RA1PF7A   -0.122358
9    RA4DLFNMD    0.115281
7       RA4P1A   -0.091747
10    RA4DLSMD   -0.080067
19   RA4IFCHAR   -0.062880
18  RA4IRESINC    0.049353
5    Age_51_61   -0.048008
8    RA4DLR3MD    0.039335
1     RA4DTBBM   -0.006909
15    RA4IRES0   -0.004628
4    Age_40_50   -0.001649
13    RA4IMFFM    0.000000
