In [1]:
import os

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

#loading data
#the CSV location defaults to the original author's path but can be overridden with the
#ML_DXA_BIS_CSV environment variable, so the notebook can run on another machine unedited
DATA_PATH = os.environ.get('ML_DXA_BIS_CSV', '/Users/yunjuha/Desktop/ML-DXA-BIS/ML-DXA-BIS_Combined Data.csv')
data = pd.read_csv(DATA_PATH)
#work on a copy so `data` keeps the file contents exactly as loaded
df = data.copy()

#TRANSFORMING THE TARGETS
#log of jump power after adding a 0.1 offset (presumably so a value of 0 stays finite -- TODO confirm)
df['tjumppownums'] = np.log(df['jumppownums'] + 0.1)
#square root of max grip
df['tCOMB4IMaxGrip'] = np.sqrt(df['COMB4IMaxGrip'])

#dropping columns
column_to_drop = ['M2ID', 'MIDUSID', 'SAMPLMAJ', 'Height.cm.', 'Weight.kg.', 'Age.years.', 'TBW.litres.', 'ECF.litres.', 'ICF.litres.']
#COMB1PF7A is dropped in the same pass; a missing column still raises KeyError as before
df = df.drop(columns=column_to_drop + ['COMB1PF7A'])

In [2]:
#row count while rows with missing values are still included
print(df.shape[0])

544


In [3]:
#number of individuals left once every row with a missing value is removed
#(the race column has already been dropped at this point)
df = df.dropna(axis=0, how='any')
print(df.shape[0])

490


# Support Vector Regression

### QUICK NOTE: Whether to fit the SVR on scaled or unscaled features depends on how we want to interpret the model (in standardized units vs. the original feature units). SVR is not scale-invariant, so this choice also changes the fitted model itself. We fit our models without scaling.

In [4]:
#SVR function
def train_test_svr(df, feature_columns, target_column, test_size=0.2, random_state=42, scale=False):
    """Tune, fit and evaluate a support vector regressor for one feature set.

    The rows of ``df`` are split into a train and a test set, a grid search
    with 5-fold cross-validation (scored by negative MSE) picks ``kernel``,
    ``C`` and ``epsilon`` on the training set, and the winning model is
    evaluated on both sets. Results are printed; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding every feature column and the target column.
    feature_columns : list of str
        Names of the columns used as predictors.
    target_column : str
        Name of the column to predict.
    test_size : float, default 0.2
        Passed to ``train_test_split``.
    random_state : int, default 42
        Passed to ``train_test_split``.
    scale : bool, default False
        When True, a ``StandardScaler`` is placed in front of the SVR inside a
        pipeline, so it is re-fitted on the training part of every
        cross-validation fold. The default keeps the unscaled fit.

    Raises
    ------
    ValueError
        If ``target_column`` is also listed in ``feature_columns``.
    """
    feature_columns = list(feature_columns)
    if target_column in feature_columns:
        raise ValueError(f"target column {target_column!r} must not also be a feature column")

    #select features and target directly instead of popping the target out of a slice of df
    X = df[feature_columns]
    y = df[target_column]

    #splitting our data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    #our parameter grid for hyperparameter tuning
    param_grid = {
        'kernel': ['linear', 'rbf', 'poly'],
        'C': [0.1, 1, 10],
        'epsilon': [0.01, 0.1, 1]
    }

    if scale:
        #scaler lives inside the pipeline so it never sees the validation fold or the test set
        estimator = Pipeline([('scaler', StandardScaler()), ('svr', SVR())])
        search_grid = {'svr__' + name: values for name, values in param_grid.items()}
    else:
        #NOTE(review): scikit-learn recommends scaling inputs for SVMs; unscaled fits may be
        #slow to converge for some kernels -- confirm this default is still what we want
        estimator = SVR()
        search_grid = param_grid

    #doing grid search with cross-validation to find the best hyperparameters
    grid_search = GridSearchCV(estimator=estimator, param_grid=search_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(X_train, y_train)

    #our best hyperparameters from the grid search results (pipeline prefix stripped for printing)
    best_params = {name.split('__')[-1]: value for name, value in grid_search.best_params_.items()}
    print("Best Hyperparameters:", best_params)

    #with the default refit=True the search has already refitted the best configuration on the
    #whole training set, so it is used directly instead of fitting a second identical model
    best_model = grid_search.best_estimator_

    #evaluation of the model on the TRAINING set
    y_train_pred = best_model.predict(X_train)
    mse_train = metrics.mean_squared_error(y_train, y_train_pred)
    r2_train = metrics.r2_score(y_train, y_train_pred)
    print("\nTrain set metrics:")
    print("MSE:", mse_train)
    print("R-squared:", r2_train)

    #evaluation of the model on TESTING set
    y_test_pred = best_model.predict(X_test)
    mse_test = metrics.mean_squared_error(y_test, y_test_pred)
    r2_test = metrics.r2_score(y_test, y_test_pred)
    print("\nTest set metrics:")
    print("MSE:", mse_test)
    print("R-squared:", r2_test, "\n")

In [5]:
#DXA model -> handgrip strength, arms feature set

print("Support Vector Regression: DXA Model, Handgrip Strength (Arms)")
train_test_svr(
    df,
    feature_columns=[
        'COMB4DALM', 'COMB4DABM', 'COMB4DAFM',
        'COMB1PRSEX', 'COMB1PRAGE', 'COMB4P1A',
        'COMB4DLR3MD',
    ],
    target_column='tCOMB4IMaxGrip',
)

Support Vector Regression: DXA Model, Handgrip Strength (Arms)


Best Hyperparameters: {'C': 10, 'epsilon': 0.1, 'kernel': 'rbf'}

Train set metrics:
MSE: 0.535108270055494
R-squared: 0.5168828070997593

Test set metrics:
MSE: 0.6283226164364648
R-squared: 0.2801383874843155 



In [6]:
#DXA model -> handgrip strength, total-body feature set

print("Support Vector Regression: DXA Model, Handgrip Strength (TB)")
train_test_svr(
    df,
    feature_columns=[
        'COMB4IALM', 'COMB4DTBBM', 'COMB4DTBFM',
        'COMB1PRSEX', 'COMB1PRAGE', 'COMB4P1A',
        'COMB4DLR3MD', 'COMB4DLFNMD', 'COMB4DLSL14MD',
    ],
    target_column='tCOMB4IMaxGrip',
)

Support Vector Regression: DXA Model, Handgrip Strength (TB)
Best Hyperparameters: {'C': 10, 'epsilon': 0.01, 'kernel': 'rbf'}

Train set metrics:
MSE: 0.6990967546952112
R-squared: 0.36882743064503143

Test set metrics:
MSE: 0.7488899609353353
R-squared: 0.14200584099103286 



In [7]:
#DXA model -> jump power, legs feature set

print("Support Vector Regression: DXA Model, Jump Power (Legs)")
train_test_svr(
    df,
    feature_columns=[
        'COMB4ILLM', 'COMB4DLBM', 'COMB4DLFM',
        'COMB1PRSEX', 'COMB1PRAGE', 'COMB4P1A',
        'COMB4DLFNMD',
    ],
    target_column='tjumppownums',
)

Support Vector Regression: DXA Model, Jump Power (Legs)
Best Hyperparameters: {'C': 10, 'epsilon': 0.01, 'kernel': 'rbf'}

Train set metrics:
MSE: 0.07903805113931826
R-squared: 0.3973192777158806

Test set metrics:
MSE: 0.07102711745966904
R-squared: 0.45312473692907784 



In [8]:
#DXA model -> jump power, total-body feature set

print("Support Vector Regression: DXA Model, Jump Power (TB)")
train_test_svr(
    df,
    feature_columns=[
        'COMB4IALM', 'COMB4DTBBM', 'COMB4DTBFM',
        'COMB1PRSEX', 'COMB1PRAGE', 'COMB4P1A',
        'COMB4DLR3MD', 'COMB4DLFNMD', 'COMB4DLSL14MD',
    ],
    target_column='tjumppownums',
)

Support Vector Regression: DXA Model, Jump Power (TB)
Best Hyperparameters: {'C': 10, 'epsilon': 0.1, 'kernel': 'rbf'}

Train set metrics:
MSE: 0.08206553726597993
R-squared: 0.3742340991313856

Test set metrics:
MSE: 0.07757588110607058
R-squared: 0.4027023493959119 



In [9]:
#BIS model -> handgrip strength

print("Support Vector Regression: BIS Model, Handgrip Strength")
train_test_svr(
    df,
    feature_columns=[
        'COMB4IMECF', 'COMB4IMICF', 'COMB4IMFFM', 'COMB4DTBFM',
        'COMB4IRES0', 'COMB4IRESINF', 'COMB4IRESEXC', 'COMB4IRESINC',
        'COMB4IFCHAR', 'COMB4IMCAP',
        'COMB1PRSEX', 'COMB1PRAGE', 'COMB4P1A',
    ],
    target_column='tCOMB4IMaxGrip',
)

Support Vector Regression: BIS Model, Handgrip Strength
Best Hyperparameters: {'C': 10, 'epsilon': 0.01, 'kernel': 'rbf'}

Train set metrics:
MSE: 0.8465839955815192
R-squared: 0.2356700384069984

Test set metrics:
MSE: 0.7939071218847212
R-squared: 0.09043022485979357 



In [10]:
#BIS model -> jump power

print("Support Vector Regression: BIS Model, Jump Power")
train_test_svr(
    df,
    feature_columns=[
        'COMB4IMECF', 'COMB4IMICF', 'COMB4IMFFM', 'COMB4DTBFM',
        'COMB4IRES0', 'COMB4IRESINF', 'COMB4IRESEXC', 'COMB4IRESINC',
        'COMB4IFCHAR', 'COMB4IMCAP',
        'COMB1PRSEX', 'COMB1PRAGE', 'COMB4P1A',
    ],
    target_column='tjumppownums',
)

Support Vector Regression: BIS Model, Jump Power
Best Hyperparameters: {'C': 10, 'epsilon': 0.1, 'kernel': 'rbf'}

Train set metrics:
MSE: 0.08566736643296677
R-squared: 0.34676944163271695

Test set metrics:
MSE: 0.07559966861304214
R-squared: 0.41791825235892655 



In [11]:
#combined DXA + BIS model -> handgrip strength, arms feature set

print("Support Vector Regression: Combo Models, Handgrip Strength (Arms)")
train_test_svr(
    df,
    feature_columns=[
        'COMB4DALM', 'COMB4DABM', 'COMB4DAFM',
        'COMB1PRSEX', 'COMB1PRAGE', 'COMB4P1A',
        'COMB4DLR3MD',
        'COMB4IMECF', 'COMB4IMICF', 'COMB4IMFFM', 'COMB4DTBFM',
        'COMB4IRES0', 'COMB4IRESINF', 'COMB4IRESEXC', 'COMB4IRESINC',
        'COMB4IFCHAR', 'COMB4IMCAP',
    ],
    target_column='tCOMB4IMaxGrip',
)

Support Vector Regression: Combo Models, Handgrip Strength (Arms)


In [None]:
#COMBO: Handgrip Strength (TOTAL BODY)

print("Support Vector Regression: Combo Models, Handgrip Strength (TB)")
#'COMB4DTBFM' was listed twice in the original call, which handed the same column to the
#model twice; it is kept once here, at its first position
train_test_svr(
    df,
    feature_columns=[
        'COMB4IALM', 'COMB4DTBBM', 'COMB4DTBFM',
        'COMB1PRSEX', 'COMB1PRAGE', 'COMB4P1A',
        'COMB4DLR3MD', 'COMB4DLFNMD', 'COMB4DLSL14MD',
        'COMB4IMECF', 'COMB4IMICF', 'COMB4IMFFM',
        'COMB4IRES0', 'COMB4IRESINF', 'COMB4IRESEXC', 'COMB4IRESINC',
        'COMB4IFCHAR', 'COMB4IMCAP',
    ],
    target_column='tCOMB4IMaxGrip',
)

In [None]:
#combined DXA + BIS model -> jump power, legs feature set

print("Support Vector Regression: Combo Models, Jump Power (Legs)")
train_test_svr(
    df,
    feature_columns=[
        'COMB4ILLM', 'COMB4DLBM', 'COMB4DLFM',
        'COMB1PRSEX', 'COMB1PRAGE', 'COMB4P1A',
        'COMB4DLFNMD',
        'COMB4IMECF', 'COMB4IMICF', 'COMB4IMFFM', 'COMB4DTBFM',
        'COMB4IRES0', 'COMB4IRESINF', 'COMB4IRESEXC', 'COMB4IRESINC',
        'COMB4IFCHAR', 'COMB4IMCAP',
    ],
    target_column='tjumppownums',
)

In [None]:
#COMBO: Jump Power (TOTAL BODY)

print("Support Vector Regression: Combo Models, Jump Power (TB)")
#'COMB4DTBFM' was listed twice in the original call, which handed the same column to the
#model twice; it is kept once here, at its first position
train_test_svr(
    df,
    feature_columns=[
        'COMB4IALM', 'COMB4DTBBM', 'COMB4DTBFM',
        'COMB1PRSEX', 'COMB1PRAGE', 'COMB4P1A',
        'COMB4DLR3MD', 'COMB4DLFNMD', 'COMB4DLSL14MD',
        'COMB4IMECF', 'COMB4IMICF', 'COMB4IMFFM',
        'COMB4IRES0', 'COMB4IRESINF', 'COMB4IRESEXC', 'COMB4IRESINC',
        'COMB4IFCHAR', 'COMB4IMCAP',
    ],
    target_column='tjumppownums',
)