# Including Race

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler 
from sklearn.neural_network import MLPRegressor

# Read the combined data CSV and wrap it in a DataFrame.
data = pd.read_csv(
    '/Users/yunjuha/Desktop/ML-DXA-BIS/ML-DXA-BIS_Combined Data.csv'
)
df = pd.DataFrame(data)

# Transform the two targets: log(x + 0.1) for jump power, square root for max grip.
df['tjumppownums'] = np.log(df['jumppownums'].add(0.1))
df['tCOMB4IMaxGrip'] = np.sqrt(df['COMB4IMaxGrip'])

# Remove the columns listed below from the working frame.
column_to_drop = [
    'M2ID',
    'MIDUSID',
    'SAMPLMAJ',
    'Height.cm.',
    'Weight.kg.',
    'Age.years.',
    'TBW.litres.',
    'ECF.litres.',
    'ICF.litres.',
]
df = df.drop(columns=column_to_drop)

In [4]:
# Row count before rows with missing values are removed.
print(df.shape[0])

544


In [5]:
# Drop every row that has at least one missing value; the count printed below
# is the number of people available when race IS INCLUDED.
df = df.dropna(axis=0, how='any')
print(df.shape[0])

422


In [6]:
# Keep only individuals whose race code is 1 (white) or 2 (black);
# other races are ignored for now.
race = df['COMB1PF7A'].isin([1, 2])
df = df.loc[race]

print(df.shape[0])

413


## Multi-Layer Perceptron

### Quick note: fitting the MLP with or without feature scaling makes little practical difference here, because the model performs poorly either way.

### We fit the model without scaling, because with scaling the R-squared values were negative.

In [7]:
#MLP Function
def train_test_mlp(df, feature_columns, target_column, test_size=0.2, random_state=42, param_grid=None, scale=False):
    """Tune, fit and evaluate an MLPRegressor on a train/test split of ``df``.

    Prints the best hyperparameters found by a 5-fold grid search, the MSE and
    R-squared on the training and test splits, and, when the race column
    'COMB1PF7A' is one of the features, how many rows coded 1 ("White") and
    2 ("Black") fell into each split.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every feature column and the target column.
    feature_columns : list of str
        Predictor column names. Repeated names are used only once.
    target_column : str
        Name of the column to predict.
    test_size : float, default 0.2
        Fraction of rows held out for testing (passed to train_test_split).
    random_state : int, default 42
        Seed used for both the train/test split and the MLP initialisation.
    param_grid : dict, optional
        Grid handed to GridSearchCV. When omitted, the default grid defined
        below is used.
    scale : bool, default False
        When True, features are standardised with a StandardScaler fitted on
        the training split before tuning and evaluation. The scaler is fitted
        once on the whole training split, so the cross-validation folds share
        its statistics.

    Returns
    -------
    None
        All results are printed.

    Raises
    ------
    ValueError
        If ``target_column`` also appears in ``feature_columns``.
    """
    # De-duplicate feature names (order preserved) so that a name listed twice
    # does not become two identical input columns.
    features = list(dict.fromkeys(feature_columns))
    if target_column in features:
        raise ValueError("target_column %r must not also be listed in feature_columns" % (target_column,))

    X = df[features]
    y = df[target_column]

    #splitting our data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    #optional feature scaling; the unscaled frames are kept for the race counts below
    if scale:
        scaler = StandardScaler()
        X_train_fit = scaler.fit_transform(X_train)
        X_test_fit = scaler.transform(X_test)
    else:
        X_train_fit, X_test_fit = X_train, X_test

    if param_grid is None:
        param_grid = {
            'hidden_layer_sizes': [(100, 100), (50, 50, 50), (200,)],
            'activation': ['relu', 'tanh'],
            'alpha': [0.0001, 0.001, 0.01],
            'max_iter': [1000, 2000, 3000]
        }

    #doing grid search with cross-validation to find the best hyperparameters
    mlp_model = MLPRegressor(random_state=random_state)
    grid_search = GridSearchCV(estimator=mlp_model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(X_train_fit, y_train)

    best_param = grid_search.best_params_
    print("Best Hyperparameters:", best_param)

    # GridSearchCV refits the best configuration on the full training split by
    # default, so the fitted best estimator is used directly.
    mlp = grid_search.best_estimator_

    #evaluation of the model on the TRAINING set
    y_train_pred = mlp.predict(X_train_fit)
    mse_train = metrics.mean_squared_error(y_train, y_train_pred)
    r2_train = metrics.r2_score(y_train, y_train_pred)
    print("\nTrain set metrics:")
    print("MSE:", mse_train)
    print("R-squared:", r2_train)

    #evaluation of the model on TESTING set
    y_test_pred = mlp.predict(X_test_fit)
    mse_test = metrics.mean_squared_error(y_test, y_test_pred)
    r2_test = metrics.r2_score(y_test, y_test_pred)
    print("\nTest set metrics:")
    print("MSE:", mse_test)
    print("R-squared:", r2_test, "\n")

    #frequency count for race
    # Checked against the feature matrix (not df) so the function also works
    # when race is present in df but was not requested as a feature.
    race_column = 'COMB1PF7A'
    if race_column in X_train.columns:
        # Boolean sums yield 0 for an absent group instead of raising.
        white_train = int((X_train[race_column] == 1).sum())
        white_test = int((X_test[race_column] == 1).sum())
        black_train = int((X_train[race_column] == 2).sum())
        black_test = int((X_test[race_column] == 2).sum())

        print("White train frequency: ", white_train)
        print("White test frequency: ", white_test, "\n")

        print("Black train frequency: ", black_train)
        print("Black test frequency: ", black_test)


In [8]:
#DXA: Handgrip Strength (ARMS)

print("Multi-Layer Perceptron: DXA Model, Handgrip Strength (Arms)")
train_test_mlp(
    df,
    feature_columns=[
        'COMB1PF7A',
        'COMB4DALM',
        'COMB4DABM',
        'COMB4DAFM',
        'COMB1PRSEX',
        'COMB1PRAGE',
        'COMB4P1A',
        'COMB4DLR3MD',
    ],
    target_column='tCOMB4IMaxGrip',
)

Multi-Layer Perceptron: DXA Model, Handgrip Strength (Arms)
Best Hyperparameters: {'activation': 'tanh', 'alpha': 0.001, 'hidden_layer_sizes': (100, 100), 'max_iter': 1000}

Train set metrics:
MSE: 0.6282496695178236
R-squared: 0.3926718040713225

Test set metrics:
MSE: 0.631033221890879
R-squared: 0.3393915596925654 

White train frequency:  251
White test frequency:  62 

Black train frequency:  79
Black test frequency:  21


In [9]:
#DXA: Handgrip Strength (TOTAL BODY)

print("Multi-Layer Perceptron: DXA Model, Handgrip Strength (TB)")
train_test_mlp(
    df,
    feature_columns=[
        'COMB1PF7A',
        'COMB4IALM',
        'COMB4DTBBM',
        'COMB4DTBFM',
        'COMB1PRSEX',
        'COMB1PRAGE',
        'COMB4P1A',
        'COMB4DLR3MD',
        'COMB4DLFNMD',
        'COMB4DLSL14MD',
    ],
    target_column='tCOMB4IMaxGrip',
)

Multi-Layer Perceptron: DXA Model, Handgrip Strength (TB)
Best Hyperparameters: {'activation': 'tanh', 'alpha': 0.001, 'hidden_layer_sizes': (100, 100), 'max_iter': 1000}

Train set metrics:
MSE: 0.9124487089012322
R-squared: 0.11793693631427393

Test set metrics:
MSE: 0.9104033314310965
R-squared: 0.04692795250106596 

White train frequency:  251
White test frequency:  62 

Black train frequency:  79
Black test frequency:  21


In [10]:
#DXA: Jump Power (LEGS)

print("Multi-Layer Perceptron: DXA Model, Jump Power (Legs)")
train_test_mlp(
    df,
    feature_columns=[
        'COMB1PF7A',
        'COMB4ILLM',
        'COMB4DLBM',
        'COMB4DLFM',
        'COMB1PRSEX',
        'COMB1PRAGE',
        'COMB4P1A',
        'COMB4DLFNMD',
    ],
    target_column='tjumppownums',
)

Multi-Layer Perceptron: DXA Model, Jump Power (Legs)
Best Hyperparameters: {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 50, 50), 'max_iter': 1000}

Train set metrics:
MSE: 0.10929031574530712
R-squared: 0.15099035692536233

Test set metrics:
MSE: 0.11094310976692397
R-squared: 0.18633991667856098 

White train frequency:  251
White test frequency:  62 

Black train frequency:  79
Black test frequency:  21


In [11]:
#DXA: Jump Power (TOTAL BODY)

print("Multi-Layer Perceptron: DXA Model, Jump Power (TB)")
train_test_mlp(
    df,
    feature_columns=[
        'COMB1PF7A',
        'COMB4IALM',
        'COMB4DTBBM',
        'COMB4DTBFM',
        'COMB1PRSEX',
        'COMB1PRAGE',
        'COMB4P1A',
        'COMB4DLR3MD',
        'COMB4DLFNMD',
        'COMB4DLSL14MD',
    ],
    target_column='tjumppownums',
)

Multi-Layer Perceptron: DXA Model, Jump Power (TB)
Best Hyperparameters: {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (100, 100), 'max_iter': 1000}

Train set metrics:
MSE: 0.12177678291192606
R-squared: 0.053990627716971495

Test set metrics:
MSE: 0.13088893312112765
R-squared: 0.04005665198198638 

White train frequency:  251
White test frequency:  62 

Black train frequency:  79
Black test frequency:  21


In [12]:
#BIS: Handgrip Strength

print("Multi-Layer Perceptron: BIS Model, Handgrip Strength")
train_test_mlp(
    df,
    feature_columns=[
        'COMB1PF7A',
        'COMB4IMECF',
        'COMB4IMICF',
        'COMB4IMFFM',
        'COMB4DTBFM',
        'COMB4IRES0',
        'COMB4IRESINF',
        'COMB4IRESEXC',
        'COMB4IRESINC',
        'COMB4IFCHAR',
        'COMB4IMCAP',
        'COMB1PRSEX',
        'COMB1PRAGE',
        'COMB4P1A',
    ],
    target_column='tCOMB4IMaxGrip',
)

Multi-Layer Perceptron: BIS Model, Handgrip Strength


In [None]:
#BIS: Jump Power

print("Multi-Layer Perceptron: BIS Model, Jump Power")
train_test_mlp(
    df,
    feature_columns=[
        'COMB1PF7A',
        'COMB4IMECF',
        'COMB4IMICF',
        'COMB4IMFFM',
        'COMB4DTBFM',
        'COMB4IRES0',
        'COMB4IRESINF',
        'COMB4IRESEXC',
        'COMB4IRESINC',
        'COMB4IFCHAR',
        'COMB4IMCAP',
        'COMB1PRSEX',
        'COMB1PRAGE',
        'COMB4P1A',
    ],
    target_column='tjumppownums',
)

In [None]:
#COMBO: Handgrip Strength (ARMS)

print("Multi-Layer Perceptron: Combo Models, Handgrip Strength (Arms)")
train_test_mlp(
    df,
    feature_columns=[
        'COMB1PF7A',
        'COMB4DALM',
        'COMB4DABM',
        'COMB4DAFM',
        'COMB1PRSEX',
        'COMB1PRAGE',
        'COMB4P1A',
        'COMB4DLR3MD',
        'COMB4IMECF',
        'COMB4IMICF',
        'COMB4IMFFM',
        'COMB4DTBFM',
        'COMB4IRES0',
        'COMB4IRESINF',
        'COMB4IRESEXC',
        'COMB4IRESINC',
        'COMB4IFCHAR',
        'COMB4IMCAP',
    ],
    target_column='tCOMB4IMaxGrip',
)

In [None]:
#COMBO: Handgrip Strength (TOTAL BODY)

print("Multi-Layer Perceptron: Combo Models, Handgrip Strength (TB)")
# 'COMB4DTBFM' occurs in both the DXA total-body list and the BIS list; it is
# listed once here so the model does not receive the same column twice.
train_test_mlp(
    df,
    feature_columns=[
        'COMB1PF7A',
        'COMB4IALM',
        'COMB4DTBBM',
        'COMB4DTBFM',
        'COMB1PRSEX',
        'COMB1PRAGE',
        'COMB4P1A',
        'COMB4DLR3MD',
        'COMB4DLFNMD',
        'COMB4DLSL14MD',
        'COMB4IMECF',
        'COMB4IMICF',
        'COMB4IMFFM',
        'COMB4IRES0',
        'COMB4IRESINF',
        'COMB4IRESEXC',
        'COMB4IRESINC',
        'COMB4IFCHAR',
        'COMB4IMCAP',
    ],
    target_column='tCOMB4IMaxGrip',
)

In [None]:
#COMBO: Jump Power (LEGS)

print("Multi-Layer Perceptron: Combo Models, Jump Power (Legs)")
train_test_mlp(
    df,
    feature_columns=[
        'COMB1PF7A',
        'COMB4ILLM',
        'COMB4DLBM',
        'COMB4DLFM',
        'COMB1PRSEX',
        'COMB1PRAGE',
        'COMB4P1A',
        'COMB4DLFNMD',
        'COMB4IMECF',
        'COMB4IMICF',
        'COMB4IMFFM',
        'COMB4DTBFM',
        'COMB4IRES0',
        'COMB4IRESINF',
        'COMB4IRESEXC',
        'COMB4IRESINC',
        'COMB4IFCHAR',
        'COMB4IMCAP',
    ],
    target_column='tjumppownums',
)

In [None]:
#COMBO: Jump Power (TOTAL BODY)

print("Multi-Layer Perceptron: Combo Models, Jump Power (TB)")
# 'COMB4DTBFM' occurs in both the DXA total-body list and the BIS list; it is
# listed once here so the model does not receive the same column twice.
train_test_mlp(
    df,
    feature_columns=[
        'COMB1PF7A',
        'COMB4IALM',
        'COMB4DTBBM',
        'COMB4DTBFM',
        'COMB1PRSEX',
        'COMB1PRAGE',
        'COMB4P1A',
        'COMB4DLR3MD',
        'COMB4DLFNMD',
        'COMB4DLSL14MD',
        'COMB4IMECF',
        'COMB4IMICF',
        'COMB4IMFFM',
        'COMB4IRES0',
        'COMB4IRESINF',
        'COMB4IRESEXC',
        'COMB4IRESINC',
        'COMB4IFCHAR',
        'COMB4IMCAP',
    ],
    target_column='tjumppownums',
)