In [1]:
import pandas as pd
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

data = pd.read_csv('/Users/yunjuha/Desktop/SROP/DXA_BIS_Project/2023-06-23_DXA_BIS_data.csv')
df = pd.DataFrame(data)
df_filtered = df.dropna() #now there's only 240 rows instead of 245
df_filtered_copy = df_filtered.copy() #to get rid of copy error
df = df_filtered_copy

#creating dummy variables for the age ranges
df['Age_40_50'] = df['RA1PRAGE'].apply(lambda x: 1 if (x >= 40 and x <= 50) else 0)
df['Age_51_61'] = df['RA1PRAGE'].apply(lambda x: 1 if (x >= 51 and x <= 61) else 0)
df['Age_61plus'] = df['RA1PRAGE'].apply(lambda x: 1 if (x >= 61) else 0)

## Handgrip Strength Models
*does not include non-sed variables

In [14]:
#Covariates Only Model

#predictor variables
X = df[['Age_40_50', 'Age_51_61', 'Age_61plus', 'RA1PRSEX', 'RA1PF7A', 'tRA4PBMI']]

#target variable y
y = df['tRA4IMaxGrip']

#creating a linear regression model and fit the data
model = LinearRegression()
results = model.fit(X, y)

#printing the coefficients and intercept
first_age_coef = round(model.coef_[0], 3)
second_age_coef = round(model.coef_[1], 3)
third_age_coef = round(model.coef_[2], 3)
sex_coef = round(model.coef_[3], 3)
race_coef = round(model.coef_[4], 3)
bmi_coef = round(model.coef_[5], 3)
intercept = round(model.intercept_, 3)

print("The model is Predicted Handgrip Strength (Covariates only) =", intercept, "+", first_age_coef, "(Age: 40-50) +", second_age_coef, "(Age: 51-61) +", third_age_coef, "(Age: 61+) +", sex_coef, "(Sex) + ", race_coef, "(Race) + ", bmi_coef, "(BMI)\n")

r2 = round(model.score(X, y), 3)
print("R^2 value is", r2)

##########################
#for the MLR table
#adding a constant term to the predictors (X) matrix
X = sm.add_constant(X)

#creating and fit the regression model
model = sm.OLS(y, X)
results = model.fit()

#printing the regression results summary
print(results.summary())


The model is Predicted Handgrip Strength (Covariates only) = 8.164 + 0.064 (Age: 40-50) + -0.113 (Age: 51-61) + -0.284 (Age: 61+) + -1.459 (Sex) +  -0.172 (Race) +  -0.029 (BMI)

R^2 value is 0.461
                            OLS Regression Results                            
Dep. Variable:           tRA4IMaxGrip   R-squared:                       0.461
Model:                            OLS   Adj. R-squared:                  0.447
Method:                 Least Squares   F-statistic:                     33.21
Date:                Tue, 27 Jun 2023   Prob (F-statistic):           8.13e-29
Time:                        22:05:51   Log-Likelihood:                -283.91
No. Observations:                 240   AIC:                             581.8
Df Residuals:                     233   BIC:                             606.2
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
            

## Now adding Muscle Measure!

In [15]:
#ALM

#predictor variables
X = df[['Age_40_50', 'Age_51_61', 'Age_61plus', 'RA1PRSEX', 'RA1PF7A', 'tRA4PBMI', 'tRA4IALM']]

#target variable y
y = df['tRA4IMaxGrip']

#creating a linear regression model and fit the data
model = LinearRegression()
results = model.fit(X, y)

#printing the coefficients and intercept
first_age_coef = round(model.coef_[0], 3)
second_age_coef = round(model.coef_[1], 3)
third_age_coef = round(model.coef_[2], 3)
sex_coef = round(model.coef_[3], 3)
race_coef = round(model.coef_[4], 3)
bmi_coef = round(model.coef_[5], 3)
alm_coef = round(model.coef_[6], 3)
intercept = round(model.intercept_, 3)

print("The model is Predicted Handgrip Strength (incl ALM) =", intercept, "+", first_age_coef, "(Age: 40-50) +", second_age_coef, "(Age: 51-61) +", third_age_coef, "(Age: 61+) +", sex_coef, "(Sex) + ", race_coef, "(Race) + ", bmi_coef, "(BMI) +", alm_coef, "(ALM)\n")

r2 = round(model.score(X, y), 3)
print("R^2 value is", r2)

##########################
#for the MLR table
#adding a constant term to the predictors (X) matrix
X = sm.add_constant(X)

#creating and fit the regression model
model = sm.OLS(y, X)
results = model.fit()

#printing the regression results summary
print(results.summary())

The model is Predicted Handgrip Strength (incl ALM) = 7.653 + 0.114 (Age: 40-50) + 0.02 (Age: 51-61) + -0.036 (Age: 61+) + -0.714 (Sex) +  -0.252 (Race) +  -1.448 (BMI) + 0.861 (ALM)

R^2 value is 0.513
                            OLS Regression Results                            
Dep. Variable:           tRA4IMaxGrip   R-squared:                       0.513
Model:                            OLS   Adj. R-squared:                  0.498
Method:                 Least Squares   F-statistic:                     34.93
Date:                Tue, 27 Jun 2023   Prob (F-statistic):           4.78e-33
Time:                        22:05:51   Log-Likelihood:                -271.70
No. Observations:                 240   AIC:                             559.4
Df Residuals:                     232   BIC:                             587.2
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
       

In [16]:
#ALM/ht2

#predictor variables
X = df[['Age_40_50', 'Age_51_61', 'Age_61plus', 'RA1PRSEX', 'RA1PF7A', 'tRA4PBMI', 'tRA4IALMbyHt2']]

#target variable y
y = df['tRA4IMaxGrip']

#creating a linear regression model and fit the data
model = LinearRegression()
results = model.fit(X, y)

#printing the coefficients and intercept
first_age_coef = round(model.coef_[0], 3)
second_age_coef = round(model.coef_[1], 3)
third_age_coef = round(model.coef_[2], 3)
sex_coef = round(model.coef_[3], 3)
race_coef = round(model.coef_[4], 3)
bmi_coef = round(model.coef_[5], 3)
almht_coef = round(model.coef_[6], 3)
intercept = round(model.intercept_, 3)

print("The model is Predicted Handgrip Strength (incl ALM/ht^2) =", intercept, "+", first_age_coef, "(Age: 40-50) +", second_age_coef, "(Age: 51-61) +", third_age_coef, "(Age: 61+) +", sex_coef, "(Sex) + ", race_coef, "(Race) + ", bmi_coef, "(BMI) +", almht_coef, "(ALM/ht^2)\n")

r2 = round(model.score(X, y), 3)
print("R^2 value is", r2)

##########################
#for the MLR table
#adding a constant term to the predictors (X) matrix
X = sm.add_constant(X)

#creating and fit the regression model
model = sm.OLS(y, X)
results = model.fit()

#printing the regression results summary
print(results.summary())

The model is Predicted Handgrip Strength (incl ALM/ht^2) = 7.976 + 0.123 (Age: 40-50) + -0.031 (Age: 51-61) + -0.141 (Age: 61+) + -1.028 (Sex) +  -0.316 (Race) +  -1.527 (BMI) + 2.268 (ALM/ht^2)

R^2 value is 0.491
                            OLS Regression Results                            
Dep. Variable:           tRA4IMaxGrip   R-squared:                       0.491
Model:                            OLS   Adj. R-squared:                  0.476
Method:                 Least Squares   F-statistic:                     32.02
Date:                Tue, 27 Jun 2023   Prob (F-statistic):           6.89e-31
Time:                        22:05:51   Log-Likelihood:                -276.96
No. Observations:                 240   AIC:                             569.9
Df Residuals:                     232   BIC:                             597.8
Df Model:                           7                                         
Covariance Type:            nonrobust                                     

In [17]:
#ALM/BMI

#predictor variables
X = df[['Age_40_50', 'Age_51_61', 'Age_61plus', 'RA1PRSEX', 'RA1PF7A', 'tRA4PBMI', 'tRA4IALMbyBMI']]

#target variable y
y = df['tRA4IMaxGrip']

#creating a linear regression model and fit the data
model = LinearRegression()
results = model.fit(X, y)

#printing the coefficients and intercept
first_age_coef = round(model.coef_[0], 3)
second_age_coef = round(model.coef_[1], 3)
third_age_coef = round(model.coef_[2], 3)
sex_coef = round(model.coef_[3], 3)
race_coef = round(model.coef_[4], 3)
bmi_coef = round(model.coef_[5], 3)
almbmi_coef = round(model.coef_[6], 3)
intercept = round(model.intercept_, 3)

print("The model is Predicted Handgrip Strength (incl ALM/BMI) =", intercept, "+", first_age_coef, "(Age: 40-50) +", second_age_coef, "(Age: 51-61) +", third_age_coef, "(Age: 61+) +", sex_coef, "(Sex) + ", race_coef, "(Race) + ", bmi_coef, "(BMI) +", almbmi_coef, "(ALM/BMI)\n")

r2 = round(model.score(X, y), 3)
print("R^2 value is", r2)

##########################
#for the MLR table
#adding a constant term to the predictors (X) matrix
X = sm.add_constant(X)

#creating and fit the regression model
model = sm.OLS(y, X)
results = model.fit()

#printing the regression results summary
print(results.summary())

The model is Predicted Handgrip Strength (incl ALM/BMI) = 5.357 + 0.129 (Age: 40-50) + 0.018 (Age: 51-61) + -0.043 (Age: 61+) + -0.724 (Sex) +  -0.255 (Race) +  0.617 (BMI) + 2.057 (ALM/BMI)

R^2 value is 0.512
                            OLS Regression Results                            
Dep. Variable:           tRA4IMaxGrip   R-squared:                       0.512
Model:                            OLS   Adj. R-squared:                  0.497
Method:                 Least Squares   F-statistic:                     34.78
Date:                Tue, 27 Jun 2023   Prob (F-statistic):           6.15e-33
Time:                        22:05:51   Log-Likelihood:                -271.97
No. Observations:                 240   AIC:                             559.9
Df Residuals:                     232   BIC:                             587.8
Df Model:                           7                                         
Covariance Type:            nonrobust                                         

In [18]:
#ALM/(E/I)w

#predictor variables
X = df[['Age_40_50', 'Age_51_61', 'Age_61plus', 'RA1PRSEX', 'RA1PF7A', 'tRA4PBMI', 'tRA4IALMbyE2Icor']]

#target variable y
y = df['tRA4IMaxGrip']

#creating a linear regression model and fit the data
model = LinearRegression()
results = model.fit(X, y)

#printing the coefficients and intercept
first_age_coef = round(model.coef_[0], 3)
second_age_coef = round(model.coef_[1], 3)
third_age_coef = round(model.coef_[2], 3)
sex_coef = round(model.coef_[3], 3)
race_coef = round(model.coef_[4], 3)
bmi_coef = round(model.coef_[5], 3)
almeiw_coef = round(model.coef_[6], 3)
intercept = round(model.intercept_, 3)

print("The model is Predicted Handgrip Strength (incl ALM/(E/I)w) =", intercept, "+", first_age_coef, "(Age: 40-50) +", second_age_coef, "(Age: 51-61) +", third_age_coef, "(Age: 61+) +", sex_coef, "(Sex) + ", race_coef, "(Race) + ", bmi_coef, "(BMI) +", almeiw_coef, "(ALM/(E/I)w)\n")

r2 = round(model.score(X, y), 3)
print("R^2 value is", r2)

##########################
#for the MLR table
#adding a constant term to the predictors (X) matrix
X = sm.add_constant(X)

#creating and fit the regression model
model = sm.OLS(y, X)
results = model.fit()

#printing the regression results summary
print(results.summary())

The model is Predicted Handgrip Strength (incl ALM/(E/I)w) = 1.324 + 0.144 (Age: 40-50) + 0.056 (Age: 51-61) + 0.026 (Age: 61+) + -0.685 (Sex) +  -0.271 (Race) +  -0.847 (BMI) + 2.043 (ALM/(E/I)w)

R^2 value is 0.517
                            OLS Regression Results                            
Dep. Variable:           tRA4IMaxGrip   R-squared:                       0.517
Model:                            OLS   Adj. R-squared:                  0.502
Method:                 Least Squares   F-statistic:                     35.47
Date:                Tue, 27 Jun 2023   Prob (F-statistic):           1.96e-33
Time:                        22:05:51   Log-Likelihood:                -270.76
No. Observations:                 240   AIC:                             557.5
Df Residuals:                     232   BIC:                             585.4
Df Model:                           7                                         
Covariance Type:            nonrobust                                   

In [19]:
#LLM

#predictor variables
X = df[['Age_40_50', 'Age_51_61', 'Age_61plus', 'RA1PRSEX', 'RA1PF7A', 'tRA4PBMI', 'tRA4ILLM']]

#target variable y
y = df['tRA4IMaxGrip']

#creating a linear regression model and fit the data
model = LinearRegression()
results = model.fit(X, y)

#printing the coefficients and intercept
first_age_coef = round(model.coef_[0], 3)
second_age_coef = round(model.coef_[1], 3)
third_age_coef = round(model.coef_[2], 3)
sex_coef = round(model.coef_[3], 3)
race_coef = round(model.coef_[4], 3)
bmi_coef = round(model.coef_[5], 3)
llm_coef = round(model.coef_[6], 3)
intercept = round(model.intercept_, 3)

print("The model is Predicted Handgrip Strength (incl LLM) =", intercept, "+", first_age_coef, "(Age: 40-50) +", second_age_coef, "(Age: 51-61) +", third_age_coef, "(Age: 61+) +", sex_coef, "(Sex) + ", race_coef, "(Race) + ", bmi_coef, "(BMI) +", llm_coef, "(LLM)\n")

r2 = round(model.score(X, y), 3)
print("R^2 value is", r2)

##########################
#for the MLR table
#adding a constant term to the predictors (X) matrix
X = sm.add_constant(X)

#creating and fit the regression model
model = sm.OLS(y, X)
results = model.fit()

#printing the regression results summary
print(results.summary())

The model is Predicted Handgrip Strength (incl LLM) = 6.333 + 0.13 (Age: 40-50) + 0.008 (Age: 51-61) + -0.057 (Age: 61+) + -0.878 (Sex) +  -0.223 (Race) +  -1.333 (BMI) + 1.87 (LLM)

R^2 value is 0.507
                            OLS Regression Results                            
Dep. Variable:           tRA4IMaxGrip   R-squared:                       0.507
Model:                            OLS   Adj. R-squared:                  0.492
Method:                 Least Squares   F-statistic:                     34.02
Date:                Tue, 27 Jun 2023   Prob (F-statistic):           2.20e-32
Time:                        22:05:51   Log-Likelihood:                -273.32
No. Observations:                 240   AIC:                             562.6
Df Residuals:                     232   BIC:                             590.5
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
        

In [22]:
#LLM/(E/I)L

#predictor variables
X = df[['Age_40_50', 'Age_51_61', 'Age_61plus', 'RA1PRSEX', 'RA1PF7A', 'tRA4PBMI', 'tRA4ILLMbyLE2Icor']]

#target variable y
y = df['tRA4IMaxGrip']

#creating a linear regression model and fit the data
model = LinearRegression()
results = model.fit(X, y)

#printing the coefficients and intercept
first_age_coef = round(model.coef_[0], 3)
second_age_coef = round(model.coef_[1], 3)
third_age_coef = round(model.coef_[2], 3)
sex_coef = round(model.coef_[3], 3)
race_coef = round(model.coef_[4], 3)
bmi_coef = round(model.coef_[5], 3)
llmei_coef = round(model.coef_[6], 3)
intercept = round(model.intercept_, 3)

print("The model is Predicted Handgrip Strength (incl LLM/(E/I)l) =", intercept, "+", first_age_coef, "(Age: 40-50) +", second_age_coef, "(Age: 51-61) +", third_age_coef, "(Age: 61+) +", sex_coef, "(Sex) + ", race_coef, "(Race) + ", bmi_coef, "(BMI) +", llmei_coef, "(LLM/(E/I)l)\n")

r2 = round(model.score(X, y), 3)
print("R^2 value is", r2)

##########################
#for the MLR table
#adding a constant term to the predictors (X) matrix
X = sm.add_constant(X)

#creating and fit the regression model
model = sm.OLS(y, X)
results = model.fit()

#printing the regression results summary
print(results.summary())

The model is Predicted Handgrip Strength (incl LLM/(E/I)l) = 7.993 + 0.138 (Age: 40-50) + 0.052 (Age: 51-61) + 0.038 (Age: 61+) + -1.138 (Sex) +  -0.207 (Race) +  -0.599 (BMI) + 0.815 (LLM/(E/I)l)

R^2 value is 0.49
                            OLS Regression Results                            
Dep. Variable:           tRA4IMaxGrip   R-squared:                       0.490
Model:                            OLS   Adj. R-squared:                  0.475
Method:                 Least Squares   F-statistic:                     31.83
Date:                Tue, 27 Jun 2023   Prob (F-statistic):           9.47e-31
Time:                        22:06:03   Log-Likelihood:                -277.29
No. Observations:                 240   AIC:                             570.6
Df Residuals:                     232   BIC:                             598.4
Df Model:                           7                                         
Covariance Type:            nonrobust                                    