# Linear regression showing effects of sex, age, and sex*age on motion
- Regression example followed for this analysis: https://www.youtube.com/watch?v=z_BXANUOjJY

## A. Import Statements

In [28]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp
import statsmodels.api as sm

import warnings
# Silence all warnings for the rest of the session.
# NOTE(review): presumably done to keep cell output tidy, but it also hides
# deprecation and numerical warnings from pandas/statsmodels -- consider a
# narrower filter.
warnings.filterwarnings('ignore')

## B. Load in demographic and frame-to-frame displacement (FFD) info of each subject

In [82]:
# Read the tab-separated table of per-subject FFD, sex and age. The first line
# of the file is skipped (presumably its header row) and the column names are
# supplied explicitly; subject IDs become the row index.
data = pd.read_csv(
    'data/ffd_sex_age_n595.txt',
    sep='\t',
    skiprows=1,
    names=['subj_ID', 'FFD', 'Sex', 'Age'],
).set_index('subj_ID')

# Dummy-code Sex and keep only the Sex_F indicator (Sex_M is dropped).
# dtype=int forces 0/1 integers: pandas >= 2.0 emits bool dummies by default,
# and a mixed float/bool design matrix is converted to an object array, which
# statsmodels' OLS rejects.
data = pd.get_dummies(data, columns=['Sex'], dtype=int).drop('Sex_M', axis=1)

# display first 10 data values to check that read-in was successful
data.head(10)

Unnamed: 0_level_0,FFD,Age,Sex_F
subj_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
HCA6030645,0.110073,45.333333,1
HCA6031344,0.120839,80.583333,1
HCA6037457,0.091109,73.333333,1
HCA6051047,0.118268,60.416667,1
HCA6053758,0.084815,36.416667,1
HCA6054457,0.101823,62.5,0
HCA6075263,0.147459,63.916667,0
HCA6086369,0.19958,75.833333,0
HCA6086470,0.153076,40.416667,1
HCA6091766,0.200693,72.333333,0


## C. Set independent and dependent variables

In [83]:
# Predictors: every column of `data` except the response.
x = data.drop(columns=['FFD'])
# Response: FFD, kept as a one-column DataFrame rather than a Series.
y = data.loc[:, ['FFD']]

## D. Linear regression with sex and age (no constant)

In [84]:
# Ordinary least squares of FFD on the columns of x (Age and Sex_F).
# x carries no constant column, so this fit has no intercept and the summary
# reports the uncentered R-squared.
lm_1 = sm.OLS(endog=y, exog=x).fit()
print(lm_1.summary())

                                 OLS Regression Results                                
Dep. Variable:                    FFD   R-squared (uncentered):                   0.875
Model:                            OLS   Adj. R-squared (uncentered):              0.874
Method:                 Least Squares   F-statistic:                              2071.
Date:                Mon, 19 Sep 2022   Prob (F-statistic):                   3.01e-268
Time:                        19:14:08   Log-Likelihood:                          1065.5
No. Observations:                 595   AIC:                                     -2127.
Df Residuals:                     593   BIC:                                     -2118.
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

## E. Linear regression with sex, age, and a constant

In [87]:
# Design matrix for the intercept model: x plus a leading 'const' column of 1.0.
x_2 = sm.add_constant(x, prepend=True)

# Fit FFD on the constant, Age and Sex_F.
lm_2 = sm.OLS(endog=y, exog=x_2).fit()
print(lm_2.summary())

                            OLS Regression Results                            
Dep. Variable:                    FFD   R-squared:                       0.032
Model:                            OLS   Adj. R-squared:                  0.029
Method:                 Least Squares   F-statistic:                     9.925
Date:                Mon, 19 Sep 2022   Prob (F-statistic):           5.76e-05
Time:                        19:15:45   Log-Likelihood:                 1143.4
No. Observations:                 595   AIC:                            -2281.
Df Residuals:                     592   BIC:                            -2268.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0833      0.006     13.313      0.0

## F. Linear regression with sex, age, and sex*age interaction (no constant)

In [93]:
# Add the sex*age interaction regressor, w/o constant.
# The product equals Age for rows with Sex_F == 1 and 0 otherwise.
# NOTE: the column is added to `x` itself, so re-running sections D or E after
# this cell would silently include the interaction term in those models.
x['Sex*Age'] = x['Sex_F'] * x['Age']

# Fit FFD on Age, Sex_F and Sex*Age (no intercept).
lm_3 = sm.OLS(y, x).fit()
print(lm_3.summary())

                                 OLS Regression Results                                
Dep. Variable:                    FFD   R-squared (uncentered):                   0.890
Model:                            OLS   Adj. R-squared (uncentered):              0.890
Method:                 Least Squares   F-statistic:                              1602.
Date:                Mon, 19 Sep 2022   Prob (F-statistic):                   1.27e-283
Time:                        19:21:51   Log-Likelihood:                          1105.1
No. Observations:                 595   AIC:                                     -2204.
Df Residuals:                     592   BIC:                                     -2191.
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

## G. Linear regression with sex, age, sex*age, and a constant

In [96]:
# Add the sex*age interaction regressor, w/ constant.
# The product equals Age for rows with Sex_F == 1 and 0 otherwise.
# NOTE: the column is added to `x_2` itself; section H selects 'Sex*Age' from
# x_2, so this cell must run before it.
x_2['Sex*Age'] = x_2['Sex_F'] * x_2['Age']

# Fit FFD on the constant, Age, Sex_F and Sex*Age.
lm_4 = sm.OLS(y, x_2).fit()
print(lm_4.summary())

                            OLS Regression Results                            
Dep. Variable:                    FFD   R-squared:                       0.033
Model:                            OLS   Adj. R-squared:                  0.028
Method:                 Least Squares   F-statistic:                     6.674
Date:                Mon, 19 Sep 2022   Prob (F-statistic):           0.000195
Time:                        19:22:33   Log-Likelihood:                 1143.5
No. Observations:                 595   AIC:                            -2279.
Df Residuals:                     591   BIC:                            -2262.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0866      0.010      9.032      0.0

## H. Linear regression with sex, sex*age, and a constant

In [101]:
# Removed the 'Age' column, w/ constant: keep only the sex indicator, the
# sex*age interaction and the constant. x_2 must already contain 'Sex*Age'
# (added in section G).
# Without the Age main effect, the only age term is Sex_F * Age, so this model
# fits an age slope for Sex_F == 1 rows only; rows with Sex_F == 0 are modelled
# by the constant alone.
cols = ['Sex_F', 'Sex*Age', 'const']
x_3 = x_2[cols]

lm_5 = sm.OLS(y, x_3).fit()
print(lm_5.summary())

                            OLS Regression Results                            
Dep. Variable:                    FFD   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     7.015
Date:                Mon, 19 Sep 2022   Prob (F-statistic):           0.000975
Time:                        19:29:41   Log-Likelihood:                 1140.6
No. Observations:                 595   AIC:                            -2275.
Df Residuals:                     592   BIC:                            -2262.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Sex_F         -0.0293      0.008     -3.644      0.0