In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
df = pd.read_excel('입시생_크로스핏_측정기록(ACSM).xlsx')
df.head()

In [None]:
df = df.drop(0)
df

In [None]:
# Cast the demographic / grouping code columns to plain integers.
for code_col in ('age', 'gender', 'group'):
    df[code_col] = df[code_col].astype(int)

In [None]:
df.head()

In [None]:
# For each YB_* direction and side, keep the larger of the two recorded trials.
for direction in ('Anterior', 'Posteromedial', 'Posterolateral'):
    for side in ('L', 'R'):
        trial_cols = [f'YB_{direction}_{side}1', f'YB_{direction}_{side}2']
        df[f'YB_{direction}_{side}_Max'] = df[trial_cols].max(axis=1)

In [None]:
df

In [None]:
# Subsets by gender code: 1 -> df_male, 2 -> df_female.
df_male, df_female = (df[df['gender'] == code] for code in (1, 2))
# Subsets by group code: 1 -> df_pe, 2 -> df_cf.
df_pe, df_cf = (df[df['group'] == code] for code in (1, 2))

### 전체분포

In [None]:
df.describe()

In [None]:
# NOTE(review): astype(int) drops any fractional part of the leg length before
# it is used as the normalisation denominator — confirm whole numbers are intended.
df['Leglength_R'] = df['Leglength_R'].astype(int)

In [None]:
# Express each best reach as a percentage of leg length.
# NOTE(review): left- and right-side reaches are both divided by Leglength_R —
# confirm the left side is not meant to use a left leg length.
for direction in ('Anterior', 'Posteromedial', 'Posterolateral'):
    for side in ('L', 'R'):
        best_reach = df[f'YB_{direction}_{side}_Max']
        df[f'YB_{direction}_{side}_Comp'] = (best_reach / df['Leglength_R']) * 100

In [None]:
df.head()

In [None]:
df

In [None]:
from scipy import stats
import seaborn as sns

def normality(df, column, alpha = 0.05, bins = 20, kde = True, plot=True, verbose=True):
    """Run a Shapiro–Wilk normality test on one column and optionally plot it.

    Args:
        df: DataFrame holding the data.
        column: Name of the column to test; missing values are dropped first.
        alpha: Significance level; p > alpha is reported as normal.
        bins: Number of histogram bins.
        kde: Whether to overlay a KDE curve on the histogram.
        plot: If True, draw a histogram and a normal Q–Q plot side by side.
        verbose: If True, print the mean, standard deviation, W and p-value.

    Returns:
        dict with keys 'column', 'mean', 'std', 'stat', 'pvalue' and
        'decision'. The dict is returned whether or not ``plot`` is set.
    """
    data = df[column].dropna()
    mean = data.mean()
    std = data.std(ddof = 1)  # sample standard deviation

    stat, p = stats.shapiro(data)
    decision = "정규분포" if p > alpha else "정규분포 아님"

    if verbose:
        print(f"[Shapiro–Wilk 정규성 검정 - {column}]")
        print(f"평균: {mean:.4f}, 표준편차: {std:.4f}")
        print(f"W = {stat:.4f}, p-value = {p:.4f} → {decision}\n")

    if plot:
        fig, axes = plt.subplots(1, 2, figsize=(10, 4))

        # Left panel: histogram (with optional KDE).
        sns.histplot(data, bins=bins, kde=kde, ax=axes[0])
        axes[0].set_title(f"{column}")
        axes[0].grid(True, alpha=0.3)

        # Right panel: Q–Q plot against the normal distribution.
        stats.probplot(data, dist="norm", plot=axes[1])
        axes[1].set_title(f"{column} Q–Q Plot")

        plt.tight_layout()
        plt.show()

    # Returned outside the plotting branch so that plot=False still yields the
    # test results instead of None.
    return {
        'column': column,
        'mean': mean,
        'std': std,
        'stat': stat,
        'pvalue': p,
        'decision': decision
    }
    

normality(df, 'YB_Anterior_L_Max')

In [None]:
normality(df, 'YB_Anterior_R_Max')

In [None]:
normality(df, 'YB_Posteromedial_L_Max')

In [None]:
normality(df, 'YB_Posteromedial_R_Max')

In [None]:
normality(df, 'YB_Posterolateral_L_Max')

In [None]:
normality(df, 'YB_Posterolateral_R_Max')

In [None]:
normality(df, 'YB_Anterior_L_Comp')

In [None]:
normality(df, 'YB_Anterior_R_Comp')

In [None]:
normality(df, 'YB_Posteromedial_L_Comp')

In [None]:
normality(df, 'YB_Posteromedial_R_Comp')

In [None]:
normality(df, 'YB_Posterolateral_L_Comp')

In [None]:
normality(df, 'YB_Posterolateral_R_Comp')

In [None]:
def convert_to_int(df):
    """Cast every fully numeric column of ``df`` to float, in place.

    Despite the name, the target dtype is float. Columns that cannot be parsed
    as numbers are left unchanged.

    Args:
        df: DataFrame to convert; it is modified in place.

    Returns:
        The same DataFrame object, for convenient re-assignment.
    """
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col], errors = 'raise').astype(float)
        except (ValueError, TypeError):
            # Non-numeric column: keep it as is. Only conversion errors are
            # caught so that unrelated failures (and Ctrl-C) still surface.
            continue
    return df
df = convert_to_int(df)

In [None]:
normality(df, 'Hip_ext_L')

In [None]:
normality(df, 'Hip_ext_R')

In [None]:
normality(df, 'Hip_flex_L')

In [None]:
normality(df, 'Hip_flex_R')

In [None]:
normality(df, 'Knee_ext_L')

In [None]:
normality(df, 'Knee_ext_R')

In [None]:
normality(df, 'Ankle2_dor_L')

In [None]:
normality(df, 'Ankle2_dor_R')

In [None]:
normality(df, 'Ankle2_plnt_L')

In [None]:
normality(df, 'Ankle2_plnt_R')

###  ROM 상태 레이블링
* 연령별
    * 9-19
    * 20-44
    * 45-69
* 정상가동범위
    * 미달 = 1
    * 정상 = 0

In [None]:
df

In [None]:
for col in df.columns:
    print(col)

In [None]:
def age_group(age):
    """Map an age to its band code: 9-19 -> 1, 20-44 -> 2, 45-69 -> 3, else None."""
    bands = ((9, 19, 1), (20, 44, 2), (45, 69, 3))
    for lower, upper, code in bands:
        if lower <= age <= upper:
            return code
    # Outside every band (including values falling between two bands).
    return None
    
# Derive the age-band code for every row.
df['age_group'] = df['age'].apply(age_group)

# Relocate the new column to position 7 (pop it, then re-insert it there).
df.insert(7, 'age_group', df.pop('age_group'))

In [None]:
df

In [None]:
df[['group', 'gender']].value_counts()

In [None]:
def hip_ext_range(age, gender, rec):
    """Label a hip-extension measurement as normal (0) or below the cutoff (1).

    Args:
        age: Age-band code (1, 2 or 3); any other value yields None.
        gender: Gender code; 1 selects the first cutoff set, every other
            value selects the second one.
        rec: Measured value compared against the cutoff.

    Returns:
        0 if ``rec`` reaches the cutoff, 1 if it is below it, or None when
        the age band is unknown or the measurement is missing (NaN/None).
    """
    # Minimum value counted as normal, keyed by age-band code.
    cutoffs_by_age = {1: 17, 2: 16, 3: 13} if gender == 1 else {1: 19, 2: 17, 3: 16}
    # A missing measurement must stay unlabelled: NaN >= cutoff is False, which
    # would otherwise mark it as below normal.
    if age not in cutoffs_by_age or pd.isna(rec):
        return None
    return 0 if rec >= cutoffs_by_age[age] else 1
        
        
# Label the hip-extension measurement of each side, row by row.
for side in ('L', 'R'):
    df[f'hip_ext_{side}_status'] = df.apply(
        lambda row, src=f'Hip_ext_{side}': hip_ext_range(row['age_group'], row['gender'], row[src]),
        axis=1,
    )


In [None]:
def hip_flex_range(age, gender, rec):
    """Label a hip-flexion measurement as normal (0) or below the cutoff (1).

    Args:
        age: Age-band code (1, 2 or 3); any other value yields None.
        gender: Gender code; 1 selects the first cutoff set, every other
            value selects the second one.
        rec: Measured value compared against the cutoff.

    Returns:
        0 if ``rec`` reaches the cutoff, 1 if it is below it, or None when
        the age band is unknown or the measurement is missing (NaN/None).
    """
    # Minimum value counted as normal, keyed by age-band code.
    cutoffs_by_age = {1: 133, 2: 129, 3: 126} if gender == 1 else {1: 133, 2: 133, 3: 129}
    # A missing measurement must stay unlabelled: NaN >= cutoff is False, which
    # would otherwise mark it as below normal.
    if age not in cutoffs_by_age or pd.isna(rec):
        return None
    return 0 if rec >= cutoffs_by_age[age] else 1
        
        
# Label the hip-flexion measurement of each side, row by row.
for side in ('L', 'R'):
    df[f'hip_flex_{side}_status'] = df.apply(
        lambda row, src=f'Hip_flex_{side}': hip_flex_range(row['age_group'], row['gender'], row[src]),
        axis=1,
    )


In [None]:
def knee_ext_range(age, gender, rec):
    """Label a knee-extension measurement as normal (0) or below the cutoff (1).

    Args:
        age: Age-band code (1, 2 or 3); any other value yields None.
        gender: Gender code; 1 selects the first cutoff set, every other
            value selects the second one.
        rec: Measured value compared against the cutoff.

    Returns:
        0 if ``rec`` reaches the cutoff, 1 if it is below it, or None when
        the age band is unknown or the measurement is missing (NaN/None).
    """
    # Minimum value counted as normal, keyed by age-band code.
    cutoffs_by_age = {1: 1, 2: 1, 3: 0} if gender == 1 else {1: 2, 2: 1, 3: 1}
    # A missing measurement must stay unlabelled: NaN >= cutoff is False, which
    # would otherwise mark it as below normal.
    if age not in cutoffs_by_age or pd.isna(rec):
        return None
    return 0 if rec >= cutoffs_by_age[age] else 1
        
        
# Label the knee-extension measurement of each side, row by row.
for side in ('L', 'R'):
    df[f'knee_ext_{side}_status'] = df.apply(
        lambda row, src=f'Knee_ext_{side}': knee_ext_range(row['age_group'], row['gender'], row[src]),
        axis=1,
    )

In [None]:
def knee_flex_range(age, gender, rec):
    """Label a knee-flexion measurement as normal (0) or below the cutoff (1).

    Args:
        age: Age-band code (1, 2 or 3); any other value yields None.
        gender: Gender code; 1 selects the first cutoff set, every other
            value selects the second one.
        rec: Measured value compared against the cutoff.

    Returns:
        0 if ``rec`` reaches the cutoff, 1 if it is below it, or None when
        the age band is unknown or the measurement is missing (NaN/None).
    """
    # Minimum value counted as normal, keyed by age-band code.
    cutoffs_by_age = {1: 140, 2: 137, 3: 132} if gender == 1 else {1: 141, 2: 141, 3: 137}
    # A missing measurement must stay unlabelled: NaN >= cutoff is False, which
    # would otherwise mark it as below normal.
    if age not in cutoffs_by_age or pd.isna(rec):
        return None
    return 0 if rec >= cutoffs_by_age[age] else 1
        
        
# Label the knee-flexion measurement of each side, row by row.
for side in ('L', 'R'):
    df[f'knee_flex_{side}_status'] = df.apply(
        lambda row, src=f'Knee_flex_{side}': knee_flex_range(row['age_group'], row['gender'], row[src]),
        axis=1,
    )

In [None]:
def Ankle_dor_range(age, gender, rec):
    """Label an ankle-dorsiflexion measurement as normal (0) or below the cutoff (1).

    Args:
        age: Age-band code (1, 2 or 3); any other value yields None.
        gender: Gender code; 1 selects the first cutoff set, every other
            value selects the second one.
        rec: Measured value compared against the cutoff.

    Returns:
        0 if ``rec`` reaches the cutoff, 1 if it is below it, or None when
        the age band is unknown or the measurement is missing (NaN/None).
    """
    # Minimum value counted as normal, keyed by age-band code.
    cutoffs_by_age = {1: 15, 2: 12, 3: 11} if gender == 1 else {1: 16, 2: 13, 3: 11}
    # A missing measurement must stay unlabelled: NaN >= cutoff is False, which
    # would otherwise mark it as below normal.
    if age not in cutoffs_by_age or pd.isna(rec):
        return None
    return 0 if rec >= cutoffs_by_age[age] else 1
        
# Label the ankle-dorsiflexion measurement of each side, row by row.
for side in ('L', 'R'):
    df[f'Ankle2_dor_{side}_status'] = df.apply(
        lambda row, src=f'Ankle2_dor_{side}': Ankle_dor_range(row['age_group'], row['gender'], row[src]),
        axis=1,
    )

In [None]:
def ankle_plnt_range(age, gender, rec):
    """Label an ankle-plantarflexion measurement as normal (0) or below the cutoff (1).

    Args:
        age: Age-band code (1, 2 or 3); any other value yields None.
        gender: Gender code; 1 selects the first cutoff set, every other
            value selects the second one.
        rec: Measured value compared against the cutoff.

    Returns:
        0 if ``rec`` reaches the cutoff, 1 if it is below it, or None when
        the age band is unknown or the measurement is missing (NaN/None).
    """
    # Minimum value counted as normal, keyed by age-band code.
    cutoffs_by_age = {1: 51, 2: 54, 3: 48} if gender == 1 else {1: 55, 2: 61, 3: 55}
    # A missing measurement must stay unlabelled: NaN >= cutoff is False, which
    # would otherwise mark it as below normal.
    if age not in cutoffs_by_age or pd.isna(rec):
        return None
    return 0 if rec >= cutoffs_by_age[age] else 1
        
        
# Label the ankle-plantarflexion measurement of each side, row by row.
# The source columns are 'Ankle2_plnt_*' while the labels are stored as 'Ankle_plnt_*_status'.
for side in ('L', 'R'):
    df[f'Ankle_plnt_{side}_status'] = df.apply(
        lambda row, src=f'Ankle2_plnt_{side}': ankle_plnt_range(row['age_group'], row['gender'], row[src]),
        axis=1,
    )

In [None]:
for col in df.columns:
    print(col)

In [None]:
# List every column whose name begins with 'YB_' (the accessor method is
# `startswith`; the misspelt `startsswith` raised AttributeError).
df.columns[df.columns.str.startswith('YB_')]

In [None]:
df.columns[df.columns.str.endswith('_status')]

In [None]:
# Column groups used by the comparisons below (only rom_status_cols and
# ybt_cols2 are read in this cell).
rom_cols = ['Hip_ext_L', 'Hip_ext_R', 'Hip_flex_L', 'Hip_flex_R', 'Knee_ext_L', 'Knee_ext_R', 'Ankle2_dor_L', 'Ankle2_dor_R', 'Ankle2_plnt_L', 'Ankle2_plnt_R']
rom_status_cols = ['hip_ext_L_status', 'hip_ext_R_status', 'hip_flex_L_status', 'hip_flex_R_status', 'knee_ext_L_status', 'knee_ext_R_status', 'knee_flex_L_status', 'knee_flex_R_status', 'Ankle2_dor_L_status', 'Ankle2_dor_R_status', 'Ankle_plnt_L_status', 'Ankle_plnt_R_status']
ybt_cols1 = ['YB_rel_distance_L1', 'YB_rel_distance_L2', 'YB_rel_distance_R1', 'YB_rel_distance_R2']
ybt_cols2 = ['YB_Anterior_L_Comp', 'YB_Anterior_R_Comp', 'YB_Posteromedial_L_Comp', 'YB_Posteromedial_R_Comp', 'YB_Posterolateral_L_Comp', 'YB_Posterolateral_R_Comp']

results1 = []
results2 = []

# For every (status label, normalised reach) pair, compare the reach of the
# rows labelled 0 with the rows labelled 1.
for stat in rom_status_cols:
    for col2 in ybt_cols2:
        group0 = df[(df[stat] == 0)][col2].dropna()
        group1 = df[(df[stat] == 1)][col2].dropna()
        
        # Independent two-sample t-test assuming equal variances.
        t_stat, pval = stats.ttest_ind(group0, group1, equal_var = True)
    
        results1.append({
            'ROM Status' : stat,
            'Rel_Dist' : col2,
            'n_normal' : len(group0),
            'n_abnrml' : len(group1),
            'm_0' : group0.mean(),
            'm_1' : group1.mean(),
            'T' : t_stat,
            'P': pval
        })
        
ttest_df = pd.DataFrame(results1)
# Keep only the pairs with p < 0.05; no multiple-comparison correction is applied here.
ttest_df['Valid'] = ttest_df['P'] < 0.05
ttest_df = ttest_df[ttest_df['Valid'] == True]
ttest_df
        

In [None]:
ttest_df.to_csv('ttest_df.csv')

### Logistic Regression

In [None]:
import statsmodels.api as sm

# Column groups (only rom_status_cols and ybt_cols1 are read in this cell).
rom_cols = ['Hip_ext_L', 'Hip_ext_R', 'Hip_flex_L', 'Hip_flex_R', 'Knee_ext_L', 'Knee_ext_R', 'Ankle2_dor_L', 'Ankle2_dor_R', 'Ankle2_plnt_L', 'Ankle2_plnt_R']
rom_status_cols = ['hip_ext_L_status', 'hip_ext_R_status', 'hip_flex_L_status', 'hip_flex_R_status', 'knee_ext_L_status', 'knee_ext_R_status', 'knee_flex_L_status', 'knee_flex_R_status', 'Ankle2_dor_L_status', 'Ankle2_dor_R_status', 'Ankle_plnt_L_status', 'Ankle_plnt_R_status']
ybt_cols1 = ['YB_rel_distance_L1', 'YB_rel_distance_L2', 'YB_rel_distance_R1', 'YB_rel_distance_R2']
ybt_cols2 = ['YB_Anterior_L_Comp', 'YB_Anterior_R_Comp', 'YB_Posteromedial_L_Comp', 'YB_Posteromedial_R_Comp', 'YB_Posterolateral_L_Comp', 'YB_Posterolateral_R_Comp']

logit_res1 = []

# One single-predictor logistic regression per (status label, predictor) pair.
for stat in rom_status_cols:
    for col in ybt_cols1:
        # Skip pairs whose columns are not present in df.
        if stat not in df.columns or col not in df.columns:
            continue
        # Use only rows where both the label and the predictor are available.
        df_tmp = df[[stat, col]].dropna()
        X = sm.add_constant(df_tmp[col])
        y = df_tmp[stat]
        model = sm.Logit(y, X).fit(disp = False)
        # Odds ratio = exp(coefficient of the predictor).
        OR = np.exp(model.params[col])
        logit_res1.append({
            'ROM Status' : stat,
            'Rel_Dist' : col,
            'B' : model.params[col],
            'P' : model.pvalues[col],
            'OR' : OR,
        })
# NOTE(review): if every pair is skipped, logit_res1 is empty and the 'P'
# lookup below raises KeyError.
logit_df1 = pd.DataFrame(logit_res1)
logit_df1['Valid'] = logit_df1['P'] < 0.05
logit_df1= logit_df1.round(4)
# Keep only the pairs with p < 0.05.
logit_df1= logit_df1[logit_df1['Valid'] == True]
logit_df1

In [None]:
sns.barplot(logit_df1)

In [None]:
logit_res2 = []

# One single-predictor logistic regression per (status label, normalised reach) pair.
for stat in rom_status_cols:
    for col in ybt_cols2:
        # Skip pairs whose columns are not present in df.
        if stat not in df.columns or col not in df.columns:
            continue
        # Use only rows where both the label and the predictor are available.
        df_tmp = df[[stat, col]].dropna()
        X = sm.add_constant(df_tmp[col])
        y = df_tmp[stat]
        model = sm.Logit(y, X).fit(disp = False)
        # Odds ratio = exp(coefficient of the predictor).
        OR = np.exp(model.params[col])
        logit_res2.append({
            'ROM Status' : stat,
            'Rel_Dist' : col,
            'B' : model.params[col],
            'P' : model.pvalues[col],
            'OR' : OR
        })
logit_df2 = pd.DataFrame(logit_res2)
logit_df2['Valid'] = logit_df2['P'] < 0.05
logit_df2 = logit_df2.round(4)
# Keep only the pairs with p < 0.05.
logit_df2 = logit_df2[logit_df2['Valid'] == True]
logit_df2

In [None]:
# `pval` is the scalar p-value left over from the last t-test iteration and has
# no .to_csv(); export the table of significant logistic-regression results.
# NOTE(review): assumes logit_df2 is the table meant for 'pval.csv' — confirm.
logit_df2.to_csv('pval.csv')

In [None]:
# Re-split by group code (1 -> df_pe, 2 -> df_cf) so the subsets carry the
# columns added to df since the first split.
df_pe, df_cf = (df[df['group'] == code] for code in (1, 2))

In [None]:
# Turn every 99999 in the group-1 subset into NaN before summarising it.
# NOTE(review): df_cf and the *_status columns (computed earlier from df) do not
# get this replacement — confirm that is intended.
df_pe = df_pe.replace(99999, np.nan)
df_pe.describe()

In [None]:
df_cf.describe()

### 체대입시 / 크로스핏 구분

In [None]:
rom_cols = ['Hip_ext_L', 'Hip_ext_R', 'Hip_flex_L', 'Hip_flex_R', 'Knee_ext_L', 'Knee_ext_R', 'Ankle2_dor_L', 'Ankle2_dor_R', 'Ankle2_plnt_L', 'Ankle2_plnt_R']
rom_status_cols = ['hip_ext_L_status', 'hip_ext_R_status', 'hip_flex_L_status', 'hip_flex_R_status', 'knee_ext_L_status', 'knee_ext_R_status', 'knee_flex_L_status', 'knee_flex_R_status', 'Ankle2_dor_L_status', 'Ankle2_dor_R_status', 'Ankle_plnt_L_status', 'Ankle_plnt_R_status']
ybt_cols1 = ['YB_rel_distance_L1', 'YB_rel_distance_L2', 'YB_rel_distance_R1', 'YB_rel_distance_R2']
ybt_cols2 = ['YB_Anterior_L_Comp', 'YB_Anterior_R_Comp', 'YB_Posteromedial_L_Comp', 'YB_Posteromedial_R_Comp', 'YB_Posterolateral_L_Comp', 'YB_Posterolateral_R_Comp']

results1 = []
results2 = []

# Same comparison as for the full sample, restricted to df_pe (group == 1).
for stat in rom_status_cols:
    for col2 in ybt_cols2:
        # Build the masks from df_pe itself. A mask taken from the full df has a
        # different index, which makes pandas reindex it (with a UserWarning)
        # and ties the result to df staying aligned with df_pe.
        group0 = df_pe.loc[df_pe[stat] == 0, col2].dropna()
        group1 = df_pe.loc[df_pe[stat] == 1, col2].dropna()

        # Independent two-sample t-test assuming equal variances.
        t_stat, pval = stats.ttest_ind(group0, group1, equal_var = True)

        results1.append({
            'ROM Status' : stat,
            'Rel_Dist' : col2,
            'n_normal' : len(group0),
            'n_abnrml' : len(group1),
            'm_0' : group0.mean(),
            'm_1' : group1.mean(),
            'T' : t_stat,
            'P': pval
        })

ttest_dfpe = pd.DataFrame(results1)
# Keep only the pairs with p < 0.05.
ttest_dfpe['Valid'] = ttest_dfpe['P'] < 0.05
ttest_dfpe = ttest_dfpe[ttest_dfpe['Valid'] == True]
ttest_dfpe

In [None]:
results1 = []
results2 = []

# Same comparison as for the full sample, restricted to df_cf (group == 2).
for stat in rom_status_cols:
    for col2 in ybt_cols2:
        # Build the masks from df_cf itself. A mask taken from the full df has a
        # different index, which makes pandas reindex it (with a UserWarning)
        # and ties the result to df staying aligned with df_cf.
        group0 = df_cf.loc[df_cf[stat] == 0, col2].dropna()
        group1 = df_cf.loc[df_cf[stat] == 1, col2].dropna()

        # Independent two-sample t-test assuming equal variances.
        t_stat, pval = stats.ttest_ind(group0, group1, equal_var = True)

        results2.append({
            'ROM Status' : stat,
            'Rel_Dist' : col2,
            'n_normal' : len(group0),
            'n_abnrml' : len(group1),
            'm_0' : group0.mean(),
            'm_1' : group1.mean(),
            'T' : t_stat,
            'P': pval
        })

ttest_dfcf = pd.DataFrame(results2)
# Keep only the pairs with p < 0.05.
ttest_dfcf['Valid'] = ttest_dfcf['P'] < 0.05
ttest_dfcf = ttest_dfcf[ttest_dfcf['Valid'] == True]
ttest_dfcf

In [None]:
logit_res1 = []

# Single-predictor logistic regressions restricted to the df_pe subset.
for stat in rom_status_cols:
    for col in ybt_cols2:
        # Skip pairs whose columns are not present in df_pe.
        if stat not in df_pe.columns or col not in df_pe.columns:
            continue
        # Use only rows where both the label and the predictor are available.
        df_tmp = df_pe[[stat, col]].dropna()
        X = sm.add_constant(df_tmp[col])
        y = df_tmp[stat]
        model = sm.Logit(y, X).fit(disp = False)
        # Odds ratio = exp(coefficient of the predictor).
        OR = np.exp(model.params[col])
        logit_res1.append({
            'ROM Status' : stat,
            'Rel_Dist' : col,
            'B' : model.params[col],
            'P' : model.pvalues[col],
            'OR' : OR,
        })
logit_dfpe = pd.DataFrame(logit_res1)
logit_dfpe['Valid'] = logit_dfpe['P'] < 0.05
logit_dfpe= logit_dfpe.round(4)
# Keep only the pairs with p < 0.05.
logit_dfpe= logit_dfpe[logit_dfpe['Valid'] == True]
logit_dfpe

In [None]:
logit_res2 = []

# Single-predictor logistic regressions restricted to the df_cf subset.
for stat in rom_status_cols:
    for col in ybt_cols2:
        # Guard against the frame that is actually modelled (df_cf); the
        # previous check looked at df_pe's columns.
        if stat not in df_cf.columns or col not in df_cf.columns:
            continue
        # Use only rows where both the label and the predictor are available.
        df_tmp = df_cf[[stat, col]].dropna()
        X = sm.add_constant(df_tmp[col])
        y = df_tmp[stat]
        model = sm.Logit(y, X).fit(disp = False)
        # Odds ratio = exp(coefficient of the predictor).
        OR = np.exp(model.params[col])
        logit_res2.append({
            'ROM Status' : stat,
            'Rel_Dist' : col,
            'B' : model.params[col],
            'P' : model.pvalues[col],
            'OR' : OR,
        })
logit_dfcf = pd.DataFrame(logit_res2)
logit_dfcf['Valid'] = logit_dfcf['P'] < 0.05
logit_dfcf= logit_dfcf.round(4)
# Keep only the pairs with p < 0.05.
logit_dfcf= logit_dfcf[logit_dfcf['Valid'] == True]
logit_dfcf