# 4. Motor improvements (UPDRS III & IV analysis)

This is the fourth Notebook that has to be runned. In this one we will study the motor improvements in the UPDRS scale

In [1]:
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import scipy.stats
from scipy.stats import chi2_contingency, f_oneway, kruskal, shapiro, anderson, ttest_rel, f_oneway, ttest_ind, kstest
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

In [None]:
df = pd.read_csv('/home/razkinm/projects/6articulodbs/raw/data.csv', sep=';')
df_e = pd.read_csv('/home/razkinm/projects/6articulodbs/derivates/df_preprocessed.csv', sep=',')

df_exploracion = df[['CIC', 'SEXO', 'DD', 'EDAD', 'UPDRS III Off - PREQX', 'UPDRS III On - PREQX', 'UPDRS IV - PRE', 'UPDRS III Off- POSTQX', 'UPDRS III On- POSTQX', 'UPDRS IV - POSTQX', 'LEAD IZQ', 'LEAD DER']]
df_exploracion = df_exploracion.dropna()
df_exploracion['COCANAL IZQ'] = None
df_exploracion['COCANAL DER'] = None

# Iterate over df_exploracion and df_e to compare values
for index_exploracion, row_exploracion in df_exploracion.iterrows():
    for index_e, row_e in df_e.iterrows():
        if row_exploracion['CIC'] == row_e['ID']:
            if row_e['HEMISFERIO'] == 1:
                df_exploracion.at[index_exploracion, 'COCANAL DER'] = row_e['COCANAL']
            if row_e['HEMISFERIO'] == 0:
                df_exploracion.at[index_exploracion, 'COCANAL IZQ'] = row_e['COCANAL']

df_exploracion['COCANAL DER'] = df_exploracion['COCANAL DER'].fillna(0)

df_exploracion['COCANAL_GENERAL'] = np.where(
    (df_exploracion['COCANAL IZQ'] == 0) & (df_exploracion['COCANAL DER'] == 0), 0, 
    np.where((df_exploracion['COCANAL IZQ'] == 1) & (df_exploracion['COCANAL DER'] == 1), 2, 1)
)

# create a new variable called 'COINCIDE IMAGEN' to check if LEAD IZQ and LEAD DER have the same value
df_exploracion['BOTH OPTIMO'] = np.where((df_exploracion['LEAD IZQ'] == 'optimo') & (df_exploracion['LEAD DER'] == 'optimo'), 1, 0)
conditions = [
    # 'optimo' paired with either 'optimo' or 'suboptimo' except when paired with 'fuera'
    ((df_exploracion['LEAD IZQ'] == 'optimo') & (df_exploracion['LEAD DER'].isin(['optimo', 'suboptimo']))) |
    ((df_exploracion['LEAD DER'] == 'optimo') & (df_exploracion['LEAD IZQ'].isin(['optimo', 'suboptimo']))),
    # All other cases get 0
    True  # Acts as a catch-all for any cases not covered above
]

# Define corresponding actions for each condition
choices = [
    1,  # Cases where 'optimo' is paired as specified
    0   # All other combinations
]

# Create the new column based on these conditions
df_exploracion['OPTIMO_vs_no'] = np.select(conditions, choices)

  df_exploracion['COCANAL DER'] = df_exploracion['COCANAL DER'].fillna(0)


## 4.1 UPDRS General Analysis

In [4]:
# UPDRS III Off - PREQX
mean_UPDRSIIIOffPREQX = df_exploracion['UPDRS III Off - PREQX'].mean()
std_UPDRSIIIOffPREQX = df_exploracion['UPDRS III Off - PREQX'].std()

# UPDRS III On - PREQX
mean_UPDRSIIIOnPREQX = df_exploracion['UPDRS III On - PREQX'].mean()
std_UPDRSIIIOnPREQX = df_exploracion['UPDRS III On - PREQX'].std()

# UPDRS IV - PRE
mean_UPDRSIVPRE = df_exploracion['UPDRS IV - PRE'].mean()
std_UPDRSIVPRE = df_exploracion['UPDRS IV - PRE'].std()

# UPDRS III Off - POSTQX
mean_UPDRSIIIOffPOSTQX = df_exploracion['UPDRS III Off- POSTQX'].mean()
std_UPDRSIIIOffPOSTQX = df_exploracion['UPDRS III Off- POSTQX'].std()

# UPDRS III Off - POSTQX
mean_UPDRSIIIOnPOSTQX = df_exploracion['UPDRS III On- POSTQX'].mean()
std_UPDRSIIIOnPOSTQX = df_exploracion['UPDRS III On- POSTQX'].std()

# UPDRS IV - POSTQX
mean_UPDRSIVPOST = df_exploracion['UPDRS IV - POSTQX'].mean()
std_UPDRSIVPOST = df_exploracion['UPDRS IV - POSTQX'].std()

# UPDRS III Off - PREQX
female_mean_UPDRSIIIOffPREQX = df_exploracion[df_exploracion['SEXO'] == 1]['UPDRS III Off - PREQX'].mean()
female_std_UPDRSIIIOffPREQX = df_exploracion[df_exploracion['SEXO'] == 1]['UPDRS III Off - PREQX'].std()

# UPDRS III On - PREQX
female_mean_UPDRSIIIOnPREQX = df_exploracion[df_exploracion['SEXO'] == 1]['UPDRS III On - PREQX'].mean()
female_std_UPDRSIIIOnPREQX = df_exploracion[df_exploracion['SEXO'] == 1]['UPDRS III On - PREQX'].std()

# UPDRS IV - PRE
female_mean_UPDRSIVPRE = df_exploracion[df_exploracion['SEXO'] == 1]['UPDRS IV - PRE'].mean()
female_std_UPDRSIVPRE = df_exploracion[df_exploracion['SEXO'] == 1]['UPDRS IV - PRE'].std()

# UPDRS III Off - POSTQX
female_mean_UPDRSIIIOffPOSTQX = df_exploracion[df_exploracion['SEXO'] == 1]['UPDRS III Off- POSTQX'].mean()
female_std_UPDRSIIIOffPOSTQX = df_exploracion[df_exploracion['SEXO'] == 1]['UPDRS III Off- POSTQX'].std()

# UPDRS III Off - POSTQX
female_mean_UPDRSIIIOnPOSTQX = df_exploracion[df_exploracion['SEXO'] == 1]['UPDRS III On- POSTQX'].mean()
female_std_UPDRSIIIOnPOSTQX = df_exploracion[df_exploracion['SEXO'] == 1]['UPDRS III On- POSTQX'].std()

# UPDRS IV - POSTQX
female_mean_UPDRSIVPOST = df_exploracion[df_exploracion['SEXO'] == 1]['UPDRS IV - POSTQX'].mean()
female_std_UPDRSIVPOST = df_exploracion[df_exploracion['SEXO'] == 1]['UPDRS IV - POSTQX'].std()


# UPDRS III Off - PREQX
male_mean_UPDRSIIIOffPREQX = df_exploracion[df_exploracion['SEXO'] == 2]['UPDRS III Off - PREQX'].mean()
male_std_UPDRSIIIOffPREQX = df_exploracion[df_exploracion['SEXO'] == 2]['UPDRS III Off - PREQX'].std()

# UPDRS III On - PREQX
male_mean_UPDRSIIIOnPREQX = df_exploracion[df_exploracion['SEXO'] == 2]['UPDRS III On - PREQX'].mean()
male_std_UPDRSIIIOnPREQX = df_exploracion[df_exploracion['SEXO'] == 2]['UPDRS III On - PREQX'].std()

# UPDRS IV - PRE
male_mean_UPDRSIVPRE = df_exploracion[df_exploracion['SEXO'] == 2]['UPDRS IV - PRE'].mean()
male_std_UPDRSIVPRE = df_exploracion[df_exploracion['SEXO'] == 2]['UPDRS IV - PRE'].std()

# UPDRS III Off - POSTQX
male_mean_UPDRSIIIOffPOSTQX = df_exploracion[df_exploracion['SEXO'] == 2]['UPDRS III Off- POSTQX'].mean()
male_std_UPDRSIIIOffPOSTQX = df_exploracion[df_exploracion['SEXO'] == 2]['UPDRS III Off- POSTQX'].std()

# UPDRS III Off - POSTQX
male_mean_UPDRSIIIOnPOSTQX = df_exploracion[df_exploracion['SEXO'] == 2]['UPDRS III On- POSTQX'].mean()
male_std_UPDRSIIIOnPOSTQX = df_exploracion[df_exploracion['SEXO'] == 2]['UPDRS III On- POSTQX'].std()

# UPDRS IV - POSTQX
male_mean_UPDRSIVPOST = df_exploracion[df_exploracion['SEXO'] == 2]['UPDRS IV - POSTQX'].mean()
male_std_UPDRSIVPOST = df_exploracion[df_exploracion['SEXO'] == 2]['UPDRS IV - POSTQX'].std()

# print all results
print('UPDRS III Off - PREQX')
print('Mean:', mean_UPDRSIIIOffPREQX)
print('Standard Deviation:', std_UPDRSIIIOffPREQX)
print('UPDRS III On - PREQX')
print('Mean:', mean_UPDRSIIIOnPREQX)
print('Standard Deviation:', std_UPDRSIIIOnPREQX)
print('UPDRS IV - PRE')
print('Mean:', mean_UPDRSIVPRE)
print('Standard Deviation:', std_UPDRSIVPRE)
print('UPDRS III Off - POSTQX')
print('Mean:', mean_UPDRSIIIOffPOSTQX)
print('Standard Deviation:', std_UPDRSIIIOffPOSTQX)
print('UPDRS III On - POSTQX')
print('Mean:', mean_UPDRSIIIOnPOSTQX)
print('Standard Deviation:', std_UPDRSIIIOnPOSTQX)
print('UPDRS IV - POSTQX')
print('Mean:', mean_UPDRSIVPOST)
print('Standard Deviation:', std_UPDRSIVPOST)

print('UPDRS FEMALE')
print('UPDRS III Off - PREQX')
print('Mean:', female_mean_UPDRSIIIOffPREQX)
print('Standard Deviation:', female_std_UPDRSIIIOffPREQX)
print('UPDRS III On - PREQX')
print('Mean:', female_mean_UPDRSIIIOnPREQX)
print('Standard Deviation:', female_std_UPDRSIIIOnPREQX)
print('UPDRS IV - PRE')
print('Mean:', female_mean_UPDRSIVPRE)
print('Standard Deviation:', female_std_UPDRSIVPRE)
print('UPDRS III Off - POSTQX')
print('Mean:', female_mean_UPDRSIIIOffPOSTQX)
print('Standard Deviation:', female_std_UPDRSIIIOffPOSTQX)
print('UPDRS III On - POSTQX')
print('Mean:', female_mean_UPDRSIIIOnPOSTQX)
print('Standard Deviation:', female_std_UPDRSIIIOnPOSTQX)
print('UPDRS IV - POSTQX')
print('Mean:', female_mean_UPDRSIVPOST)
print('Standard Deviation:', female_std_UPDRSIVPOST)


print('UPDRS MALE')
print('UPDRS III Off - PREQX')
print('Mean:', male_mean_UPDRSIIIOffPREQX)
print('Standard Deviation:', male_std_UPDRSIIIOffPREQX)
print('UPDRS III On - PREQX')
print('Mean:', male_mean_UPDRSIIIOnPREQX)
print('Standard Deviation:', male_std_UPDRSIIIOnPREQX)
print('UPDRS IV - PRE')
print('Mean:', male_mean_UPDRSIVPRE)
print('Standard Deviation:', male_std_UPDRSIVPRE)
print('UPDRS III Off - POSTQX')
print('Mean:', male_mean_UPDRSIIIOffPOSTQX)
print('Standard Deviation:', male_std_UPDRSIIIOffPOSTQX)
print('UPDRS III On - POSTQX')
print('Mean:', male_mean_UPDRSIIIOnPOSTQX)
print('Standard Deviation:', male_std_UPDRSIIIOnPOSTQX)
print('UPDRS IV - POSTQX')
print('Mean:', male_mean_UPDRSIVPOST)
print('Standard Deviation:', male_std_UPDRSIVPOST)



UPDRS III Off - PREQX
Mean: 39.90909090909091
Standard Deviation: 9.520910002343646
UPDRS III On - PREQX
Mean: 23.333333333333332
Standard Deviation: 9.926815535037743
UPDRS IV - PRE
Mean: 6.333333333333333
Standard Deviation: 3.7052890125692852
UPDRS III Off - POSTQX
Mean: 28.181818181818183
Standard Deviation: 9.129124223654156
UPDRS III On - POSTQX
Mean: 19.03030303030303
Standard Deviation: 9.458742148420319
UPDRS IV - POSTQX
Mean: 2.4242424242424243
Standard Deviation: 3.112779134373967
UPDRS FEMALE
UPDRS III Off - PREQX
Mean: 40.53846153846154
Standard Deviation: 5.059897637557909
UPDRS III On - PREQX
Mean: 23.46153846153846
Standard Deviation: 7.622503794854031
UPDRS IV - PRE
Mean: 8.384615384615385
Standard Deviation: 3.355057414771058
UPDRS III Off - POSTQX
Mean: 30.76923076923077
Standard Deviation: 8.535746854199365
UPDRS III On - POSTQX
Mean: 19.923076923076923
Standard Deviation: 8.538750283868032
UPDRS IV - POSTQX
Mean: 4.0
Standard Deviation: 3.9791121287711073
UPDRS MAL

## 4.2 Is there a stadistically difference between people that have optimo/suboptimo or suboptimo/fuera?

#### For UPDRS III Off

In [9]:
# Assuming 'UPDRS III Off - PREQX' and 'UPDRS III Off - POSTQX' are your columns
df_exploracion['DIFF UPDRS III Off'] = df_exploracion['UPDRS III Off- POSTQX'] - df_exploracion['UPDRS III Off - PREQX']
from scipy.stats import shapiro, levene, ttest_ind, mannwhitneyu, kstest

# Split the data into two groups
group1 = df_exploracion[df_exploracion['OPTIMO_vs_no'] == 1]['DIFF UPDRS III Off'].dropna()
group2 = df_exploracion[df_exploracion['OPTIMO_vs_no'] == 0]['DIFF UPDRS III Off'].dropna()

# Test for normality
print("Normality Test (Shapiro-Wilk):")
norm1 = scipy.stats.kstest(group1)
norm2 = scipy.stats.kstest(group2)
print("Group 1:", norm1)
print("Group 2:", norm2)

# Test for equality of variances
print("Equality of Variances Test (Levene’s Test):")
lev_test = levene(group1, group2)
print(lev_test)

# Choose the test based on the assumptions
print("\nIndependent Samples Test:")
if norm1.pvalue > 0.05 and norm2.pvalue > 0.05 and lev_test.pvalue > 0.05:
    # If both distributions are normal and variances are equal, use t-test
    t_stat, p_val = ttest_ind(group1, group2)
    print(f"Independent t-test: t={t_stat}, p={p_val}")
else:
    # If assumptions are not met, use Mann-Whitney U test
    u_stat, p_val = mannwhitneyu(group1, group2)
    print(f"Mann-Whitney U test: U={u_stat}, p={p_val}")

# Interpret the p-value
if p_val < 0.05:
    print("There is a statistically significant difference between the groups.")
else:
    print("There is no statistically significant difference between the groups.")



Normality Test (Shapiro-Wilk):


KeyError: 'cdf'

#### For UPDRS III On

In [12]:
# Assuming 'UPDRS III Off - PREQX' and 'UPDRS III Off - POSTQX' are your columns
df_exploracion['DIFF UPDRS III On'] = df_exploracion['UPDRS III On- POSTQX'] - df_exploracion['UPDRS III On - PREQX']
from scipy.stats import shapiro, levene, ttest_ind, mannwhitneyu, norm

# Split the data into two groups
group1 = df_exploracion[df_exploracion['OPTIMO_vs_no'] == 1]['DIFF UPDRS III On'].dropna()
group2 = df_exploracion[df_exploracion['OPTIMO_vs_no'] == 0]['DIFF UPDRS III On'].dropna()

data = group1
mean, std = norm.fit(data)  # Fit a normal distribution to get mean and std
stat, p = kstest(data, 'norm', args=(mean, std))
print("Statistic:", stat, "P-value:", p)

data = group2
mean, std = norm.fit(data)  # Fit a normal distribution to get mean and std
stat, p = kstest(data, 'norm', args=(mean, std))
print("Statistic:", stat, "P-value:", p)

if p > 0.05:
    print("Data is normally distributed (fail to reject H0).")
else:
    print("Data is not normally distributed (reject H0).")

if p > 0.05:
    print("Data is normally distributed (fail to reject H0).")
else:
    print("Data is not normally distributed (reject H0).")

# Test for normality
print("Normality Test (Shapiro-Wilk):")
norm1 = shapiro(group1)
norm2 = shapiro(group2)
print("Group 1:", norm1)
print("Group 2:", norm2)

# Test for equality of variances
print("Equality of Variances Test (Levene’s Test):")
lev_test = levene(group1, group2)
print(lev_test)

# Choose the test based on the assumptions
print("\nIndependent Samples Test:")
if norm1.pvalue > 0.05 and norm2.pvalue > 0.05 and lev_test.pvalue > 0.05:
    # If both distributions are normal and variances are equal, use t-test
    t_stat, p_val = ttest_ind(group1, group2)
    print(f"Independent t-test: t={t_stat}, p={p_val}")
else:
    # If assumptions are not met, use Mann-Whitney U test
    u_stat, p_val = mannwhitneyu(group1, group2)
    print(f"Mann-Whitney U test: U={u_stat}, p={p_val}")

# Interpret the p-value
if p_val < 0.05:
    print("There is a statistically significant difference between the groups.")
else:
    print("There is no statistically significant difference between the groups.")



Statistic: 0.12277454579818503 P-value: 0.8199745715209041
Statistic: 0.2513883786262233 P-value: 0.47715375455936204
Data is normally distributed (fail to reject H0).
Data is normally distributed (fail to reject H0).
Normality Test (Shapiro-Wilk):
Group 1: ShapiroResult(statistic=0.9832818016452944, pvalue=0.9477896866796243)
Group 2: ShapiroResult(statistic=0.874049147522443, pvalue=0.11140941511280406)
Equality of Variances Test (Levene’s Test):
LeveneResult(statistic=0.0337775492641026, pvalue=0.8553404555860696)

Independent Samples Test:
Independent t-test: t=-1.3424909689490065, p=0.18888368777381181
There is no statistically significant difference between the groups.


#### For UPDRS IV

In [39]:
# Assuming 'UPDRS III Off - PREQX' and 'UPDRS III Off - POSTQX' are your columns
df_exploracion['DIFF UPDRS IV'] = df_exploracion['UPDRS IV - POSTQX'] - df_exploracion['UPDRS IV - PRE']
from scipy.stats import shapiro, levene, ttest_ind, mannwhitneyu

# Split the data into two groups
group1 = df_exploracion[df_exploracion['OPTIMO_vs_no'] == 1]['DIFF UPDRS IV'].dropna()
group2 = df_exploracion[df_exploracion['OPTIMO_vs_no'] == 0]['DIFF UPDRS IV'].dropna()

# Test for normality
print("Normality Test (Shapiro-Wilk):")
norm1 = shapiro(group1)
norm2 = shapiro(group2)
print("Group 1:", norm1)
print("Group 2:", norm2)

# Test for equality of variances
print("Equality of Variances Test (Levene’s Test):")
lev_test = levene(group1, group2)
print(lev_test)

# Choose the test based on the assumptions
print("\nIndependent Samples Test:")
if norm1.pvalue > 0.05 and norm2.pvalue > 0.05 and lev_test.pvalue > 0.05:
    # If both distributions are normal and variances are equal, use t-test
    t_stat, p_val = ttest_ind(group1, group2)
    print(f"Independent t-test: t={t_stat}, p={p_val}")
else:
    # If assumptions are not met, use Mann-Whitney U test
    u_stat, p_val = mannwhitneyu(group1, group2)
    print(f"Mann-Whitney U test: U={u_stat}, p={p_val}")

# Interpret the p-value
if p_val < 0.05:
    print("There is a statistically significant difference between the groups.")
else:
    print("There is no statistically significant difference between the groups.")



Normality Test (Shapiro-Wilk):
Group 1: ShapiroResult(statistic=0.9092815486801324, pvalue=0.03402764663337133)
Group 2: ShapiroResult(statistic=0.821087151691656, pvalue=0.026120730470066206)
Equality of Variances Test (Levene’s Test):
LeveneResult(statistic=0.048544278449981264, pvalue=0.8270156308646011)

Independent Samples Test:
Mann-Whitney U test: U=62.5, p=0.029876720872078998
There is a statistically significant difference between the groups.


## 4.3 Analysis for differences related to sex, age or disease duration

## Any difference related to age?

In [9]:
df = df_exploracion.rename(columns={
    'UPDRS III Off - PREQX': 'UPDRS_III_Off_PREQX',
    'UPDRS III Off- POSTQX': 'UPDRS_III_Off_POSTQX',
    'UPDRS III On - PREQX': 'UPDRS_III_On_PREQX',
    'UPDRS III On- POSTQX': 'UPDRS_III_On_POSTQX',
    'UPDRS IV - PRE': 'UPDRS_IV_PRE',
    'UPDRS IV - POSTQX': 'UPDRS_IV_POSTQX',
    'EDAD': 'EDAD'
})

df['UPDRS_III_Off_Diff'] = df['UPDRS_III_Off_POSTQX'] - df['UPDRS_III_Off_PREQX']
df['UPDRS_III_On_Diff'] = df['UPDRS_III_On_POSTQX'] - df['UPDRS_III_On_PREQX']
df['UPDRS_IV_Diff'] = df['UPDRS_IV_POSTQX'] - df['UPDRS_IV_PRE']

# Análisis de regresión para verificar el efecto de la edad
def regress_age_effect(df, dependent_var):
    X = sm.add_constant(df['EDAD'])
    model = sm.OLS(df[dependent_var], X).fit()
    print(f"Regression analysis for {dependent_var} on AGE")
    print(model.summary())

regress_age_effect(df, 'UPDRS_III_Off_Diff')
regress_age_effect(df, 'UPDRS_III_On_Diff')
regress_age_effect(df, 'UPDRS_IV_Diff')


Regression analysis for UPDRS_III_Off_Diff on AGE
                            OLS Regression Results                            
Dep. Variable:     UPDRS_III_Off_Diff   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                 -0.029
Method:                 Least Squares   F-statistic:                   0.06943
Date:                Sat, 27 Jul 2024   Prob (F-statistic):              0.794
Time:                        16:05:03   Log-Likelihood:                -124.02
No. Observations:                  34   AIC:                             252.0
Df Residuals:                      32   BIC:                             255.1
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
co

No hay evidencia significativa de que la edad tenga un efecto en las diferencias en las puntuaciones UPDRS_III_Off (p-valor = 0.794).
No hay evidencia significativa de que la edad tenga un efecto en las diferencias en las puntuaciones UPDRS_III_On (p-valor = 0.653).
No hay evidencia significativa de que la edad tenga un efecto en las diferencias en las puntuaciones UPDRS_IV_Diff (p-valor = 0.412).

## Any difference related to sex?

### Linear Regression Analysis

In [10]:
df = df_exploracion.rename(columns={
    'UPDRS III Off - PREQX': 'UPDRS_III_Off_PREQX',
    'UPDRS III Off- POSTQX': 'UPDRS_III_Off_POSTQX',
    'UPDRS III On - PREQX': 'UPDRS_III_On_PREQX',
    'UPDRS III On- POSTQX': 'UPDRS_III_On_POSTQX',
    'UPDRS IV - PRE': 'UPDRS_IV_PRE',
    'UPDRS IV - POSTQX': 'UPDRS_IV_POSTQX',
    'SEXO': 'SEXO'
})

# Calcular las diferencias en las puntuaciones
df['UPDRS_III_Off_Diff'] = df['UPDRS_III_Off_POSTQX'] - df['UPDRS_III_Off_PREQX']
df['UPDRS_III_On_Diff'] = df['UPDRS_III_On_POSTQX'] - df['UPDRS_III_On_PREQX']
df['UPDRS_IV_Diff'] = df['UPDRS_IV_POSTQX'] - df['UPDRS_IV_PRE']

# Análisis de regresión para verificar el efecto del sexo
def regress_sex_effect(df, dependent_var):
    X = sm.add_constant(df['SEXO'])
    model = sm.OLS(df[dependent_var], X).fit()
    print(f"Regression analysis for {dependent_var} on SEX")
    print(model.summary())

# Verificar el efecto del sexo en las diferencias UPDRS
regress_sex_effect(df, 'UPDRS_III_Off_Diff')
regress_sex_effect(df, 'UPDRS_III_On_Diff')
regress_sex_effect(df, 'UPDRS_IV_Diff')



Regression analysis for UPDRS_III_Off_Diff on SEX
                            OLS Regression Results                            
Dep. Variable:     UPDRS_III_Off_Diff   R-squared:                       0.092
Model:                            OLS   Adj. R-squared:                  0.064
Method:                 Least Squares   F-statistic:                     3.248
Date:                Sat, 27 Jul 2024   Prob (F-statistic):             0.0809
Time:                        16:05:59   Log-Likelihood:                -122.41
No. Observations:                  34   AIC:                             248.8
Df Residuals:                      32   BIC:                             251.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
co

In [7]:
# Filtrar los datos según LEAD y SEXO
df_filtered_optimosub_male = df_exploracion[(df_exploracion['OPTIMO_vs_no'] == 1) & (df_exploracion['SEXO'] == 2)]
df_filtered_optimosub_female = df_exploracion[(df_exploracion['OPTIMO_vs_no'] == 1) & (df_exploracion['SEXO'] == 1)]
df_filtered_subfuera_male = df_exploracion[(df_exploracion['OPTIMO_vs_no'] == 0) & (df_exploracion['SEXO'] == 2)]
df_filtered_subfuera_female = df_exploracion[(df_exploracion['OPTIMO_vs_no'] == 0) & (df_exploracion['SEXO'] == 1)]

# Diccionario para almacenar los resultados
results = {
    'Group': [],
    'UPDRS III Off': [],
    'UPDRS III On': [],
    'UPDRS IV': []
}

def paired_t_tests(df, group_name):
    # Calcular el tamaño de la muestra
    n = len(df)
    
    # Prueba t pareada para UPDRS III Off
    ttest_off = ttest_rel(df['UPDRS III Off - PREQX'], df['UPDRS III Off- POSTQX'])
    print(f"Paired t-test {group_name} UPDRS III Off (N={n}): t={ttest_off.statistic}, p={ttest_off.pvalue:.3f}")
    
    # Prueba t pareada para UPDRS III On
    ttest_on = ttest_rel(df['UPDRS III On - PREQX'], df['UPDRS III On- POSTQX'])
    print(f"Paired t-test {group_name} UPDRS III On (N={n}): t={ttest_on.statistic}, p={ttest_on.pvalue:.3f}")
    
    # Prueba t pareada para UPDRS IV
    ttest_iv = ttest_rel(df['UPDRS IV - PRE'], df['UPDRS IV - POSTQX'])
    print(f"Paired t-test {group_name} UPDRS IV (N={n}): t={ttest_iv.statistic}, p={ttest_iv.pvalue:.3f}")
    
    # Guardar los resultados en el diccionario
    results['Group'].append(group_name)
    results['UPDRS III Off'].append(round(ttest_off.pvalue, 3))
    results['UPDRS III On'].append(round(ttest_on.pvalue, 3))
    results['UPDRS IV'].append(round(ttest_iv.pvalue, 3))

# Realizar las pruebas para cada grupo y sexo
print('OPTIMOSUB - Male')
paired_t_tests(df_filtered_optimosub_male, 'OPTIMO - Male')

print('OPTIMOSUB - Female')
paired_t_tests(df_filtered_optimosub_female, 'OPTIMO - Female')

# Crear la tabla comparativa
results_df = pd.DataFrame(results)
print("\nResultados Comparativos de p-valores")
print(results_df)


OPTIMOSUB - Male
Paired t-test OPTIMO - Male UPDRS III Off (N=12): t=7.653321493807926, p=0.000
Paired t-test OPTIMO - Male UPDRS III On (N=12): t=3.5514017050707385, p=0.005
Paired t-test OPTIMO - Male UPDRS IV (N=12): t=7.895146188218008, p=0.000
OPTIMOSUB - Female
Paired t-test OPTIMO - Female UPDRS III Off (N=12): t=4.5133546692422, p=0.001
Paired t-test OPTIMO - Female UPDRS III On (N=12): t=1.434289725613882, p=0.179
Paired t-test OPTIMO - Female UPDRS IV (N=12): t=4.103259033241449, p=0.002

Resultados Comparativos de p-valores
             Group  UPDRS III Off  UPDRS III On  UPDRS IV
0    OPTIMO - Male          0.000         0.005     0.000
1  OPTIMO - Female          0.001         0.179     0.002


In [11]:
# df_sex = df_exploracion but just the rows that have OPTIMO_vs_no == 1
df_sex = df_exploracion[df_exploracion['OPTIMO_vs_no'] == 1]
df = df_sex.rename(columns={
    'UPDRS III Off - PREQX': 'UPDRS_III_Off_PREQX',
    'UPDRS III Off- POSTQX': 'UPDRS_III_Off_POSTQX',
    'UPDRS III On - PREQX': 'UPDRS_III_On_PREQX',
    'UPDRS III On- POSTQX': 'UPDRS_III_On_POSTQX',
    'UPDRS IV - PRE': 'UPDRS_IV_PRE',
    'UPDRS IV - POSTQX': 'UPDRS_IV_POSTQX',
    'SEXO': 'SEXO'
})

# Calcular las diferencias en las puntuaciones
df['UPDRS_III_Off_Diff'] = df['UPDRS_III_Off_POSTQX'] - df['UPDRS_III_Off_PREQX']
df['UPDRS_III_On_Diff'] = df['UPDRS_III_On_POSTQX'] - df['UPDRS_III_On_PREQX']
df['UPDRS_IV_Diff'] = df['UPDRS_IV_POSTQX'] - df['UPDRS_IV_PRE']

# Análisis de regresión para verificar el efecto del sexo
def regress_sex_effect(df, dependent_var):
    X = sm.add_constant(df['SEXO'])
    model = sm.OLS(df[dependent_var], X).fit()
    print(f"Regression analysis for {dependent_var} on SEX")
    print(model.summary())

# Verificar el efecto del sexo en las diferencias UPDRS
regress_sex_effect(df, 'UPDRS_III_Off_Diff')
regress_sex_effect(df, 'UPDRS_III_On_Diff')
regress_sex_effect(df, 'UPDRS_IV_Diff')

Regression analysis for UPDRS_III_Off_Diff on SEX
                            OLS Regression Results                            
Dep. Variable:     UPDRS_III_Off_Diff   R-squared:                       0.182
Model:                            OLS   Adj. R-squared:                  0.144
Method:                 Least Squares   F-statistic:                     4.884
Date:                Wed, 07 Aug 2024   Prob (F-statistic):             0.0378
Time:                        13:32:19   Log-Likelihood:                -81.894
No. Observations:                  24   AIC:                             167.8
Df Residuals:                      22   BIC:                             170.1
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
co

## Any difference related to disease duration?

#### Linear Regression

In [11]:
df = df_exploracion.rename(columns={
    'UPDRS III Off - PREQX': 'UPDRS_III_Off_PREQX',
    'UPDRS III Off- POSTQX': 'UPDRS_III_Off_POSTQX',
    'UPDRS III On - PREQX': 'UPDRS_III_On_PREQX',
    'UPDRS III On- POSTQX': 'UPDRS_III_On_POSTQX',
    'UPDRS IV - PRE': 'UPDRS_IV_PRE',
    'UPDRS IV - POSTQX': 'UPDRS_IV_POSTQX',
    'DD': 'DURATION'
})

# Calcular las diferencias en las puntuaciones
df['UPDRS_III_Off_Diff'] = df['UPDRS_III_Off_POSTQX'] - df['UPDRS_III_Off_PREQX']
df['UPDRS_III_On_Diff'] = df['UPDRS_III_On_POSTQX'] - df['UPDRS_III_On_PREQX']
df['UPDRS_IV_Diff'] = df['UPDRS_IV_POSTQX'] - df['UPDRS_IV_PRE']

# Análisis de regresión para verificar el efecto del sexo
def regress_sex_effect(df, dependent_var):
    X = sm.add_constant(df['DURATION'])
    model = sm.OLS(df[dependent_var], X).fit()
    print(f"Regression analysis for {dependent_var} on DURATION")
    print(model.summary())

# Verificar el efecto del sexo en las diferencias UPDRS
regress_sex_effect(df, 'UPDRS_III_Off_Diff')
regress_sex_effect(df, 'UPDRS_III_On_Diff')
regress_sex_effect(df, 'UPDRS_IV_Diff')

Regression analysis for UPDRS_III_Off_Diff on DURATION
                            OLS Regression Results                            
Dep. Variable:     UPDRS_III_Off_Diff   R-squared:                       0.076
Model:                            OLS   Adj. R-squared:                  0.047
Method:                 Least Squares   F-statistic:                     2.637
Date:                Sat, 27 Jul 2024   Prob (F-statistic):              0.114
Time:                        16:06:31   Log-Likelihood:                -122.71
No. Observations:                  34   AIC:                             249.4
Df Residuals:                      32   BIC:                             252.5
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------