In [1]:
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from scipy.stats import chi2_contingency, f_oneway, kruskal, shapiro, anderson, ttest_rel, f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [2]:
# Read the semicolon-delimited raw export into a DataFrame.
df = pd.read_csv('~/projects/articulodbs/raw/data.csv', sep=';')

# Keep only the variables used downstream, then drop every row that has a
# missing value in any of those selected columns.
df_exploracion = df.loc[:, [
    'CIC',
    'SEXO',
    'DD',
    'EDAD',
    'UPDRS III Off - PREQX',
    'UPDRS III On - PREQX',
    'UPDRS IV - PRE',
    'UPDRS III Off- POSTQX',
    'UPDRS III On- POSTQX',
    'UPDRS IV - POSTQX',
    'LEAD IZQ',
    'LEAD DER',
]].dropna()

### Pruebas para cada grupo y sexo DERECHA

In [None]:
# Boolean masks shared by all six subgroup filters (right lead x sex).
lead_der = df_exploracion['LEAD DER']
# NOTE(review): the variable names below imply SEXO == 2 is male and SEXO == 1 is female — confirm against the codebook.
is_male = df_exploracion['SEXO'] == 2
is_female = df_exploracion['SEXO'] == 1

# One frame per (right-lead placement, sex) combination.
df_filtered_optimo_male = df_exploracion.loc[(lead_der == 'optimo') & is_male]
df_filtered_optimo_female = df_exploracion.loc[(lead_der == 'optimo') & is_female]
df_filtered_suboptimo_male = df_exploracion.loc[(lead_der == 'suboptimo') & is_male]
df_filtered_suboptimo_female = df_exploracion.loc[(lead_der == 'suboptimo') & is_female]
df_filtered_fuera_male = df_exploracion.loc[(lead_der == 'fuera') & is_male]
df_filtered_fuera_female = df_exploracion.loc[(lead_der == 'fuera') & is_female]

# Column-oriented accumulator for the p-values; each key gets its own empty list.
results = {
    column: []
    for column in ('Group', 'UPDRS III Off', 'UPDRS III On', 'UPDRS IV')
}

def paired_t_tests(df, group_name, results_store=None):
    """Run pre- vs. post-surgery paired t-tests on the three UPDRS scores of one subgroup.

    For each of UPDRS III Off, UPDRS III On and UPDRS IV, the pre-surgery
    column is compared with the post-surgery column using ``ttest_rel``.
    One line per comparison is printed, and the p-values (rounded to three
    decimals) are appended to the results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of the subgroup; must contain the six UPDRS pre/post columns.
    group_name : str
        Label used in the printed lines and stored under the 'Group' key.
    results_store : dict, optional
        Dict of lists with keys 'Group', 'UPDRS III Off', 'UPDRS III On' and
        'UPDRS IV'. Defaults to the module-level ``results`` dict.

    Notes
    -----
    A paired t-test needs at least two pairs. For smaller subgroups the test
    is not run (this avoids the division-by-zero RuntimeWarning emitted when
    the variance is computed with n - 1 == 0) and NaN is reported instead.
    """
    if results_store is None:
        # Fall back to the notebook-level accumulator.
        results_store = results

    # Sample size of the subgroup
    n = len(df)

    # (label, pre-surgery column, post-surgery column)
    comparisons = (
        ('UPDRS III Off', 'UPDRS III Off - PREQX', 'UPDRS III Off- POSTQX'),
        ('UPDRS III On', 'UPDRS III On - PREQX', 'UPDRS III On- POSTQX'),
        ('UPDRS IV', 'UPDRS IV - PRE', 'UPDRS IV - POSTQX'),
    )

    rounded_pvalues = {}
    for label, pre_col, post_col in comparisons:
        if n < 2:
            # Not enough pairs: the statistic is undefined, so skip scipy entirely.
            statistic = float('nan')
            pvalue = float('nan')
        else:
            outcome = ttest_rel(df[pre_col], df[post_col])
            statistic = outcome.statistic
            pvalue = outcome.pvalue
        print(f"Paired t-test {group_name} {label} (N={n}): t={statistic}, p={pvalue:.3f}")
        rounded_pvalues[label] = round(pvalue, 3)

    # Append only after every test has succeeded so the lists stay the same length.
    results_store['Group'].append(group_name)
    for label, pvalue in rounded_pvalues.items():
        results_store[label].append(pvalue)

# Run the paired tests for every (right-lead placement, sex) subgroup,
# printing the subgroup header before its three test lines.
subgroups = [
    ('OPTIMO - Male', df_filtered_optimo_male),
    ('OPTIMO - Female', df_filtered_optimo_female),
    ('SUBOPTIMO - Male', df_filtered_suboptimo_male),
    ('SUBOPTIMO - Female', df_filtered_suboptimo_female),
    ('FUERA - Male', df_filtered_fuera_male),
    ('FUERA - Female', df_filtered_fuera_female),
]
for subgroup_label, subgroup_frame in subgroups:
    print(subgroup_label)
    paired_t_tests(subgroup_frame, subgroup_label)

# Assemble the accumulated p-values into a comparison table and show it.
results_df = pd.DataFrame(results)
print("\nResultados Comparativos de p-valores")
print(results_df)


OPTIMO - Male
Paired t-test OPTIMO - Male UPDRS III Off (N=13): t=9.0699907688816, p=0.000
Paired t-test OPTIMO - Male UPDRS III On (N=13): t=3.6757051904250333, p=0.003
Paired t-test OPTIMO - Male UPDRS IV (N=13): t=7.4336408571737325, p=0.000
OPTIMO - Female
Paired t-test OPTIMO - Female UPDRS III Off (N=9): t=3.9269275978582816, p=0.004
Paired t-test OPTIMO - Female UPDRS III On (N=9): t=1.3898862837218868, p=0.202
Paired t-test OPTIMO - Female UPDRS IV (N=9): t=3.5856096494623335, p=0.007
SUBOPTIMO - Male
Paired t-test SUBOPTIMO - Male UPDRS III Off (N=5): t=1.6099466609552864, p=0.183
Paired t-test SUBOPTIMO - Male UPDRS III On (N=5): t=1.1278673705953686, p=0.322
Paired t-test SUBOPTIMO - Male UPDRS IV (N=5): t=2.2322748145014826, p=0.089
SUBOPTIMO - Female
Paired t-test SUBOPTIMO - Female UPDRS III Off (N=4): t=1.6666666666666667, p=0.194
Paired t-test SUBOPTIMO - Female UPDRS III On (N=4): t=0.3364632924552266, p=0.759
Paired t-test SUBOPTIMO - Female UPDRS IV (N=4): t=1.623279

  var *= np.divide(n, n-ddof)  # to avoid error on division by zero
  var *= np.divide(n, n-ddof)  # to avoid error on division by zero


### Pruebas para cada grupo y sexo IZQUIERDA

In [None]:
# Boolean masks shared by all six subgroup filters (left lead x sex).
lead_izq = df_exploracion['LEAD IZQ']
# NOTE(review): the variable names below imply SEXO == 2 is male and SEXO == 1 is female — confirm against the codebook.
is_male = df_exploracion['SEXO'] == 2
is_female = df_exploracion['SEXO'] == 1

# One frame per (left-lead placement, sex) combination.
df_filtered_optimo_male = df_exploracion.loc[(lead_izq == 'optimo') & is_male]
df_filtered_optimo_female = df_exploracion.loc[(lead_izq == 'optimo') & is_female]
df_filtered_suboptimo_male = df_exploracion.loc[(lead_izq == 'suboptimo') & is_male]
df_filtered_suboptimo_female = df_exploracion.loc[(lead_izq == 'suboptimo') & is_female]
df_filtered_fuera_male = df_exploracion.loc[(lead_izq == 'fuera') & is_male]
df_filtered_fuera_female = df_exploracion.loc[(lead_izq == 'fuera') & is_female]

# Column-oriented accumulator for the p-values; each key gets its own empty list.
results = {
    column: []
    for column in ('Group', 'UPDRS III Off', 'UPDRS III On', 'UPDRS IV')
}

def paired_t_tests(df, group_name, results_store=None):
    """Run pre- vs. post-surgery paired t-tests on the three UPDRS scores of one subgroup.

    For each of UPDRS III Off, UPDRS III On and UPDRS IV, the pre-surgery
    column is compared with the post-surgery column using ``ttest_rel``.
    One line per comparison is printed, and the p-values (rounded to three
    decimals) are appended to the results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of the subgroup; must contain the six UPDRS pre/post columns.
    group_name : str
        Label used in the printed lines and stored under the 'Group' key.
    results_store : dict, optional
        Dict of lists with keys 'Group', 'UPDRS III Off', 'UPDRS III On' and
        'UPDRS IV'. Defaults to the module-level ``results`` dict.

    Notes
    -----
    A paired t-test needs at least two pairs. For smaller subgroups the test
    is not run (this avoids the division-by-zero RuntimeWarning emitted when
    the variance is computed with n - 1 == 0) and NaN is reported instead.
    """
    if results_store is None:
        # Fall back to the notebook-level accumulator.
        results_store = results

    # Sample size of the subgroup
    n = len(df)

    # (label, pre-surgery column, post-surgery column)
    comparisons = (
        ('UPDRS III Off', 'UPDRS III Off - PREQX', 'UPDRS III Off- POSTQX'),
        ('UPDRS III On', 'UPDRS III On - PREQX', 'UPDRS III On- POSTQX'),
        ('UPDRS IV', 'UPDRS IV - PRE', 'UPDRS IV - POSTQX'),
    )

    rounded_pvalues = {}
    for label, pre_col, post_col in comparisons:
        if n < 2:
            # Not enough pairs: the statistic is undefined, so skip scipy entirely.
            statistic = float('nan')
            pvalue = float('nan')
        else:
            outcome = ttest_rel(df[pre_col], df[post_col])
            statistic = outcome.statistic
            pvalue = outcome.pvalue
        print(f"Paired t-test {group_name} {label} (N={n}): t={statistic}, p={pvalue:.3f}")
        rounded_pvalues[label] = round(pvalue, 3)

    # Append only after every test has succeeded so the lists stay the same length.
    results_store['Group'].append(group_name)
    for label, pvalue in rounded_pvalues.items():
        results_store[label].append(pvalue)

# Run the paired tests for every (left-lead placement, sex) subgroup,
# printing the subgroup header before its three test lines.
subgroups = [
    ('OPTIMO - Male', df_filtered_optimo_male),
    ('OPTIMO - Female', df_filtered_optimo_female),
    ('SUBOPTIMO - Male', df_filtered_suboptimo_male),
    ('SUBOPTIMO - Female', df_filtered_suboptimo_female),
    ('FUERA - Male', df_filtered_fuera_male),
    ('FUERA - Female', df_filtered_fuera_female),
]
for subgroup_label, subgroup_frame in subgroups:
    print(subgroup_label)
    paired_t_tests(subgroup_frame, subgroup_label)

# Assemble the accumulated p-values into a comparison table and show it.
results_df = pd.DataFrame(results)
print("\nResultados Comparativos de p-valores")
print(results_df)


### ANCOVA for left hemisphere SEXO

In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Build the modelling frame in one chain: rename the columns to identifiers
# that can be written bare in a formula, then add the post-minus-pre change
# score for each UPDRS measure.
df_filtered = (
    df_exploracion
    .rename(columns={
        'UPDRS III Off - PREQX': 'UPDRS_III_Off_PREQX',
        'UPDRS III Off- POSTQX': 'UPDRS_III_Off_POSTQX',
        'UPDRS III On - PREQX': 'UPDRS_III_On_PREQX',
        'UPDRS III On- POSTQX': 'UPDRS_III_On_POSTQX',
        'UPDRS IV - PRE': 'UPDRS_IV_PRE',
        'UPDRS IV - POSTQX': 'UPDRS_IV_POSTQX',
        'LEAD IZQ': 'LEAD_IZQ',
        'SEXO': 'SEXO'
    })
    .assign(
        UPDRS_III_Off_Diff=lambda d: d['UPDRS_III_Off_POSTQX'] - d['UPDRS_III_Off_PREQX'],
        UPDRS_III_On_Diff=lambda d: d['UPDRS_III_On_POSTQX'] - d['UPDRS_III_On_PREQX'],
        UPDRS_IV_Diff=lambda d: d['UPDRS_IV_POSTQX'] - d['UPDRS_IV_PRE'],
    )
)

# Fit one OLS model per change score: left-lead placement as a categorical
# factor plus SEXO entered as a numeric term.
model_off = ols('UPDRS_III_Off_Diff ~ C(LEAD_IZQ) + SEXO', data=df_filtered).fit()
model_on = ols('UPDRS_III_On_Diff ~ C(LEAD_IZQ) + SEXO', data=df_filtered).fit()
model_iv = ols('UPDRS_IV_Diff ~ C(LEAD_IZQ) + SEXO', data=df_filtered).fit()

# Type II ANOVA table for each fitted model.
anova_table_off = sm.stats.anova_lm(model_off, typ=2)
anova_table_on = sm.stats.anova_lm(model_on, typ=2)
anova_table_iv = sm.stats.anova_lm(model_iv, typ=2)

# Report the three tables in the order Off, On, IV.
print("ANCOVA for UPDRS III Off Diff")
print(anova_table_off)
print("\nANCOVA for UPDRS III On Diff")
print(anova_table_on)
print("\nANCOVA for UPDRS IV Diff")
print(anova_table_iv)


ANCOVA for UPDRS III Off Diff
                  sum_sq    df         F    PR(>F)
C(LEAD_IZQ)   161.864077   2.0  0.968766  0.391110
SEXO          338.951797   1.0  4.057293  0.053023
Residual     2506.241186  30.0       NaN       NaN

ANCOVA for UPDRS III On Diff
                  sum_sq    df         F    PR(>F)
C(LEAD_IZQ)   114.146727   2.0  0.818556  0.450678
SEXO          105.546727   1.0  1.513769  0.228125
Residual     2091.733974  30.0       NaN       NaN

ANCOVA for UPDRS IV Diff
                 sum_sq    df         F    PR(>F)
C(LEAD_IZQ)   46.378247   2.0  1.917621  0.164543
SEXO           3.330002   1.0  0.275374  0.603606
Residual     362.779647  30.0       NaN       NaN


### ANCOVA for right hemisphere SEXO

In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Build the modelling frame in one chain: rename the columns to identifiers
# that can be written bare in a formula, then add the post-minus-pre change
# score for each UPDRS measure.
df_filtered = (
    df_exploracion
    .rename(columns={
        'UPDRS III Off - PREQX': 'UPDRS_III_Off_PREQX',
        'UPDRS III Off- POSTQX': 'UPDRS_III_Off_POSTQX',
        'UPDRS III On - PREQX': 'UPDRS_III_On_PREQX',
        'UPDRS III On- POSTQX': 'UPDRS_III_On_POSTQX',
        'UPDRS IV - PRE': 'UPDRS_IV_PRE',
        'UPDRS IV - POSTQX': 'UPDRS_IV_POSTQX',
        'LEAD DER': 'LEAD_DER',
        'SEXO': 'SEXO'
    })
    .assign(
        UPDRS_III_Off_Diff=lambda d: d['UPDRS_III_Off_POSTQX'] - d['UPDRS_III_Off_PREQX'],
        UPDRS_III_On_Diff=lambda d: d['UPDRS_III_On_POSTQX'] - d['UPDRS_III_On_PREQX'],
        UPDRS_IV_Diff=lambda d: d['UPDRS_IV_POSTQX'] - d['UPDRS_IV_PRE'],
    )
)

# Fit one OLS model per change score: right-lead placement as a categorical
# factor plus SEXO entered as a numeric term.
model_off = ols('UPDRS_III_Off_Diff ~ C(LEAD_DER) + SEXO', data=df_filtered).fit()
model_on = ols('UPDRS_III_On_Diff ~ C(LEAD_DER) + SEXO', data=df_filtered).fit()
model_iv = ols('UPDRS_IV_Diff ~ C(LEAD_DER) + SEXO', data=df_filtered).fit()

# Type II ANOVA table for each fitted model.
anova_table_off = sm.stats.anova_lm(model_off, typ=2)
anova_table_on = sm.stats.anova_lm(model_on, typ=2)
anova_table_iv = sm.stats.anova_lm(model_iv, typ=2)

# Report the three tables in the order Off, On, IV.
print("ANCOVA for UPDRS III Off Diff")
print(anova_table_off)
print("\nANCOVA for UPDRS III On Diff")
print(anova_table_on)
print("\nANCOVA for UPDRS IV Diff")
print(anova_table_iv)


ANCOVA for UPDRS III Off Diff
                  sum_sq    df         F    PR(>F)
C(LEAD_DER)   609.027109   2.0  4.436649  0.020514
SEXO          167.810735   1.0  2.444940  0.128393
Residual     2059.078154  30.0       NaN       NaN

ANCOVA for UPDRS III On Diff
                  sum_sq    df         F    PR(>F)
C(LEAD_DER)   317.030240   2.0  2.517644  0.097551
SEXO           29.235397   1.0  0.464336  0.500831
Residual     1888.850462  30.0       NaN       NaN

ANCOVA for UPDRS IV Diff
                 sum_sq    df         F    PR(>F)
C(LEAD_DER)   71.075843   2.0  3.153488  0.057144
SEXO           3.170474   1.0  0.281335  0.599732
Residual     338.082051  30.0       NaN       NaN


### ANCOVA for left hemisphere DURATION

In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Build the modelling frame in one chain: rename the columns to identifiers
# that can be written bare in a formula (DD becomes DURATION), then add the
# post-minus-pre change score for each UPDRS measure.
df_filtered = (
    df_exploracion
    .rename(columns={
        'UPDRS III Off - PREQX': 'UPDRS_III_Off_PREQX',
        'UPDRS III Off- POSTQX': 'UPDRS_III_Off_POSTQX',
        'UPDRS III On - PREQX': 'UPDRS_III_On_PREQX',
        'UPDRS III On- POSTQX': 'UPDRS_III_On_POSTQX',
        'UPDRS IV - PRE': 'UPDRS_IV_PRE',
        'UPDRS IV - POSTQX': 'UPDRS_IV_POSTQX',
        'LEAD IZQ': 'LEAD_IZQ',
        'DD': 'DURATION'
    })
    .assign(
        UPDRS_III_Off_Diff=lambda d: d['UPDRS_III_Off_POSTQX'] - d['UPDRS_III_Off_PREQX'],
        UPDRS_III_On_Diff=lambda d: d['UPDRS_III_On_POSTQX'] - d['UPDRS_III_On_PREQX'],
        UPDRS_IV_Diff=lambda d: d['UPDRS_IV_POSTQX'] - d['UPDRS_IV_PRE'],
    )
)

# Fit one OLS model per change score: left-lead placement as a categorical
# factor plus DURATION as a covariate.
model_off = ols('UPDRS_III_Off_Diff ~ C(LEAD_IZQ) + DURATION', data=df_filtered).fit()
model_on = ols('UPDRS_III_On_Diff ~ C(LEAD_IZQ) + DURATION', data=df_filtered).fit()
model_iv = ols('UPDRS_IV_Diff ~ C(LEAD_IZQ) + DURATION', data=df_filtered).fit()

# Type II ANOVA table for each fitted model.
anova_table_off = sm.stats.anova_lm(model_off, typ=2)
anova_table_on = sm.stats.anova_lm(model_on, typ=2)
anova_table_iv = sm.stats.anova_lm(model_iv, typ=2)

# Report the three tables in the order Off, On, IV.
print("ANCOVA for UPDRS III Off Diff")
print(anova_table_off)
print("\nANCOVA for UPDRS III On Diff")
print(anova_table_on)
print("\nANCOVA for UPDRS IV Diff")
print(anova_table_iv)


ANCOVA for UPDRS III Off Diff
                  sum_sq    df         F    PR(>F)
C(LEAD_IZQ)   123.817159   2.0  0.716711  0.496528
DURATION      253.830811   1.0  2.938580  0.096802
Residual     2591.362171  30.0       NaN       NaN

ANCOVA for UPDRS III On Diff
                  sum_sq    df         F    PR(>F)
C(LEAD_IZQ)    74.992119   2.0  0.512255  0.604289
DURATION        1.339709   1.0  0.018303  0.893289
Residual     2195.940993  30.0       NaN       NaN

ANCOVA for UPDRS IV Diff
                 sum_sq    df        F    PR(>F)
C(LEAD_IZQ)   31.876350   2.0  1.35697  0.272788
DURATION      13.747300   1.0  1.17044  0.287931
Residual     352.362349  30.0      NaN       NaN


### ANCOVA for right hemisphere DURATION

In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Build the modelling frame in one chain: rename the columns to identifiers
# that can be written bare in a formula (DD becomes DURATION), then add the
# post-minus-pre change score for each UPDRS measure.
df_filtered = (
    df_exploracion
    .rename(columns={
        'UPDRS III Off - PREQX': 'UPDRS_III_Off_PREQX',
        'UPDRS III Off- POSTQX': 'UPDRS_III_Off_POSTQX',
        'UPDRS III On - PREQX': 'UPDRS_III_On_PREQX',
        'UPDRS III On- POSTQX': 'UPDRS_III_On_POSTQX',
        'UPDRS IV - PRE': 'UPDRS_IV_PRE',
        'UPDRS IV - POSTQX': 'UPDRS_IV_POSTQX',
        'LEAD DER': 'LEAD_DER',
        'DD': 'DURATION'
    })
    .assign(
        UPDRS_III_Off_Diff=lambda d: d['UPDRS_III_Off_POSTQX'] - d['UPDRS_III_Off_PREQX'],
        UPDRS_III_On_Diff=lambda d: d['UPDRS_III_On_POSTQX'] - d['UPDRS_III_On_PREQX'],
        UPDRS_IV_Diff=lambda d: d['UPDRS_IV_POSTQX'] - d['UPDRS_IV_PRE'],
    )
)

# Fit one OLS model per change score: right-lead placement as a categorical
# factor plus DURATION as a covariate.
model_off = ols('UPDRS_III_Off_Diff ~ C(LEAD_DER) + DURATION', data=df_filtered).fit()
model_on = ols('UPDRS_III_On_Diff ~ C(LEAD_DER) + DURATION', data=df_filtered).fit()
model_iv = ols('UPDRS_IV_Diff ~ C(LEAD_DER) + DURATION', data=df_filtered).fit()

# Type II ANOVA table for each fitted model.
anova_table_off = sm.stats.anova_lm(model_off, typ=2)
anova_table_on = sm.stats.anova_lm(model_on, typ=2)
anova_table_iv = sm.stats.anova_lm(model_iv, typ=2)

# Report the three tables in the order Off, On, IV.
print("ANCOVA for UPDRS III Off Diff")
print(anova_table_off)
print("\nANCOVA for UPDRS III On Diff")
print(anova_table_on)
print("\nANCOVA for UPDRS IV Diff")
print(anova_table_iv)


ANCOVA for UPDRS III Off Diff
                  sum_sq    df         F    PR(>F)
C(LEAD_DER)   695.616173   2.0  5.166584  0.011800
DURATION      207.325731   1.0  3.079761  0.089481
Residual     2019.563158  30.0       NaN       NaN

ANCOVA for UPDRS III On Diff
                  sum_sq    df         F    PR(>F)
C(LEAD_DER)   352.883694   2.0  2.759708  0.079402
DURATION        0.036440   1.0  0.000570  0.981111
Residual     1918.049418  30.0       NaN       NaN

ANCOVA for UPDRS IV Diff
                 sum_sq    df         F    PR(>F)
C(LEAD_DER)   69.084670   2.0  3.288138  0.051148
DURATION      26.098496   1.0  2.484356  0.125471
Residual     315.154029  30.0       NaN       NaN
