# Supp: clinical scores group comparisons

- Project:        HCP-EP
- File Name:      clinical_group_differences
- Author:         Haley Wang
- Date Created:   2023-9-22
- Last Modified:  2023-9-22
- Code Status:    Testing

In [None]:
import numpy as np
import pandas as pd
import pyls
import h5py
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
from netneurotools import datasets, stats, plotting
from scipy import stats
from scipy.stats import zscore, pearsonr, ttest_ind
from scipy.spatial.distance import squareform, pdist

In [None]:
# Lift pandas' display truncation for both rows and columns (None = no cap),
# so whole frames are shown when a cell displays them.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)
#pd.reset_option('all')

### Import clinical data from both EP and MEND

In [None]:
"""
load data (age and sex were regressed out)
"""

# Absolute locations of the two clinical tables on the cluster file system.
ep_csv_path = '/u/project/CCN/cbearden/haleywan/qunex_studyfolder/analysis/scripts/behavioral/HCPEP_Cli_Behav_data.csv'
mend_csv_path = '/u/project/CCN/cbearden/haleywan/qunex_studyfolder/analysis/scripts/PLS_MEND_Replication/clinical_all.csv'

# One DataFrame per sample; both are modified in place by later cells.
EP = pd.read_csv(ep_csv_path)
MEND = pd.read_csv(mend_csv_path)

### For EP

In [None]:
# Keep every row whose cohort is not 3. The explicit .copy() gives EP its own
# data, so the column assignments that follow are not writes into a slice of
# the originally loaded frame (which pandas flags with SettingWithCopyWarning).
EP = EP[EP['cohort'] != 3].copy()
# Parse the diagnosis codes as numbers; anything unparseable becomes NaN.
EP['dx'] = pd.to_numeric(EP['dx'], errors='coerce')

In [None]:
# Map a row's numeric diagnosis code ('dx') onto a diagnostic-group code.
def set_value(row):
    """Return the group code (1, 2 or 3) that ``row['dx']`` belongs to.

    A code that appears in none of the three code lists yields ``None``.
    NOTE(review): the groups are presumably the SZ / SZAD / BP categories
    used as tick labels in the violin plot -- confirm.
    """
    dx_code = row['dx']
    code_groups = (
        (1, (295.90, 295.40, 298.80, 297.10)),
        (2, (295.70,)),
        (3, (296.24, 296.44, 296.89, 296.54, 296.34, 296.40, 296.53)),
    )
    for group_label, member_codes in code_groups:
        if dx_code in member_codes:
            return group_label
    return None

# Run set_value on every row (axis=1) and store the resulting codes as 'Group'.
group_codes = EP.apply(set_value, axis=1)
EP['Group'] = group_codes

In [None]:
# Show the derived 'Group' column to check the recoding; set_value returns
# nothing for dx codes outside its three lists, so those rows have no group.
EP['Group']

In [None]:
# Collect the EP column labels into a plain Python list ...
column_names = EP.columns.to_list()

# ... and print them so the available variables can be looked up by name.
print(column_names)


In [None]:
# Sum each scale's columns row-wise into a new total_* column.
# EP.filter(like=...) matches the substring anywhere in a column name, so it
# also selects the '<prefix>total' columns (pos_total, neg_total, gps_total,
# ymrs_total) that the ANCOVA cells read from EP. Including them would add the
# scale total on top of its own items, so columns ending in '_total' are
# left out of the sum.
# NOTE(review): assumes every remaining matched column is an item score --
# confirm against the printed column list.
for total_col, prefix in (
    ('total_positive', 'pos_'),
    ('total_negtive', 'neg_'),  # (sic) original column name kept unchanged
    ('total_general', 'gps_'),
    ('total_mania', 'ymrs_'),
):
    item_cols = [
        col for col in EP.filter(like=prefix).columns
        if not str(col).endswith('_total')
    ]
    EP[total_col] = EP[item_cols].sum(axis=1)


In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [None]:
# ANCOVA for pos_total: Group enters as a categorical factor, with
# interview_age_month and sex as additional terms.
# NOTE(review): sex is not wrapped in C(); if it is numerically coded it is
# fitted as a numeric term -- confirm the coding.
formula = 'pos_total ~ C(Group) + interview_age_month + sex'
model = ols(formula=formula, data=EP).fit()
# typ=2 requests the Type II ANOVA table.
aov_table = sm.stats.anova_lm(model, typ=2)
aov_table

In [None]:
# ANCOVA for neg_total: Group enters as a categorical factor, with
# interview_age_month and sex as additional terms.
formula = 'neg_total ~ C(Group) + interview_age_month + sex'
model = ols(formula=formula, data=EP).fit()
# typ=2 requests the Type II ANOVA table.
aov_table = sm.stats.anova_lm(model, typ=2)
aov_table

In [None]:
# ANCOVA for gps_total: Group enters as a categorical factor, with
# interview_age_month and sex as additional terms.
formula = 'gps_total ~ C(Group) + interview_age_month + sex'
model = ols(formula=formula, data=EP).fit()
# typ=2 requests the Type II ANOVA table.
aov_table = sm.stats.anova_lm(model, typ=2)
aov_table

In [None]:
# ANCOVA for ymrs_total: Group enters as a categorical factor, with
# interview_age_month and sex as additional terms.
formula = 'ymrs_total ~ C(Group) + interview_age_month + sex'
model = ols(formula=formula, data=EP).fit()
# typ=2 requests the Type II ANOVA table.
aov_table = sm.stats.anova_lm(model, typ=2)
aov_table

In [None]:
import statsmodels.stats.multitest as smm

# Hard-coded p-values, one per ANCOVA above.
# NOTE(review): presumably the Group-effect p-values in the order pos, neg,
# gps, ymrs; they are not read from the fitted models, so they must be
# re-entered by hand if the models are re-run.
p_values = [0.000083, 0.003159, 0.015050, 0.347700]

# Benjamini-Hochberg FDR correction. multipletests returns a 4-tuple; the
# corrected p-values (q-values) are its second element.
bh_results = smm.multipletests(p_values, alpha=0.05, method='fdr_bh')
pvals_corrected = bh_results[1]

print("q-values:", pvals_corrected)

In [None]:
# Mean and standard deviation of pos_total within each diagnostic group.
scores_by_group = EP.groupby('Group')['pos_total']
grouped_stats = scores_by_group.agg(['mean', 'std'])

print(grouped_stats)

In [None]:
# Mean and standard deviation of neg_total within each diagnostic group.
scores_by_group = EP.groupby('Group')['neg_total']
grouped_stats = scores_by_group.agg(['mean', 'std'])

print(grouped_stats)

In [None]:
# Mean and standard deviation of gps_total within each diagnostic group.
scores_by_group = EP.groupby('Group')['gps_total']
grouped_stats = scores_by_group.agg(['mean', 'std'])

print(grouped_stats)

In [None]:
# Mean and standard deviation of ymrs_total within each diagnostic group.
scores_by_group = EP.groupby('Group')['ymrs_total']
grouped_stats = scores_by_group.agg(['mean', 'std'])

print(grouped_stats)

### Violin plots

In [None]:
from matplotlib.ticker import MultipleLocator
from matplotlib.colors import ListedColormap, to_rgb, to_hex

# Three-colour palette as hex strings, plus the same colours wrapped in a
# ListedColormap after conversion to RGB tuples.
pal_bgo_3 = ["#31A9E7", "#305861", "#F19100"]
cmap_bgo_3 = ListedColormap(list(map(to_rgb, pal_bgo_3)))

In [None]:
import colorsys


def scale_lightness(rgb, scale_l):
    """Return ``rgb`` with its HLS lightness multiplied by ``scale_l``.

    ``rgb`` is an (r, g, b) triple as accepted by ``colorsys.rgb_to_hls``.
    Hue and saturation are left unchanged; the scaled lightness is capped
    at 1. The result is an (r, g, b) tuple.
    """
    hue, lightness, saturation = colorsys.rgb_to_hls(*rgb)
    scaled = lightness * scale_l
    # Cap at 1 so the lightness cannot exceed the top of the HLS range.
    capped = scaled if scaled < 1 else 1
    return colorsys.hls_to_rgb(hue, capped, s=saturation)

In [None]:
# Build a five-step colormap from the first palette colour, with its lightness
# scaled by 0.1, 0.3, 0.5, 0.7 and 0.9; the result is not stored, only shown
# as the cell's value.
ListedColormap([to_hex(scale_lightness(to_rgb(pal_bgo_3[0]), _)) for _ in [0.1, 0.3, 0.5, 0.7, 0.9]])

In [None]:
# Show the three-colour colormap as the cell's value.
cmap_bgo_3

In [None]:
from matplotlib.colors import to_rgb, to_hex
import colorsys
import seaborn as sns
import matplotlib.pyplot as plt

# Base three-colour palette (hex strings).
pal_bgo_3 = ["#31A9E7", "#305861", "#F19100"]


def scale_lightness(rgb, scale_l):
    """Multiply the HLS lightness of ``rgb`` by ``scale_l`` (capped at 1) and return RGB."""
    hue, lightness, saturation = colorsys.rgb_to_hls(*rgb)
    scaled = lightness * scale_l
    return colorsys.hls_to_rgb(hue, scaled if scaled < 1 else 1, s=saturation)


# Lighten each palette colour by a factor of 1.05, then convert to hex strings
# for the seaborn palette argument.
bright_colors = [scale_lightness(to_rgb(base_color), 1.05) for base_color in pal_bgo_3]
bright_colors_hex = list(map(to_hex, bright_colors))

# Violin plot of ymrs_total per Group: no inner marks, no outline.
# NOTE(review): newer seaborn releases (0.13+) deprecate `scale=` in favour of
# `density_norm=` and warn when `palette` is passed without `hue` -- confirm
# against the installed version.
ax = sns.violinplot(
    x="Group",
    y="ymrs_total",
    data=EP,
    palette=bright_colors_hex,
    scale="area",
    inner=None,
    linewidth=0,
)

# Replace the group codes on the x axis with diagnostic labels, in tick order.
new_labels = ['SZ', 'SZAD', 'BP']
ax.set_xticks(range(len(new_labels)))
ax.set_xticklabels(new_labels)
ax.set_xlabel('')
ax.set_ylabel('YMRS Mania Symptom Severity')

# Hide the right and top axis spines.
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)




### For MEND

### Derive MEND groups and symptom totals

In [None]:
# Collect the MEND column labels into a plain Python list ...
column_names = MEND.columns.to_list()

# ... and print them so the available variables can be looked up by name.
print(column_names)


In [None]:
# Recompute the EP total_* columns (this cell operates on EP, not MEND).
# EP.filter(like=...) matches the substring anywhere in a column name, so it
# also selects the '<prefix>total' columns (pos_total, neg_total, gps_total,
# ymrs_total) that the EP ANCOVA cells read. Including them would add the
# scale total on top of its own items, so columns ending in '_total' are
# left out of the sum.
# NOTE(review): assumes every remaining matched column is an item score --
# confirm against the printed EP column list.
for total_col, prefix in (
    ('total_positive', 'pos_'),
    ('total_negtive', 'neg_'),  # (sic) original column name kept unchanged
    ('total_general', 'gps_'),
    ('total_mania', 'ymrs_'),
):
    item_cols = [
        col for col in EP.filter(like=prefix).columns
        if not str(col).endswith('_total')
    ]
    EP[total_col] = EP[item_cols].sum(axis=1)


In [None]:
# Recode PatientType2 values 2, 3 and 5 into Group codes 1, 2 and 3; any other
# value has no entry in the mapping and becomes NaN.
group_recode = {2: 1, 3: 2, 5: 3}
MEND['Group'] = MEND['PatientType2'].map(group_recode)

# BPRS items that are summed row-wise into BPRS_pos.
bprs_pos_items = [
    'BPRS_ConceptualDisorg', 'BPRS_Grandiosity', 'BPRS_Hostility', 'BPRS_Suspiciousness',
    'BPRS_HallucinatoryBehav', 'BPRS_Uncooperativeness', 'BPRS_UnusualThoughtContent', 'BPRS_Excitement',
]
MEND['BPRS_pos'] = MEND[bprs_pos_items].sum(axis=1)

# BPRS items that are summed row-wise into BPRS_nonpos.
bprs_nonpos_items = [
    'BPRS_Anxiety', 'BPRS_Disorientation', 'BPRS_GuiltFeelings', 'BPRS_MannerismsPosturing',
    'BPRS_MotorRetard', 'BPRS_EmotionalWithdrawal', 'BPRS_Tension', 'BPRS_BluntedAffect',
    'BPRS_DepressiveMood', 'BPRS_SomaticConcern',
]
MEND['BPRS_nonpos'] = MEND[bprs_nonpos_items].sum(axis=1)


# Columns whose names start with 'SANS_', summed row-wise into sans_total.
sans_cols = [name for name in MEND if name.startswith('SANS_')]
MEND['sans_total'] = MEND[sans_cols].sum(axis=1)

# Columns whose names start with 'YMRS_', summed row-wise into ymrs_total.
ymrs_cols = [name for name in MEND if name.startswith('YMRS_')]
MEND['ymrs_total'] = MEND[ymrs_cols].sum(axis=1)


In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [None]:
# ANCOVA for BPRS_pos: Group enters as a categorical factor, with
# AgeAtBaseline and sex as additional terms.
# NOTE(review): sex is not wrapped in C(); if it is numerically coded it is
# fitted as a numeric term -- confirm the coding.
formula = 'BPRS_pos ~ C(Group) + AgeAtBaseline + sex'
model = ols(formula=formula, data=MEND).fit()
# typ=2 requests the Type II ANOVA table.
aov_table = sm.stats.anova_lm(model, typ=2)
aov_table

In [None]:
# ANCOVA for BPRS_nonpos: Group enters as a categorical factor, with
# AgeAtBaseline and sex as additional terms.
formula = 'BPRS_nonpos ~ C(Group) + AgeAtBaseline + sex'
model = ols(formula=formula, data=MEND).fit()
# typ=2 requests the Type II ANOVA table.
aov_table = sm.stats.anova_lm(model, typ=2)
aov_table

In [None]:
# ANCOVA for sans_total: Group enters as a categorical factor, with
# AgeAtBaseline and sex as additional terms.
formula = 'sans_total ~ C(Group) + AgeAtBaseline + sex'
model = ols(formula=formula, data=MEND).fit()
# typ=2 requests the Type II ANOVA table.
aov_table = sm.stats.anova_lm(model, typ=2)
aov_table

In [None]:
# ANCOVA for ymrs_total: Group enters as a categorical factor, with
# AgeAtBaseline and sex as additional terms.
formula = 'ymrs_total ~ C(Group) + AgeAtBaseline + sex'
model = ols(formula=formula, data=MEND).fit()
# typ=2 requests the Type II ANOVA table.
aov_table = sm.stats.anova_lm(model, typ=2)
aov_table

In [None]:
import statsmodels.stats.multitest as smm

# Hard-coded p-values, one per MEND ANCOVA above.
# NOTE(review): presumably the Group-effect p-values in the order BPRS_pos,
# BPRS_nonpos, sans_total, ymrs_total; they are not read from the fitted
# models, so they must be re-entered by hand if the models are re-run.
p_values = [0.23, 0.34, 0.24, 0.005948]

# Benjamini-Hochberg FDR correction. multipletests returns a 4-tuple; the
# corrected p-values (q-values) are its second element.
bh_results = smm.multipletests(p_values, alpha=0.05, method='fdr_bh')
pvals_corrected = bh_results[1]

print("q-values:", pvals_corrected)

In [None]:
# Mean and standard deviation of BPRS_pos within each diagnostic group.
scores_by_group = MEND.groupby('Group')['BPRS_pos']
grouped_stats = scores_by_group.agg(['mean', 'std'])

print(grouped_stats)

In [None]:
# Mean and standard deviation of BPRS_nonpos within each diagnostic group.
scores_by_group = MEND.groupby('Group')['BPRS_nonpos']
grouped_stats = scores_by_group.agg(['mean', 'std'])

print(grouped_stats)

In [None]:
# Mean and standard deviation of sans_total within each diagnostic group.
scores_by_group = MEND.groupby('Group')['sans_total']
grouped_stats = scores_by_group.agg(['mean', 'std'])

print(grouped_stats)

In [None]:
# Mean and standard deviation of ymrs_total within each diagnostic group.
scores_by_group = MEND.groupby('Group')['ymrs_total']
grouped_stats = scores_by_group.agg(['mean', 'std'])

print(grouped_stats)

### Violin plots

In [None]:
from matplotlib.ticker import MultipleLocator
from matplotlib.colors import ListedColormap, to_rgb, to_hex

# Three-colour palette as hex strings, plus the same colours wrapped in a
# ListedColormap after conversion to RGB tuples.
pal_bgo_3 = ["#31A9E7", "#305861", "#F19100"]
cmap_bgo_3 = ListedColormap(list(map(to_rgb, pal_bgo_3)))

In [None]:
import colorsys


def scale_lightness(rgb, scale_l):
    """Return ``rgb`` with its HLS lightness multiplied by ``scale_l``.

    ``rgb`` is an (r, g, b) triple as accepted by ``colorsys.rgb_to_hls``.
    Hue and saturation are left unchanged; the scaled lightness is capped
    at 1. The result is an (r, g, b) tuple.
    """
    hue, lightness, saturation = colorsys.rgb_to_hls(*rgb)
    scaled = lightness * scale_l
    # Cap at 1 so the lightness cannot exceed the top of the HLS range.
    capped = scaled if scaled < 1 else 1
    return colorsys.hls_to_rgb(hue, capped, s=saturation)

In [None]:
# Build a five-step colormap from the first palette colour, with its lightness
# scaled by 0.1, 0.3, 0.5, 0.7 and 0.9; the result is not stored, only shown
# as the cell's value.
ListedColormap([to_hex(scale_lightness(to_rgb(pal_bgo_3[0]), _)) for _ in [0.1, 0.3, 0.5, 0.7, 0.9]])

In [None]:
# Show the three-colour colormap as the cell's value.
cmap_bgo_3

In [None]:
from matplotlib.colors import to_rgb, to_hex
import colorsys
import seaborn as sns
import matplotlib.pyplot as plt

# Base three-colour palette (hex strings).
pal_bgo_3 = ["#31A9E7", "#305861", "#F19100"]


def scale_lightness(rgb, scale_l):
    """Multiply the HLS lightness of ``rgb`` by ``scale_l`` (capped at 1) and return RGB."""
    hue, lightness, saturation = colorsys.rgb_to_hls(*rgb)
    scaled = lightness * scale_l
    return colorsys.hls_to_rgb(hue, scaled if scaled < 1 else 1, s=saturation)


# Lighten each palette colour by a factor of 1.05, then convert to hex strings
# for the seaborn palette argument.
bright_colors = [scale_lightness(to_rgb(base_color), 1.05) for base_color in pal_bgo_3]
bright_colors_hex = list(map(to_hex, bright_colors))

# Violin plot of BPRS_nonpos per Group: no inner marks, no outline.
# NOTE(review): newer seaborn releases (0.13+) deprecate `scale=` in favour of
# `density_norm=` and warn when `palette` is passed without `hue` -- confirm
# against the installed version.
ax = sns.violinplot(
    x="Group",
    y="BPRS_nonpos",
    data=MEND,
    palette=bright_colors_hex,
    scale="area",
    inner=None,
    linewidth=0,
)

# Replace the group codes on the x axis with diagnostic labels, in tick order.
new_labels = ['SZ', 'SZAD', 'BP']
ax.set_xticks(range(len(new_labels)))
ax.set_xticklabels(new_labels)
ax.set_xlabel('')
ax.set_ylabel('BPRS Total Non-Psychotic Symptom Severity')

# Hide the right and top axis spines.
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
