In [1]:
import numpy as np
import pandas as pd
from scipy.stats import zscore

from scipy.stats import ttest_ind, mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection

In [2]:
# Read the four UOP feature tables; the first CSV column is used as the row index.
UOP_data = {
    'UOP_Celldensities': pd.read_csv('../DataUOP/UOPfinal_celldensities.csv', index_col=0),
    'UOP_Function': pd.read_csv('../DataUOP/UOPfinal_functional.csv', index_col=0),
    'UOP_Metavariables': pd.read_csv('../DataUOP/UOPfinal_metavariables.csv', index_col=0),
    'UOP_Neighborhood': pd.read_csv('../DataUOP/UOPfinal_neighborhood.csv', index_col=0),
}

# Keep each table reachable under its own name as well as through the dict.
UOP_Celldensities = UOP_data['UOP_Celldensities']
UOP_Function = UOP_data['UOP_Function']
UOP_Metavariables = UOP_data['UOP_Metavariables']
UOP_Neighborhood = UOP_data['UOP_Neighborhood']

# Outcome: the `grade` column shifted down by one (the screening loop compares
# the groups where this value is 0 and 1).
UOP_y = pd.read_csv('../DataUOP/UOPfinal_outcome.csv', index_col=0).grade - 1

In [7]:
# Settings shared by the univariate screening cells.
num_permutations = 100  # number of label shuffles per feature in the permutation test
alpha = 0.05            # level handed to fdrcorrection

In [4]:
# Univariate screen of the UOP tables.
# For every numeric feature, compare the two outcome groups (UOP_y == 0 vs
# UOP_y == 1): mean difference, t-test and Mann-Whitney U p-values, plus a
# permutation p-value built on the t statistic. The t-test p-values are then
# FDR-corrected across the features of a table and one CSV is written per table.
rng = np.random.default_rng(0)  # fixed seed so the permutation p-values are reproducible

for key, df in UOP_data.items():
    feature_names = []
    result_rows = []
    perm_p_list = []

    for column in df.select_dtypes(include=['float64', 'int64']):
        x = df[column].dropna()
        # Restrict the outcome to the samples that still have a value (same order
        # as x), so observed and permuted label masks always match x in length.
        outcome = UOP_y.loc[x.index]
        values = x.to_numpy()
        labels = outcome.to_numpy()

        group1 = x[labels == 0]  # Values in x corresponding to y.grade == 1
        group2 = x[labels == 1]  # Values in x corresponding to y.grade == 2

        meandifference = group2.mean() - group1.mean()
        # Keep the two test statistics in separate names: the permutation test
        # below must be compared against the t statistic, not the U statistic.
        t_statistic, p_value_ttest = ttest_ind(group1, group2)
        u_statistic, p_value_mannwhitneyu = mannwhitneyu(group1, group2)

        # Permutation testing: null distribution of the t statistic under
        # shuffled group labels.
        perm_statistics = np.empty(num_permutations)
        for i in range(num_permutations):
            perm_labels = rng.permutation(labels)
            perm_statistics[i] = ttest_ind(
                values[perm_labels == 0], values[perm_labels == 1]
            ).statistic

        # Two-sided permutation p-value. The +1 in numerator and denominator
        # counts the observed labelling itself, so the estimate is never exactly 0.
        if np.isnan(t_statistic):
            perm_p_value = np.nan  # no observed statistic to compare against
        else:
            n_extreme = np.count_nonzero(np.abs(perm_statistics) >= np.abs(t_statistic))
            perm_p_value = (n_extreme + 1) / (num_permutations + 1)

        feature_names.append(column)
        result_rows.append({
            # NOTE(review): this column has always held the Mann-Whitney U
            # statistic (the last value assigned to `statistic`); kept as-is so
            # existing CSV consumers see the same values.
            'Statistic': u_statistic,
            'meandifference higher-lower': meandifference,
            'p-value ttest': p_value_ttest,
            'p-value mannwhitneyu': p_value_mannwhitneyu,
        })
        perm_p_list.append(perm_p_value)

    # Build the result tables once instead of growing them cell by cell.
    univariate = pd.DataFrame(result_rows, index=feature_names)
    permutation_p_values = pd.DataFrame(
        {'Permutation p-value': perm_p_list}, index=feature_names
    )

    # Replace NaN values with 1 for FDR correction (same treatment as the STA
    # cell; presumably NaN p-values would otherwise contaminate the adjusted
    # values - confirm against the statsmodels implementation).
    univariate['p-value ttest'] = univariate['p-value ttest'].fillna(1)

    # Apply FDR correction
    fdr_adjusted_p_values = fdrcorrection(univariate['p-value ttest'], alpha=alpha)
    univariate['FDR-adjusted p-value'] = fdr_adjusted_p_values[1]

    # Appended as the last column so the positions of the existing columns in
    # the CSV do not change.
    univariate['Permutation p-value'] = permutation_p_values['Permutation p-value']

    filename = f'Univariate_results_{key}.csv'
    univariate.to_csv(filename)

In [3]:
# Read the four STA feature tables; the first CSV column is used as the row index.
STA_data = {
    'STA_Celldensities': pd.read_csv('../DataStanford2/another_STA_celldensities.csv', index_col=0),
    'STA_Function': pd.read_csv('../DataStanford2/another_STA_functional.csv', index_col=0),
    'STA_Metavariables': pd.read_csv('../DataStanford2/another_STA_metavariables.csv', index_col=0),
    'STA_Neighborhood': pd.read_csv('../DataStanford2/another_STA_neighborhood.csv', index_col=0),
}

# Keep each table reachable under its own name as well as through the dict.
STA_Celldensities = STA_data['STA_Celldensities']
STA_Function = STA_data['STA_Function']
STA_Metavariables = STA_data['STA_Metavariables']
STA_Neighborhood = STA_data['STA_Neighborhood']

# Outcome: the `grade` column shifted down by one (the screening loop compares
# the groups where this value is 0 and 1).
STA_y = pd.read_csv('../DataStanford2/another_STA_outcome.csv', index_col=0).grade - 1

In [13]:
# Univariate screen of the STA tables.
# For every numeric feature, compare the two outcome groups (STA_y == 0 vs
# STA_y == 1): mean difference, t-test and Mann-Whitney U p-values, plus a
# permutation p-value built on the t statistic. The t-test p-values are then
# FDR-corrected across the features of a table; each table's results are kept
# in `result_data` and written to one CSV.
rng = np.random.default_rng(0)  # fixed seed so the permutation p-values are reproducible

# `result_data` is not defined in any cell visible above this one; create it
# here so the cell also runs on a fresh kernel.
result_data = {}

for key, df in STA_data.items():
    feature_names = []
    result_rows = []
    perm_p_list = []

    for column in df.select_dtypes(include=['float64', 'int64']):
        x = df[column].dropna()
        # Restrict the outcome to the samples that still have a value (same order
        # as x), so observed and permuted label masks always match x in length.
        outcome = STA_y.loc[x.index]
        values = x.to_numpy()
        labels = outcome.to_numpy()

        group1 = x[labels == 0]  # Values in x corresponding to y.grade == 1
        group2 = x[labels == 1]  # Values in x corresponding to y.grade == 2

        meandifference = group2.mean() - group1.mean()
        # Keep the two test statistics in separate names: the permutation test
        # below must be compared against the t statistic, not the U statistic.
        t_statistic, p_value_ttest = ttest_ind(group1, group2)
        u_statistic, p_value_mannwhitneyu = mannwhitneyu(group1, group2)

        # Permutation testing: null distribution of the t statistic under
        # shuffled group labels.
        perm_statistics = np.empty(num_permutations)
        for i in range(num_permutations):
            perm_labels = rng.permutation(labels)
            perm_statistics[i] = ttest_ind(
                values[perm_labels == 0], values[perm_labels == 1]
            ).statistic

        # Two-sided permutation p-value. The +1 in numerator and denominator
        # counts the observed labelling itself, so the estimate is never exactly 0.
        if np.isnan(t_statistic):
            perm_p_value = np.nan  # no observed statistic to compare against
        else:
            n_extreme = np.count_nonzero(np.abs(perm_statistics) >= np.abs(t_statistic))
            perm_p_value = (n_extreme + 1) / (num_permutations + 1)

        feature_names.append(column)
        result_rows.append({
            # NOTE(review): this column has always held the Mann-Whitney U
            # statistic (the last value assigned to `statistic`); kept as-is so
            # existing CSV consumers see the same values.
            'Statistic': u_statistic,
            'meandifference higher-lower': meandifference,
            'p-value ttest': p_value_ttest,
            'p-value mannwhitneyu': p_value_mannwhitneyu,
        })
        perm_p_list.append(perm_p_value)

    # Build the result tables once instead of growing them cell by cell.
    univariate = pd.DataFrame(result_rows, index=feature_names)
    permutation_p_values = pd.DataFrame(
        {'Permutation p-value': perm_p_list}, index=feature_names
    )

    # Replace NaN values with 1 for FDR correction. Assigning the result back
    # (rather than a chained inplace fillna on the column selection) makes sure
    # the frame itself is updated.
    univariate['p-value ttest'] = univariate['p-value ttest'].fillna(1)

    # Apply FDR correction
    fdr_adjusted_p_values = fdrcorrection(univariate['p-value ttest'], alpha=alpha)
    univariate['FDR-adjusted p-value'] = fdr_adjusted_p_values[1]

    # Appended as the last column so the positions of the existing columns in
    # the CSV do not change.
    univariate['Permutation p-value'] = permutation_p_values['Permutation p-value']

    result_data[key] = univariate  # Save the univariate dataframe in the result_data dictionary

    filename = f'Univariate_results_{key}.csv'
    univariate.to_csv(filename)