In [8]:
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, ttest_ind

In [4]:
# Load the control-group analysis and reduce it to one mean per column
# (axis=0 averages down the rows), leaving n_df as a Series of column means.
n_df = pd.read_csv('n_results.csv').mean(axis=0)

In [5]:
# Load the TBI-group analysis and reduce it to one mean per column
# (axis=0 averages down the rows), leaving tb_df as a Series of column means.
tb_df = pd.read_csv('tb_results.csv').mean(axis=0)

In [6]:
# Put the per-column means of both groups side by side. The control Series
# supplies the row index; the TBI Series is aligned to it by label.
compare_df = n_df.to_frame(name='\'Normal\' Patients')
compare_df['TBI Patients'] = tb_df

In [7]:
# Pearson correlation between the two groups' per-column means.
# The result (statistic and p-value) is displayed as the cell's output.
control_means = compare_df['\'Normal\' Patients']
tbi_means = compare_df['TBI Patients']
pearsonr(control_means, tbi_means)

PearsonRResult(statistic=np.float64(0.99871309557843), pvalue=np.float64(6.419873810429265e-47))

Independent t-tests for each category (control vs. TBI patients)

In [9]:
# Reload both result files from disk: the earlier cells rebound n_df and tb_df
# to per-column means, and the per-column t-tests below need the full rows.
n_df, tb_df = (pd.read_csv(path) for path in ('n_results.csv', 'tb_results.csv'))

In [10]:
# Set aside the leading row and column before they are stripped off below.
# read_csv has already used the header line for the column labels, so
# position 0 here is the first row of values in each file.
first_column = n_df.iloc[:, 0]
n_first_row, tb_first_row = (frame.iloc[0, 1:] for frame in (n_df, tb_df))

In [11]:
def _numeric_body(frame):
    """Return `frame` without its first row and first column, as numbers.

    Cells that cannot be parsed as numbers become NaN (errors='coerce'), and
    every row that then contains a NaN is dropped.
    """
    trimmed = frame.iloc[1:, 1:]
    return trimmed.apply(pd.to_numeric, errors='coerce').dropna()


# NOTE(review): position 0 is the first data row (the CSV header line already
# became the column labels) -- confirm it is not a real observation.
# This cell rebinds n_df / tb_df from themselves, so re-running it strips a
# further row and column; re-run the loading cell first.
n_df = _numeric_body(n_df)
tb_df = _numeric_body(tb_df)

In [12]:
# Restrict the comparison to column labels present in both groups' frames.
control_columns = n_df.columns
tbi_columns = tb_df.columns
common_columns = control_columns.intersection(tbi_columns)

In [13]:
# Run an independent two-sample t-test (scipy defaults) on each shared column,
# control vs. TBI, and store the statistic and p-value under the column name.
t_test_results = {
    column: dict(zip(('t_stat', 'p_value'), ttest_ind(n_df[column], tb_df[column])))
    for column in common_columns
}

In [14]:
# Tabulate the t-test output with one row per tested column, then append each
# group's mean for that column (the means are aligned by column name).
results_df = pd.DataFrame(t_test_results).transpose().assign(
    **{'Control Mean': n_df.mean(), 'TBI Mean': tb_df.mean()}
)

In [15]:
# Attach the control file's first row to the results for readability.
# n_first_row is labelled by the original column names, which are also the
# row labels of results_df, so it has to be added as a COLUMN. Assigning it
# with .loc['First Row'] aligned it against the result column labels
# ('t_stat', 'p_value', ...) and only appended a row of NaN.
results_df['First Row'] = n_first_row
# first_column holds one value per input row and is labelled by integer row
# position, so it has no per-column counterpart in results_df: assigning it
# aligned on labels and produced an all-NaN 'First Column'. It is therefore
# intentionally not attached to the results.

In [16]:
# Show the table as the cell's value so Jupyter renders it as a rich table
# rather than the width-wrapped plain text that print() produces.
results_df

                                          t_stat   p_value  Control Mean  \
adjacent_overlap_all_sent_div_seg       0.203683  0.839032      0.664537   
adjacent_overlap_binary_all_sent        0.612736  0.541500      0.355112   
adjacent_overlap_cw_sent                0.098173  0.922000      0.065657   
adjacent_overlap_cw_sent_div_seg        0.594227  0.553758      0.178970   
adjacent_overlap_binary_cw_sent         0.701076  0.484951      0.146554   
adjacent_overlap_verb_sent             -0.767472  0.444684      0.041052   
adjacent_overlap_verb_sent_div_seg     -0.182984  0.855197      0.038559   
adjacent_overlap_binary_verb_sent      -0.266203  0.790654      0.037754   
adjacent_overlap_argument_sent         -0.747124  0.456815      0.097673   
adjacent_overlap_argument_sent_div_seg -0.443436  0.658448      0.183893   
adjacent_overlap_binary_argument_sent  -0.118908  0.905596      0.162511   
basic_connectives                       0.738662  0.461914      0.076758   
conjunctions

In [17]:
# Write the results table (including its row labels) to a CSV file.
output_path = 't_test_taaco.csv'
results_df.to_csv(output_path)