In [49]:
import numpy as np
import pandas as pd
import os as os
from scipy.stats import pearsonr, ttest_ind

In [64]:
# Load the control-patient table. The file's first line supplies the column
# names (header=0 is the pandas default, spelled out here); no rows or
# columns are skipped at this point.
n_df = pd.read_csv('n_CM.csv', header=0)

In [None]:
# Coerce every column to numeric (cells that cannot be parsed become NaN),
# then reduce the table to one mean per column; NaN cells are ignored by mean.
# NOTE(review): n_df is rebound to that Series of means here, so the full
# table is no longer reachable under this name in later cells.
n_df = n_df.apply(pd.to_numeric, errors='coerce').mean(axis=0)

In [65]:
# Load the TBI-patient table. The file's first line supplies the column
# names (header=0 is the pandas default, spelled out here).
tb_df = pd.read_csv('tb_CM.csv', header=0)

In [None]:
# Coerce every column to numeric (cells that cannot be parsed become NaN),
# then reduce the table to one mean per column; NaN cells are ignored by mean.
# NOTE(review): tb_df is rebound to that Series of means here, so the full
# table is no longer reachable under this name in later cells.
tb_df = tb_df.apply(pd.to_numeric, errors='coerce').mean(axis=0)

In [70]:
# Labels that appear in both n_df and tb_df. The intersection is taken over
# each object's .index (not .columns).
# NOTE(review): this presumes n_df and tb_df are the per-column mean Series
# produced by the preceding cells, whose index holds the column names -- confirm.
common_columns = (
    n_df.index
    .intersection(tb_df.index)
)

In [None]:
# Run an independent two-sample t-test for every shared label and store the
# statistic and p-value under that label.
# NOTE(review): the inputs are whatever n_df[column] / tb_df[column] hold at
# this point. If n_df and tb_df are the per-column mean Series, each group has
# a single observation and the test cannot yield a meaningful result -- confirm
# that the raw tables are what should be compared here.
t_test_results = {
    column: dict(zip(('t_stat', 'p_value'), ttest_ind(n_df[column], tb_df[column])))
    for column in common_columns
}

In [56]:
# Report every stored t-test, one output line per label.
for index in t_test_results:
    result = t_test_results[index]
    print(f"Column: {index}, t-statistic: {result['t_stat']}, p-value: {result['p_value']}")

Compare the TBI (TB) and control (N) groups column by column, and present the results as a readable table.

In [None]:
# Per-column comparison of the control (n) and TBI (tb) tables: an independent
# two-sample t-test for every column the two files share, collected into
# results_df (one row per column) together with each group's mean.

# Read the CSV files without skipping any rows or columns; read_csv uses each
# file's first line as the column names.
n_raw = pd.read_csv('n_CM.csv')
tb_raw = pd.read_csv('tb_CM.csv')

# Set aside the first data row of each file and the first column of the
# control file; they are excluded from the statistics below.
# NOTE(review): presumably these hold labels/metadata rather than
# measurements -- confirm against the CSV layout.
n_first_row = n_raw.iloc[0, 1:]
tb_first_row = tb_raw.iloc[0, 1:]
first_column = n_raw.iloc[:, 0]

# Numeric view of the remaining cells; anything unparseable becomes NaN.
# Rows are deliberately NOT dropped frame-wide: a NaN in one column must not
# discard that row's valid values in every other column, since each column is
# tested on its own.
n_df = n_raw.iloc[1:, 1:].apply(pd.to_numeric, errors='coerce')
tb_df = tb_raw.iloc[1:, 1:].apply(pd.to_numeric, errors='coerce')

# Only columns present in both files can be compared
common_columns = n_df.columns.intersection(tb_df.columns)

# Perform t-tests on each column independently, using that column's own
# non-missing observations
t_test_results = {}
for column in common_columns:
    n_values = n_df[column].dropna()
    tb_values = tb_df[column].dropna()
    t_stat, p_value = ttest_ind(n_values, tb_values)
    t_test_results[column] = {'t_stat': t_stat, 'p_value': p_value}

# Create a results dataframe for readability: one row per compared column
results_df = pd.DataFrame(t_test_results).T
results_df['Control Mean'] = n_df[common_columns].mean()  # mean skips NaN
results_df['TBI Mean'] = tb_df[common_columns].mean()

# Attach each file's first-row value next to the column it belongs to. Both
# Series are indexed by column name, so they align with results_df's index.
# (Assigning them as a new *row* would align against the result column names
# instead and produce nothing but NaN.)
results_df['Control First Row'] = n_first_row
results_df['TBI First Row'] = tb_first_row

# first_column is indexed by input row position, whereas results_df is indexed
# by column name, so it cannot be aligned with the per-column results; it stays
# available as a variable but is not merged into the table.

# Print the results
print(results_df)

In [75]:
# Persist the results table; index=True (the pandas default, spelled out here)
# writes the row labels as the first CSV column.
results_df.to_csv('t_test_results.csv', index=True)

Overall t-test comparing the first manually filtered versions of the N and TB CM CSV files; the result is not significant.

In [None]:
# Overall comparison: pool every numeric cell of each table into one sample
# per group and run a single independent two-sample t-test on the two pools.

# Read the CSV files, skipping the first line of each file and dropping the
# first column. With the first line skipped, read_csv takes the next line as
# the column names, so that line is not part of the data either.
n_df = pd.read_csv('n_CM.csv', skiprows=1).iloc[:, 1:]
tb_df = pd.read_csv('tb_CM.csv', skiprows=1).iloc[:, 1:]

# Convert all columns to numeric, coercing errors to NaN
n_df = n_df.apply(pd.to_numeric, errors='coerce')
tb_df = tb_df.apply(pd.to_numeric, errors='coerce')

# Flatten each table into a single 1-D sample, then discard missing values
# one by one. Dropping whole rows before flattening would also throw away the
# valid values that happen to share a row with a single NaN.
n_series = n_df.to_numpy(dtype=float).ravel()
n_series = n_series[~np.isnan(n_series)]
tb_series = tb_df.to_numpy(dtype=float).ravel()
tb_series = tb_series[~np.isnan(tb_series)]

# Perform the overall t-test
t_stat, p_value = ttest_ind(n_series, tb_series)

# Print the overall t-test result
print(f"Overall t-test: t-statistic = {t_stat}, p-value = {p_value}")