<a href="https://colab.research.google.com/github/NkdPngn/JannComplexity24/blob/main/DemogQC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

First, check your cohort...

*Save yourself heartache and headaches by checking whether your control and experimental subjects are actually as well matched as you think.*  

In [None]:
# Neuroimaging Cohort QC Tool
# Created by Jolane Abrams, based on USC fMRI research experience

import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt
from google.colab import files

def covariate_qc(df, *, plot=True):
    """Run automated demographic checks for a two-group neuroimaging cohort.

    For each of the covariates ``age`` and ``puberty`` that is present in
    *df*, an independent two-sample t-test (NaNs omitted) compares the rows
    where ``group == 1`` against the rows where ``group == 2``.  Covariates
    that are absent from *df* are skipped, both in the statistics and in the
    figure.

    Args:
        df: DataFrame with a ``group`` column whose values 1 and 2 label the
            two cohorts, plus any of the covariate columns ``age`` and
            ``puberty``.
        plot: When true (the default), draw one panel per tested covariate
            on a new matplotlib figure: a box plot for ``age`` and a
            histogram for ``puberty``.  The figure is created but not shown;
            the caller decides when to call ``plt.show()``.

    Returns:
        Dict mapping each tested covariate name to
        ``{'p': <p-value rounded to 4 decimals>, 'test': 't-test'}``.

    Raises:
        ValueError: If *df* has no ``group`` column.
    """
    if 'group' not in df.columns:
        raise ValueError(
            "covariate_qc needs a 'group' column with the cohorts coded 1 and 2"
        )

    in_group_1 = df['group'] == 1
    in_group_2 = df['group'] == 2

    # Statistical tests
    # NOTE(review): puberty stage looks ordinal; a t-test treats it as
    # continuous -- confirm this is acceptable for your data.
    results = {}
    for col in ('age', 'puberty'):  # Add covariates
        if col not in df.columns:
            continue
        _, p = stats.ttest_ind(
            df.loc[in_group_1, col],
            df.loc[in_group_2, col],
            nan_policy='omit',
        )
        # float() keeps the result a plain Python number so it prints cleanly.
        results[col] = {'p': round(float(p), 4), 'test': 't-test'}

    # Visualization: only draw panels for covariates that were actually
    # tested, so a missing column can no longer raise KeyError here.
    panels = [col for col in ('age', 'puberty') if col in results]
    if not plot or not panels:
        return results

    plt.figure(figsize=(12, 4))
    for position, col in enumerate(panels, start=1):
        plt.subplot(1, len(panels), position)
        if col == 'age':
            sns.boxplot(x='group', y='age', data=df)
            plt.title(f"Age (p={results['age']['p']})")
        else:
            sns.histplot(data=df, x='puberty', hue='group', bins=5)
            plt.title(f"Puberty Stage (p={results['puberty']['p']})")

    return results

# Demo on simulated data: two groups of 50 whose ages are deliberately
# mismatched, so the age check should flag a difference.
_group1_ages = np.random.normal(20, 2, 50)  # Group 1: mean age 20
_group2_ages = np.random.normal(23, 2, 50)  # Group 2: mean age 23
_puberty_stages = np.random.randint(1, 6, 100)  # stages 1-5, independent of group

data = pd.DataFrame({
    'group': [1] * 50 + [2] * 50,
    'age': np.concatenate([_group1_ages, _group2_ages]),
    'puberty': _puberty_stages,
})

# Run the checks, display the figure, then report the p-values
results = covariate_qc(data)
plt.show()
print("Statistical results:\n", results)

# @title Upload your own data (CSV/Excel)
import io
import os

uploaded = files.upload()
if uploaded:
    # Only the first uploaded file is analysed.
    filename, payload = next(iter(uploaded.items()))
    # Parse the uploaded bytes directly instead of re-opening the file by
    # name, and pick the reader from the extension so Excel uploads work too.
    extension = os.path.splitext(filename)[1].lower()
    buffer = io.BytesIO(payload)
    if extension in ('.xls', '.xlsx'):
        df = pd.read_excel(buffer)
    else:
        df = pd.read_csv(buffer)
    user_results = covariate_qc(df)
    # Show the figure and p-values, matching the simulated-data example.
    plt.show()
    print("Statistical results:\n", user_results)