In [1]:
import os

###################################################################################
# Machine-specific paths: check that these match the machine the notebook is
# running on (change if necessary)

# Finn's Laptop
studydir = '/mnt/d/KPUM_NODDI/Data'
codedir = os.path.join('/home/finn/Code/KPUM_NODDI_ssd','kpum_noddi')
# NOTE(review): the next line overwrites the codedir assigned just above, so only
# this second value takes effect - confirm which of the two paths is intended
codedir = os.path.join('/home/finn/Code/','kpum_noddi')

# KPUM Workstation
#studydir = '/mnt/e/Finn/KPUM_NODDI/Data'
#codedir = os.path.join('/home/radio/KPUM_NODDI','code/kpum_noddi')

###################################################################################
# USER INPUT - START

# Study details
derivatives = '' # e.g. 'derivatives/dMRI_np' or 'derivatives/dMRI_op'
protocol = '' # 'NEW' or 'ORIG'
clinicaldata = 'NODDI_Clinical_Information_FL_suggestion_20250326.xlsx' # File name of the up-to-date clinical data spreadsheet

# USER INPUT - END
##################################################################################

# tsv-files to keep track of in studydir/niftibasefolder
# (file name only; it is not joined with any folder in this cell)
subjecttrackerfile = 'Subject_Tracker_for_dmri_dtidkinoddi_pipeline.tsv'


In [2]:
import pandas as pd
import numpy as np
import os
import glob

# Define I/O folders and files
# Name of the sub-folder (joined onto studydir below) that holds the clinical spreadsheet
clinicaldatafolder = 'clinicaldata'
# Full path to the spreadsheet: <studydir>/<clinicaldatafolder>/<clinicaldata>
clinicaldatafile = os.path.join(studydir, clinicaldatafolder, clinicaldata)

def perform_process(processcall):
    """Run a shell command and echo its standard output line by line while it runs.

    Parameters
    ----------
    processcall : str
        Command line handed to the shell (``shell=True``). It is executed
        verbatim, so it must only ever be built from trusted input.

    Returns
    -------
    int
        Exit status of the command (0 conventionally means success).
    """
    import subprocess
    # Only stdout is piped; stderr is left connected to wherever the kernel sends it.
    # The context manager closes the pipe and reaps the process on exit.
    with subprocess.Popen(processcall, stdout=subprocess.PIPE, shell=True) as p:
        # Read until EOF instead of stopping as soon as the process has exited:
        # a finished process can still have unread lines sitting in the pipe,
        # and breaking on poll() would silently drop them.
        for output in p.stdout:
            # errors="replace" keeps the loop alive if the tool emits non-UTF-8 bytes
            print(output.strip().decode("utf-8", errors="replace"))
        rc = p.wait()
    return rc


########################################
## START

# Remember the directory the notebook was started from, then work from studydir.
# (currdir is not used again in this cell.)
currdir = os.getcwd()
os.chdir(studydir)

# Read the 'Patient Data' sheet of the clinical spreadsheet.
# dtype=str reads every cell as text so that empty cells are preserved as such.
df_clinical = pd.read_excel(clinicaldatafile, dtype=str, sheet_name='Patient Data')


def _blank_to_nan(value):
    """Return NaN for missing, empty or whitespace-only cells; any other value unchanged."""
    return np.nan if pd.isna(value) or str(value).strip() == "" else value


# Replace empty strings and whitespace-only strings with NaN.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map, so
# use the new name when this pandas has it and fall back to applymap on older
# versions. The element-wise behaviour is the same either way.
_elementwise = df_clinical.map if hasattr(pd.DataFrame, "map") else df_clinical.applymap
df_clinical = _elementwise(_blank_to_nan)
# Remove columns where all values are NaN
df_clinical = df_clinical.dropna(axis=1, how='all')
# Drop rows where every column after the first is NaN.
# NOTE(review): this relies on 'Study ID' being the first column - confirm against the sheet layout
df_clinical = df_clinical.dropna(subset=df_clinical.columns[1:], how='all')
# Renumber the rows 0..n-1 after the drops
df_clinical = df_clinical.reset_index(drop=True)


# Lift pandas' display limits so the frame below is printed without truncation
for display_option in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(display_option, None)
print("\nFull dataframe after removing all-NaN columns:")
print(df_clinical)

# Short overview of the patient table: size, first rows, column names
print("Patient Data Summary:", "-" * 50, f"Number of patients: {len(df_clinical)}", sep="\n")
print("\nFirst few rows of patient data:")
display(df_clinical.head())
print("\nColumns in patient data:")
print(df_clinical.columns.tolist())








In [3]:
#
# PRINT SOME DEMOGRAPHIC DATA
# 

import seaborn as sns
import matplotlib.pyplot as plt

# Pick the demographic columns out of the clinical table and give them
# analysis-friendly names (spreadsheet header -> new column name)
demo_column_map = {
    'Study ID': 'Subject_ID',
    'Sex': 'Sex',
    'GAatBirth Week': 'GAatBirth_Week',
    'GAatBirth Day': 'GAatBirth_Days',
    'BodyWeight (g)': 'Birth_Weight',
}
df_clinical_demo = df_clinical[list(demo_column_map)].rename(columns=demo_column_map)

# The spreadsheet was read as text: convert the numeric fields, turning anything
# that does not parse as a number into NaN
for numeric_column in ('GAatBirth_Week', 'GAatBirth_Days', 'Birth_Weight'):
    df_clinical_demo[numeric_column] = pd.to_numeric(df_clinical_demo[numeric_column], errors='coerce')

# Gestational age at birth as a single number in weeks: weeks + days / 7
df_clinical_demo['Combined_GA'] = df_clinical_demo['GAatBirth_Week'] + df_clinical_demo['GAatBirth_Days'].div(7)

# One figure, two side-by-side panels: sex counts (left), gestational-week histogram (right)
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

# Both panels draw from the demographic table ordered by gestational week
df_clinical_demo_sorted = df_clinical_demo.sort_values('GAatBirth_Week')

# Left panel: number of subjects per sex
sns.countplot(x='Sex', data=df_clinical_demo_sorted, ax=ax1)
ax1.set_title('Sex Distribution of Subjects')
ax1.set_xlabel('Sex')
ax1.set_ylabel('Count')

# Write each bar's count above it
for bar_group in ax1.containers:
    ax1.bar_label(bar_group)

# Right panel: histogram of gestational week at birth, 10 bins
sns.histplot(x='GAatBirth_Week', data=df_clinical_demo_sorted, bins=10, ax=ax2)
ax2.set_title('Gestational Week Distribution')
ax2.set_xlabel('Gestational Week')
ax2.set_ylabel('Count')

# Re-order the demographic table by gestational week, then by birth weight (both ascending)
df_clinical_demo_sorted = df_clinical_demo.sort_values(['GAatBirth_Week', 'Birth_Weight'])

# Separate figure: birth weight against combined gestational age, coloured by sex
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df_clinical_demo_sorted, x="Combined_GA", y="Birth_Weight", hue="Sex", palette="Set2")

# Title and axis labels
plt.title("Scatter Plot of Birth Weight vs. Gestational Week", fontsize=16, fontweight='bold')
plt.xlabel("Gestational Week (GW)", fontsize=14)
plt.ylabel("Birth Weight (g)", fontsize=14)

# Same tick-label size on both axes
for set_tick_style in (plt.xticks, plt.yticks):
    set_tick_style(fontsize=12)

# Dashed, semi-transparent grid on both axes
plt.grid(axis='both', linestyle='--', alpha=0.7)

# Render the open figures
plt.show()

# Print summary statistics for the demographic table:
# subject count, subjects per sex, and describe() of gestational week and birth weight
print("\nDemographic Summary for Subjects:")
print("-" * 50)
print(f"Total subjects: {len(df_clinical_demo_sorted)}")
print("\nSex Distribution:")
print(df_clinical_demo_sorted['Sex'].value_counts())
print("\nGestational Week Statistics:")
print(df_clinical_demo_sorted['GAatBirth_Week'].describe())
# Heading previously read "Birth Weigh Statistics" (typo)
print("\nBirth Weight Statistics:")
print(df_clinical_demo_sorted['Birth_Weight'].describe())






