In [1]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('abcd_p_screen.csv')

# Print total and value counts for each target column
for col in ['scrn_asd', 'scrn_asd_regclasses', 'scrn_sud']:

    print(f'\nColumn =================== {col}')
    print('Total non-null:', df[col].notnull().sum())
    print(df[col].value_counts(dropna=False))

# Function to get src_subject_id keys where all given columns == 1
def get_keys_all_ones(df, columns):
    mask = (df[columns] == 1).all(axis=1)
    return df.loc[mask, 'src_subject_id'].dropna().unique()

# Example usage — you can change the list below to any columns you want
target_columns = ['scrn_asd', 'scrn_asd_regclasses']#, 'scrn_sud']
passing_keys = get_keys_all_ones(df, target_columns)

# Save the result to a txt file
with open('abcd_qc_passing_keys_195.txt', 'w') as f:
    for key in passing_keys:
        f.write(f"{key}\n")

print(f"\nSaved {len(passing_keys)} passing keys to 'abcd_qc_passing_keys.txt'")


Total non-null: 11828
scrn_asd
0.0    11627
1.0      201
NaN       39
Name: count, dtype: int64

Total non-null: 201
scrn_asd_regclasses
NaN    11666
1.0      195
0.0        6
Name: count, dtype: int64

Total non-null: 11830
scrn_sud
0.0    11829
NaN       37
1.0        1
Name: count, dtype: int64

Saved 195 passing keys to 'abcd_qc_passing_keys.txt'


In [2]:
# Define input and output file names
input_filename = "abcd_qc_passing_keys_195.txt"
output_filename = "abcd_qc_passing_keys_195_no_undersc.txt"

# Read the file and remove underscores from each line
with open(input_filename, "r") as f:
    lines = f.read().splitlines()

lines_no_underscore = [line.replace("_", "") for line in lines]

# Write the modified lines to the new file
with open(output_filename, "w") as f:
    for line in lines_no_underscore:
        f.write(line + "\n")

print(f"Modified file saved as: {output_filename}")

Modified file saved as: abcd_qc_passing_keys_195_no_undersc.txt


In [None]:
import pandas as pd
import os
import re

# === Load abcd_p_screen.csv and extract src_subject_id ===
df = pd.read_csv('abcd_p_screen.csv')
csv_ids_raw = df['src_subject_id'].dropna().unique()
csv_ids = set(id_.replace('_', '') for id_ in csv_ids_raw)

print(f"Total IDs in CSV: {len(csv_ids)}")

# === Extract NDARINV IDs from folder names ===
data_folder = '/BEE/Connectome/ABCD/ImageData/Data'
data_ids = set()
folder_paths = {}

for name in os.listdir(data_folder):
    match = re.match(r"sub-(NDARINV[0-9A-Z]+)", name)
    if match:
        subject_id = match.group(1)
        data_ids.add(subject_id)
        folder_paths[subject_id] = os.path.join(data_folder, name)

print(f"Total IDs in Data folder: {len(data_ids)}")

# === Compare sets ===
common_ids = csv_ids & data_ids
only_in_csv = csv_ids - data_ids
only_in_data = data_ids - csv_ids

print(f"IDs in both CSV and Data: {len(common_ids)}")
print(f"IDs in CSV but NOT in Data: {len(only_in_csv)}")
print(f"IDs in Data but NOT in CSV: {len(only_in_data)}")

# === Check T1w and T2w availability among common IDs ===
count_with_t1_and_t2 = 0

for subject_id in common_ids:
    anat_path = os.path.join(folder_paths[subject_id], 'ses-baselineYear1Arm1', 'anat')
    has_t1w = has_t2w = False

    if os.path.isdir(anat_path):
        for file in os.listdir(anat_path):
            if re.search(r'_T1w\.nii\.gz$', file):
                has_t1w = True
            if re.search(r'_T2w\.nii\.gz$', file):
                has_t2w = True

    if has_t1w and has_t2w:
        count_with_t1_and_t2 += 1

print(f"Common IDs with both T1w and T2w: {count_with_t1_and_t2}")



Total IDs in CSV: 11867
Total IDs in Data folder: 3884
IDs in both CSV and Data: 3877
IDs in CSV but NOT in Data: 7990
IDs in Data but NOT in CSV: 7
Common IDs with both T1w and T2w: 3781


In [31]:
import os
import re

# === Load abcd_qc_passing_keys.txt and normalize IDs ===
with open("abcd_qc_passing_keys.txt", "r") as f:
    txt_ids_raw = [line.strip() for line in f if line.strip()]
txt_ids = set(id_.replace('_', '') for id_ in txt_ids_raw)

print(f"Total IDs in abcd_qc_passing_keys.txt: {len(txt_ids)}")

# === Extract NDARINV IDs from folder names ===
data_folder = '/BEE/Connectome/ABCD/ImageData/Data'
data_ids = set()
folder_paths = {}

for name in os.listdir(data_folder):
    match = re.match(r"sub-(NDARINV[0-9A-Z]+)", name)
    if match:
        subject_id = match.group(1)
        data_ids.add(subject_id)
        folder_paths[subject_id] = os.path.join(data_folder, name)

print(f"Total IDs in Data folder: {len(data_ids)}")

# === Compare sets ===
common_ids = txt_ids & data_ids
only_in_txt = txt_ids - data_ids
only_in_data = data_ids - txt_ids

print(f"IDs in both TXT and Data: {len(common_ids)}")
print(f"IDs in TXT but NOT in Data: {len(only_in_txt)}")
print(f"IDs in Data but NOT in TXT: {len(only_in_data)}")

# === Check T1w and T2w availability among common IDs ===
count_with_t1_and_t2 = 0

for subject_id in common_ids:
    anat_path = os.path.join(folder_paths[subject_id], 'ses-baselineYear1Arm1', 'anat')
    has_t1w = has_t2w = False

    if os.path.isdir(anat_path):
        for file in os.listdir(anat_path):
            if re.search(r'_T1w\.nii\.gz$', file):
                has_t1w = True
            if re.search(r'_T2w\.nii\.gz$', file):
                has_t2w = True

    if has_t1w and has_t2w:
        count_with_t1_and_t2 += 1

print(f"Common IDs with both T1w and T2w: {count_with_t1_and_t2}")



# # Save the result to a txt file
# with open('missing_qc_ids_in_data.txt', 'w') as f:
#     for key in only_in_txt:
#         f.write(f"{key}\n")

Total IDs in abcd_qc_passing_keys.txt: 195
Total IDs in Data folder: 3884
IDs in both TXT and Data: 41
IDs in TXT but NOT in Data: 154
IDs in Data but NOT in TXT: 3843
Common IDs with both T1w and T2w: 40


In [32]:
import pandas as pd
import os
import re

# Load the CSV file
df = pd.read_csv('abcd_p_screen.csv')

# Print total and value counts for each target column
for col in ['scrn_asd', 'scrn_asd_regclasses', 'scrn_sud']:
    print(f'\nColumn =================== {col}')
    print('Total non-null:', df[col].notnull().sum())
    print(df[col].value_counts(dropna=False))

# Function to get src_subject_id keys where all given columns == 1
def get_keys_all_ones(df, columns):
    mask = (df[columns] == 1).all(axis=1)
    return df.loc[mask, 'src_subject_id'].dropna().unique()

# Step 1: Get passing keys from screening CSV
target_columns = ['scrn_asd', 'scrn_asd_regclasses']  # Change if needed
passing_keys = get_keys_all_ones(df, target_columns)
print(f"\nInitial passing keys from CSV (all columns == 1): {len(passing_keys)}")

# Step 2: Normalize keys (remove _ to match folder names)
passing_keys_normalized = {k.replace('_', ''): k for k in passing_keys}

# Step 3: Check which have both T1w and T2w images
data_folder = '/BEE/Connectome/ABCD/ImageData/Data'
folder_paths = {}

for name in os.listdir(data_folder):
    match = re.match(r"sub-(NDARINV[0-9A-Z]+)", name)
    if match:
        subject_id = match.group(1)
        folder_paths[subject_id] = os.path.join(data_folder, name)

# Filter those with both T1w and T2w
final_keys = []

for normalized_id, original_id in passing_keys_normalized.items():
    subject_path = folder_paths.get(normalized_id)
    if not subject_path:
        continue  # Subject folder not found

    anat_path = os.path.join(subject_path, 'ses-baselineYear1Arm1', 'anat')
    has_t1w = has_t2w = False

    if os.path.isdir(anat_path):
        for file in os.listdir(anat_path):
            if re.search(r'_T1w\.nii\.gz$', file):
                has_t1w = True
            if re.search(r'_T2w\.nii\.gz$', file):
                has_t2w = True

    if has_t1w and has_t2w:
        final_keys.append(original_id)

# Step 4: Save the filtered keys
with open('abcd_qc_passing_keys.txt', 'w') as f:
    for key in sorted(final_keys):
        f.write(f"{key}\n")

print(f"\nFinal QC-passing keys with both T1w and T2w: {len(final_keys)}")
print("Saved to 'abcd_qc_passing_keys.txt'")


Total non-null: 11828
scrn_asd
0.0    11627
1.0      201
NaN       39
Name: count, dtype: int64

Total non-null: 201
scrn_asd_regclasses
NaN    11666
1.0      195
0.0        6
Name: count, dtype: int64

Total non-null: 11830
scrn_sud
0.0    11829
NaN       37
1.0        1
Name: count, dtype: int64

Initial passing keys from CSV (all columns == 1): 195

Final QC-passing keys with both T1w and T2w: 40
Saved to 'abcd_qc_passing_keys.txt'


## Below is for the new ABCD dataz copied from Longleaf

In [3]:
import os
import re

# ----------------------
# 1) Load the TXT file
# ----------------------
input_txt = "abcd_qc_passing_keys_195.txt"
with open(input_txt, "r") as f:
    lines = [line.strip() for line in f if line.strip()]

# Keep a map of "normalized_key" -> "original_key_with_underscores"
normalized_to_original = {}
for line in lines:
    norm = line.replace("_", "")
    normalized_to_original[norm] = line

# We only need the normalized set for matching
keys_normalized_set = set(normalized_to_original.keys())
print(f"Total keys (original lines): {len(lines)}")
print(f"Unique normalized keys: {len(keys_normalized_set)}")

# --------------------------------------------------
# 2) Gather subject folders in Data_abcd_asd_scr_pos
# --------------------------------------------------
data_folder = "/BEE/Connectome/ABCD/ImageData/Data_abcd_asd_scr_pos"
folder_names = os.listdir(data_folder)

# We'll extract the NDARINV ID from folder names like: sub-NDARINVXXXXXX
data_ids = set()
for name in folder_names:
    match = re.match(r"sub-(NDARINV[0-9A-Z]+)", name)
    if match:
        data_ids.add(match.group(1))

print(f"Total subject folders in {data_folder}: {len(data_ids)}")

# --------------------------------------------------
# 3) Compare sets
# --------------------------------------------------
common_ids = keys_normalized_set & data_ids
missing_in_data = keys_normalized_set - data_ids
extra_in_data = data_ids - keys_normalized_set

# --------------------------------------------------
# 4) Print counts
# --------------------------------------------------
print("\n=== Comparison Results ===")
print(f"Keys found in data: {len(common_ids)}")
print(f"Keys missing in data: {len(missing_in_data)}")
print(f"Data folders not in key list: {len(extra_in_data)}")

# --------------------------------------------------
# 5) Save the found keys (in original underscore format)
# --------------------------------------------------
found_count = len(common_ids)
output_txt = f"abcd_qc_passing_keys_{found_count}.txt"

with open(output_txt, "w") as f:
    # Sort for consistency
    for norm_id in sorted(common_ids):
        original_line = normalized_to_original[norm_id]
        f.write(original_line + "\n")

print(f"\nSaved {found_count} found keys to '{output_txt}'")

Total keys (original lines): 195
Unique normalized keys: 195
Total subject folders in /BEE/Connectome/ABCD/ImageData/Data_abcd_asd_scr_pos: 161

=== Comparison Results ===
Keys found in data: 161
Keys missing in data: 34
Data folders not in key list: 0

Saved 161 found keys to 'abcd_qc_passing_keys_161.txt'


In [2]:
# Define the input and output filenames
input_file = "abcd_qc_passing_keys_161.txt"
output_file = "abcd_qc_passing_keys_161_no_undersc.txt"

# Read the input file and remove underscores from each line
with open(input_file, "r") as infile:
    lines = infile.readlines()

with open(output_file, "w") as outfile:
    for line in lines:
        # Remove leading/trailing whitespace and underscores, then write to file
        new_line = line.strip().replace("_", "")
        outfile.write(new_line + "\n")

print(f"Saved modified keys to {output_file}")

Saved modified keys to abcd_qc_passing_keys_161_no_undersc.txt


In [2]:
from antspynet import brain_extraction
help(brain_extraction)

Help on function brain_extraction in module antspynet.utilities.brain_extraction:

brain_extraction(image, modality, verbose=False)
    Perform brain extraction using U-net and ANTs-based training data.  "NoBrainer"
    is also possible where brain extraction uses U-net and FreeSurfer training data
    ported from the

    https://github.com/neuronets/nobrainer-models

    Arguments
    ---------
    image : ANTsImage
        input image (or list of images for multi-modal scenarios).

    modality : string
        Modality image type.  Options include:
            * "t1": T1-weighted MRI---ANTs-trained.  Previous versions are specified as "t1.v0", "t1.v1".
            * "t1nobrainer": T1-weighted MRI---FreeSurfer-trained: h/t Satra Ghosh and Jakub Kaczmarzyk.
            * "t1combined": Brian's combination of "t1" and "t1nobrainer".  One can also specify
                            "t1combined[X]" where X is the morphological radius.  X = 12 by default.
            * "t1threetissue":  

In [4]:
import ants
import antspynet
print("ants:", ants.__version__)
print("antspynet:", antspynet.__version__)

ants: 0.5.4


AttributeError: module 'antspynet' has no attribute '__version__'

In [5]:
import ants
print(ants.__file__)
print(ants.__version__)

/ASD2/emre_projects/miniconda3/envs/myenv/lib/python3.12/site-packages/ants/__init__.py
0.5.4
