In [None]:
import pandas as pd

# Path of one participant's summary file, used to inspect the CSV layout
file_path = r"D:\01-Raw_data-spectro\Precuneus\3100205_255881_V17S\first_run_data\fit_tissue_adjusted\summary.csv"

# Header banner followed by the file being inspected
banner = "=" * 80
print(banner)
print("SAMPLE FILE STRUCTURE")
print(banner)
print(f"\nFile: {file_path}\n")

# Load the CSV into a DataFrame
df = pd.read_csv(file_path)

# Overall dimensions
n_rows, n_cols = df.shape
print(f"Shape: {n_rows} rows × {n_cols} columns\n")

# Column names, then a preview of the rows and the inferred dtypes;
# each section is followed by blank lines
sections = (
    ("Column names:", df.columns.tolist()),
    ("First few rows:", df.head(10)),
    ("Data types:", df.dtypes),
)
for heading, payload in sections:
    print(heading)
    print(payload)
    print("\n")

# Finally dump the complete table
print("Full data:")
print(df)



In [None]:
"""
Parse glutamate spectroscopy data from Precuneus folder
Creates one output file with all participants and their metabolite measurements
"""

import pandas as pd
import os
from pathlib import Path

# Configuration: folder scanned for participant sub-folders, and the CSV written by main()
PRECUNEUS_FOLDER = r"D:\01-Raw_data-spectro\Precuneus"
OUTPUT_FILE = r"c:\Users\okkam\Documents\GitHub\glutamate_ad_longitudinal_sourcecode\precuneus_metabolite_data.csv"

# Metabolite names looked up in the 'Metab' column of each summary file.
# The list order determines the order of the output columns.
METABOLITES = [
    'Glu',
    'GABA',
    'Gly',
    'NAA',
    'Cr',
    'Cr+PCr',
    'GPC',
    'PCh',
    'mI',
]

# Summary-file columns copied for every metabolite (in order)
COLUMNS = [
    'mM',
    '/Cr+PCr',
    '%CRLB',
    'SNR',
    'FWHM',
]


def extract_participant_data(summary_file_path, participant_id, metabolites=None, columns=None):
    """
    Extract metabolite data from a single participant's summary.csv file

    The file is expected to have a 'Metab' column naming the metabolite on
    each row. When a metabolite appears on several rows, the first row wins.

    Args:
        summary_file_path: Path to the summary.csv file
        participant_id: Participant folder name (e.g., 3100205_255881_V17S)
        metabolites: Metabolite names to extract, in output order.
            Defaults to the module-level METABOLITES list.
        columns: Column names to extract for each metabolite, in output order.
            Defaults to the module-level COLUMNS list.

    Returns:
        Dictionary with 'Participant_ID' followed by one
        '<metabolite>_<column>' entry per requested combination. Entries for
        a metabolite that is absent from the file are None.

    Raises:
        KeyError: If the file has no 'Metab' column, or a requested column is
            missing while the metabolite itself is present.
    """
    # Fall back to the module-level configuration only when not overridden
    if metabolites is None:
        metabolites = METABOLITES
    if columns is None:
        columns = COLUMNS

    # Read the summary file
    df = pd.read_csv(summary_file_path)

    # Initialize result dictionary with participant ID
    result = {'Participant_ID': participant_id}

    for metabolite in metabolites:
        # All rows describing this metabolite (normally zero or one)
        matches = df.loc[df['Metab'] == metabolite]
        found = not matches.empty

        for column in columns:
            # Metabolite not found -> None; pandas writes it as an empty CSV cell.
            # Values are read column-wise so each keeps its column dtype.
            result[f'{metabolite}_{column}'] = matches[column].iloc[0] if found else None

    return result


def parse_all_participants(folder=None):
    """
    Parse all participants in the Precuneus folder

    Every immediate sub-folder is treated as one participant; its data is read
    from first_run_data/fit_tissue_adjusted/summary.csv. Participants whose
    summary file is missing or fails to parse are reported and skipped.

    Args:
        folder: Folder containing the participant sub-folders.
            Defaults to the module-level PRECUNEUS_FOLDER.

    Returns:
        DataFrame with all participants' data (one row per participant,
        ordered by folder name), or None when the folder is not a directory
        or no participant could be processed
    """
    source = PRECUNEUS_FOLDER if folder is None else folder
    source_path = Path(source)

    # is_dir() rather than exists(): a regular file at this path would pass
    # exists() and then make iterdir() raise NotADirectoryError.
    if not source_path.is_dir():
        print(f"ERROR: Precuneus folder not found at {source}")
        return None

    # Sort the folders so the output row order is reproducible; iterdir()
    # yields entries in arbitrary order.
    participant_folders = sorted(f for f in source_path.iterdir() if f.is_dir())

    print(f"Found {len(participant_folders)} participant folders")
    print("Processing participants...\n")

    all_data = []
    for participant_folder in participant_folders:
        participant_id = participant_folder.name

        # Construct path to summary.csv
        summary_path = participant_folder / "first_run_data" / "fit_tissue_adjusted" / "summary.csv"

        if not summary_path.exists():
            print(f"✗ Summary file not found for {participant_id}")
            continue

        try:
            participant_data = extract_participant_data(summary_path, participant_id)
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not abort the
            # whole batch, so the error is reported and the participant skipped.
            print(f"✗ Error processing {participant_id}: {e}")
        else:
            all_data.append(participant_data)
            print(f"✓ Processed: {participant_id}")

    if not all_data:
        print("No data was extracted!")
        return None

    # Convert to DataFrame
    df = pd.DataFrame(all_data)
    print(f"\n{'='*80}")
    print(f"Successfully processed {len(all_data)} participants")
    print(f"{'='*80}")
    return df


def main():
    """Print the run configuration, parse every participant and write the CSV."""
    rule = "="*80

    # Header describing what is about to be processed
    print(rule)
    print("PRECUNEUS METABOLITE DATA PARSER")
    print(rule)
    print(f"\nSource folder: {PRECUNEUS_FOLDER}")
    print(f"Output file: {OUTPUT_FILE}")
    print(f"\nMetabolites: {', '.join(METABOLITES)}")
    print(f"Columns per metabolite: {', '.join(COLUMNS)}")
    print(rule)
    print()

    # Collect one row per participant
    table = parse_all_participants()

    # Nothing to write when parsing produced no table
    if table is None:
        print("\n✗ No output file created")
        return

    # Persist the table and show a short summary
    table.to_csv(OUTPUT_FILE, index=False)
    n_rows, n_cols = table.shape
    print(f"\n✓ Data saved to: {OUTPUT_FILE}")
    print(f"\nOutput shape: {n_rows} rows × {n_cols} columns")
    print("\nFirst few rows:")
    print(table.head())


if __name__ == "__main__":
    main()


In [None]:
"""
Parse glutamate spectroscopy data from ACC folder
Creates one output file with all participants and their metabolite measurements
"""

import pandas as pd
import os
from pathlib import Path

# Configuration
# NOTE: the constant keeps the name PRECUNEUS_FOLDER because the functions in
# this cell read it under that name; in this cell it points at the ACC data.
PRECUNEUS_FOLDER = r"D:\01-Raw_data-spectro\ACC"
# ACC results get their own file so that running this cell does not overwrite
# the Precuneus output (precuneus_metabolite_data.csv).
OUTPUT_FILE = r"c:\Users\okkam\Documents\GitHub\glutamate_ad_longitudinal_sourcecode\acc_metabolite_data.csv"

# Metabolites to extract (in order)
METABOLITES = ['Glu', 'GABA', 'Gly', 'NAA', 'Cr', 'Cr+PCr', 'GPC', 'PCh', 'mI']

# Columns to extract for each metabolite (in order)
COLUMNS = ['mM', '/Cr+PCr', '%CRLB', 'SNR', 'FWHM']


def extract_participant_data(summary_file_path, participant_id, metabolites=None, columns=None):
    """
    Extract metabolite data from a single participant's summary.csv file

    The file is expected to have a 'Metab' column naming the metabolite on
    each row. When a metabolite appears on several rows, the first row wins.

    Args:
        summary_file_path: Path to the summary.csv file
        participant_id: Participant folder name (e.g., 3100205_255881_V17S)
        metabolites: Metabolite names to extract, in output order.
            Defaults to the module-level METABOLITES list.
        columns: Column names to extract for each metabolite, in output order.
            Defaults to the module-level COLUMNS list.

    Returns:
        Dictionary with 'Participant_ID' followed by one
        '<metabolite>_<column>' entry per requested combination. Entries for
        a metabolite that is absent from the file are None.

    Raises:
        KeyError: If the file has no 'Metab' column, or a requested column is
            missing while the metabolite itself is present.
    """
    # Fall back to the module-level configuration only when not overridden
    if metabolites is None:
        metabolites = METABOLITES
    if columns is None:
        columns = COLUMNS

    # Read the summary file
    df = pd.read_csv(summary_file_path)

    # Initialize result dictionary with participant ID
    result = {'Participant_ID': participant_id}

    for metabolite in metabolites:
        # All rows describing this metabolite (normally zero or one)
        matches = df.loc[df['Metab'] == metabolite]
        found = not matches.empty

        for column in columns:
            # Metabolite not found -> None; pandas writes it as an empty CSV cell.
            # Values are read column-wise so each keeps its column dtype.
            result[f'{metabolite}_{column}'] = matches[column].iloc[0] if found else None

    return result


def parse_all_participants(folder=None):
    """
    Parse all participants in the source folder

    Every immediate sub-folder is treated as one participant; its data is read
    from first_run_data/fit_tissue_adjusted/summary.csv. Participants whose
    summary file is missing or fails to parse are reported and skipped.

    Args:
        folder: Folder containing the participant sub-folders.
            Defaults to the module-level PRECUNEUS_FOLDER (which this cell
            sets to the ACC data folder).

    Returns:
        DataFrame with all participants' data (one row per participant,
        ordered by folder name), or None when the folder is not a directory
        or no participant could be processed
    """
    source = PRECUNEUS_FOLDER if folder is None else folder
    source_path = Path(source)

    # is_dir() rather than exists(): a regular file at this path would pass
    # exists() and then make iterdir() raise NotADirectoryError.
    if not source_path.is_dir():
        # Worded generically: this cell reads the ACC folder, so the previous
        # "Precuneus folder" wording named the wrong region.
        print(f"ERROR: Source folder not found at {source}")
        return None

    # Sort the folders so the output row order is reproducible; iterdir()
    # yields entries in arbitrary order.
    participant_folders = sorted(f for f in source_path.iterdir() if f.is_dir())

    print(f"Found {len(participant_folders)} participant folders")
    print("Processing participants...\n")

    all_data = []
    for participant_folder in participant_folders:
        participant_id = participant_folder.name

        # Construct path to summary.csv
        summary_path = participant_folder / "first_run_data" / "fit_tissue_adjusted" / "summary.csv"

        if not summary_path.exists():
            print(f"✗ Summary file not found for {participant_id}")
            continue

        try:
            participant_data = extract_participant_data(summary_path, participant_id)
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not abort the
            # whole batch, so the error is reported and the participant skipped.
            print(f"✗ Error processing {participant_id}: {e}")
        else:
            all_data.append(participant_data)
            print(f"✓ Processed: {participant_id}")

    if not all_data:
        print("No data was extracted!")
        return None

    # Convert to DataFrame
    df = pd.DataFrame(all_data)
    print(f"\n{'='*80}")
    print(f"Successfully processed {len(all_data)} participants")
    print(f"{'='*80}")
    return df


def main():
    """
    Main function to run the parsing

    Prints the run configuration, parses every participant folder via
    parse_all_participants() and, when a table is returned, writes it to
    OUTPUT_FILE as CSV without the index column.
    """
    print("="*80)
    # This cell parses the ACC folder, so the banner names ACC (the previous
    # text was copied from the Precuneus cell).
    print("ACC METABOLITE DATA PARSER")
    print("="*80)
    print(f"\nSource folder: {PRECUNEUS_FOLDER}")
    print(f"Output file: {OUTPUT_FILE}")
    print(f"\nMetabolites: {', '.join(METABOLITES)}")
    print(f"Columns per metabolite: {', '.join(COLUMNS)}")
    print("="*80)
    print()

    # Parse all participants
    df = parse_all_participants()

    # parse_all_participants() returns None when nothing could be extracted
    if df is None:
        print("\n✗ No output file created")
        return

    # Save to CSV
    df.to_csv(OUTPUT_FILE, index=False)
    print(f"\n✓ Data saved to: {OUTPUT_FILE}")
    print(f"\nOutput shape: {df.shape[0]} rows × {df.shape[1]} columns")
    print("\nFirst few rows:")
    print(df.head())


if __name__ == "__main__":
    main()
