In [1]:
import pydicom
import os
import pandas as pd

# Path to your DICOM files
dicom_dir = '/Users/eleanorbolton/Library/CloudStorage/OneDrive-UniversityofLeeds/t1_vibe_we_hand_subset'

# CSV column label -> pydicom attribute keyword, in output column order.
# A missing attribute is reported as 'N/A' rather than raising.
metadata_fields = [
    ("Patient ID (0010, 0020)", "PatientID"),
    ("Study Date (0008, 0020)", "StudyDate"),
    ("Modality (0008, 0060)", "Modality"),
    ("Study Description (0008, 1030)", "StudyDescription"),
    ("Scanning Sequence (0018, 0020)", "ScanningSequence"),
    ("Sequence Name (0018, 0024)", "SequenceName"),
    ("Slice Thickness (0018, 0050)", "SliceThickness"),
    ("Repetition Time (TR) (0018, 0080)", "RepetitionTime"),
    ("Echo Time (TE) (0018, 0081)", "EchoTime"),
    ("Magnetic Field Strength (0018, 0087)", "MagneticFieldStrength"),
    ("Flip Angle (0018, 1314)", "FlipAngle"),
    ("Pixel Spacing (0028, 0030)", "PixelSpacing"),
    ("Rows (0028, 0010)", "Rows"),
    ("Columns (0028, 0011)", "Columns"),
    ("Study Instance UID (0020, 000d)", "StudyInstanceUID"),
    ("Series Instance UID (0020, 000e)", "SeriesInstanceUID"),
    ("SOP Instance UID (0008, 0018)", "SOPInstanceUID"),
]

# Initialize counters and results storage
subject_count = 0          # number of distinct patient IDs seen
total_images = 0           # number of DICOM files read
images_per_subject = {}    # patient ID -> number of DICOM files for that patient
results = []               # one metadata row per patient (first file encountered)
processed_patients = set()

# Walk the whole tree. Every matching file is counted, but only the first
# file seen for each patient contributes a metadata row.
for root, dirs, files in os.walk(dicom_dir):
    for file in files:
        # Match the extension case-insensitively so '.dcm' files are not missed.
        if not file.lower().endswith('.dcm'):
            continue

        filepath = os.path.join(root, file)
        # Only header tags are needed, so skip loading the pixel data.
        dicom_data = pydicom.dcmread(filepath, stop_before_pixels=True)

        # Use the same 'N/A' fallback as the metadata columns so a file
        # without a PatientID does not abort the scan.
        patient_id = getattr(dicom_data, 'PatientID', 'N/A')

        # Count every image before deciding whether to record metadata;
        # counting after the skip would cap every patient at one image.
        images_per_subject[patient_id] = images_per_subject.get(patient_id, 0) + 1
        total_images += 1

        # Metadata is recorded once per patient.
        if patient_id in processed_patients:
            continue
        processed_patients.add(patient_id)
        subject_count += 1

        result = {"File Name": os.path.basename(filepath)}
        for label, keyword in metadata_fields:
            result[label] = getattr(dicom_data, keyword, 'N/A')
        results.append(result)

        # No `break` here: a directory may hold files from several patients,
        # and the processed_patients check already prevents duplicate rows.

# Convert the results to a DataFrame. Explicit columns keep the CSV header
# (and column order) intact even when no DICOM files were found.
column_names = ["File Name"] + [label for label, _ in metadata_fields]
df = pd.DataFrame(results, columns=column_names)

# Save the DataFrame to a CSV file
output_csv = "dicom_metadata.csv"
df.to_csv(output_csv, index=False)

print(f"Metadata saved to {output_csv}")

Metadata saved to dicom_metadata.csv
