<a href="https://colab.research.google.com/github/adecora/ADconvolutionalneuralnetwork/blob/main/00_exploring_the_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Explore PET data

In [1]:
# Mount Google Drive so the data files stored there can be read by later cells.
# The mount point is relative to the current working directory.
# NOTE(review): later cells read from /content/mount, so this presumably
# relies on the working directory being /content -- confirm.

from google.colab import drive
drive.mount('./mount')

Mounted at ./mount


In [2]:
# import libraries
import os
from xml.etree.ElementTree import parse

In [3]:
def search_files(path, extensions):
  """
  Counts, per directory, the files with the given extensions found under a path.

  The tree rooted at `path` is walked recursively. A file matches when the
  extension reported by `os.path.splitext` (e.g. '.nii'; note that
  'scan.nii.gz' has extension '.gz') is exactly one of `extensions`.

  Parameters:
    path: root directory where the search starts
    extensions: a single extension string or an iterable of extension
      strings, each including the leading dot (e.g. ['.nii'])
  Returns:
    A dictionary mapping the basename of each directory that directly
    contains at least one matching file to the number of matching files in
    it. Directories that share a basename have their counts added together,
    so the sum of the values is always the total number of matching files.
  """
  # A bare string would be tested by substring ('' in '.nii' is True, so
  # files without extension would be counted); treat it as one extension.
  if isinstance(extensions, str):
    extensions = (extensions,)
  wanted = set(extensions)

  data_files = {}
  for subdir, _dirs, files in os.walk(path):
    num_files = sum(1 for file in files if os.path.splitext(file)[1] in wanted)
    if num_files:
      # normpath drops a trailing separator, so a root given as "dir/" is
      # still reported under "dir" instead of the empty string.
      name = os.path.basename(os.path.normpath(subdir))
      # Accumulate instead of overwriting: two directories with the same
      # basename must not hide each other's files.
      data_files[name] = data_files.get(name, 0) + num_files
  return data_files

In [4]:
# Root folder of the PET data on the mounted drive; count the .nii files
# found in each directory below it and show the per-directory result.
data_path = "/content/mount/MyDrive/PET"
data_files = search_files(data_path, ['.nii'])
print(data_files)

{'NOR': 136, 'AD': 140}


In [5]:
# Add up the per-directory counts to get the total number of .nii files
print(f"The total number of .nii files is {sum(data_files.values())}")

The total number of .nii files is 276


### Checking the information saved in the .xml files

In [6]:
def parse_xml(xml, select=None, types=None):
  '''
  Parse a XML document into a flat dictionary with type conversion.

  Parameters:
    xml: parsed document (anything exposing `findall`, e.g. the object
      returned by `xml.etree.ElementTree.parse`)
    select: optional sequence of tag names to extract in addition to the
      `subjectInfo` items; for each tag the text of its last occurrence in
      the document is used
    types: optional sequence of callables matched to `select` by position
      and applied to the extracted text (e.g. [str, float, float])
  Returns:
    A dictionary with one entry per `subjectInfo` element, keyed by its
    `item` attribute (value converted to int when possible, otherwise the
    raw text), plus one entry per selected tag that exists in the document.

  Best-effort rules for the selected tags:
    - a tag that is not present in the document is skipped
    - a value whose conversion fails keeps its raw text
    - a tag with no matching entry in `types` keeps its raw text
  '''
  # We start by reading some fix info
  xml_info = {}
  for subject_info in xml.findall(".//subjectInfo"):
    text = subject_info.text
    try:
      xml_info[subject_info.attrib['item']] = int(text)
    except (TypeError, ValueError):
      # TypeError covers an empty element, whose text is None
      xml_info[subject_info.attrib['item']] = text

  converters = list(types) if types else []
  # enumerate over `select` (not over the found values) so every tag stays
  # aligned with its converter even when an earlier tag is missing
  for position, key in enumerate(select or []):
    matches = xml.findall(f".//{key}")
    if not matches:
      continue
    value = matches[-1].text
    if position < len(converters):
      try:
        value = converters[position](value)
      except (TypeError, ValueError):
        pass  # conversion failed: keep the raw text
    xml_info[key] = value

  return xml_info

In [7]:
# Path to one sample .xml file from the AD folder, used below to try out parse_xml
xml_path = "/content/mount/MyDrive/PET/AD/ADNI_005_S_0221_Coreg,_Avg,_Std_Img_and_Vox_Siz,_Uniform_Resolution_S12240_I26243.xml"

In [8]:
# Parse the sample .xml file: besides the subjectInfo items, pull out the
# subject's sex, age and weight, converted with str, float and float respectively
parse_xml(parse(xml_path), ['subjectSex','subjectAge','weightKg'], [str, float, float])

{'APOE A1': 4,
 'APOE A2': 4,
 'DX Group': 'AD',
 'subjectAge': 67.57,
 'subjectSex': 'M',
 'weightKg': 112.94}