In [None]:
!pip install nilearn



In [None]:
from nilearn.datasets import fetch_oasis_vbm
from nilearn.image import load_img
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Fetch the OASIS VBM dataset (requested with dartel_version=True) and keep
# handles on the two pieces used below: the gray matter maps and the
# per-subject external variables, the latter wrapped in a DataFrame.
oasis = fetch_oasis_vbm(dartel_version=True)
gray_matter_maps = oasis["gray_matter_maps"]
ext_vars = pd.DataFrame(oasis["ext_vars"])

In [None]:
# The subject count is taken as the number of gray matter maps in the dataset.
num_subjects = len(oasis.gray_matter_maps)
print(f"Number of subjects: {num_subjects}")

Number of subjects: 403


In [None]:
# Preview the clinical and demographic variables.
# pandas is already imported and `ext_vars` is already built in the
# data-loading cell at the top of the notebook, so neither is repeated here.
print("\nClinical/demographic variables (first few rows):")
print(ext_vars.head())


Clinical/demographic variables (first few rows):
              id mf hand  age  educ  ses  mmse  cdr  etiv   nwbv    asf  delay
0  OAS1_0001_MR1  F    R   74   2.0  3.0  29.0  0.0  1344  0.743  1.306    NaN
1  OAS1_0002_MR1  F    R   55   4.0  1.0  29.0  0.0  1147  0.810  1.531    NaN
2  OAS1_0003_MR1  F    R   73   4.0  3.0  27.0  0.5  1454  0.708  1.207    NaN
3  OAS1_0004_MR1  M    R   28   NaN  NaN   NaN  NaN  1588  0.803  1.105    NaN
4  OAS1_0005_MR1  M    R   18   NaN  NaN   NaN  NaN  1737  0.848  1.010    NaN


In [None]:
# Show the first 50 rows of the clinical/demographic table.
ext_vars.head(n=50)

Unnamed: 0,id,mf,hand,age,educ,ses,mmse,cdr,etiv,nwbv,asf,delay
0,OAS1_0001_MR1,F,R,74,2.0,3.0,29.0,0.0,1344,0.743,1.306,
1,OAS1_0002_MR1,F,R,55,4.0,1.0,29.0,0.0,1147,0.81,1.531,
2,OAS1_0003_MR1,F,R,73,4.0,3.0,27.0,0.5,1454,0.708,1.207,
3,OAS1_0004_MR1,M,R,28,,,,,1588,0.803,1.105,
4,OAS1_0005_MR1,M,R,18,,,,,1737,0.848,1.01,
5,OAS1_0006_MR1,F,R,24,,,,,1131,0.862,1.551,
6,OAS1_0007_MR1,M,R,21,,,,,1516,0.83,1.157,
7,OAS1_0009_MR1,F,R,20,,,,,1505,0.843,1.166,
8,OAS1_0010_MR1,M,R,74,5.0,2.0,30.0,0.0,1636,0.689,1.073,
9,OAS1_0011_MR1,F,R,52,3.0,2.0,30.0,0.0,1321,0.827,1.329,


In [None]:
# Tally how many rows fall under each value of the 'hand' column.
ext_vars.loc[:, "hand"].value_counts()


Unnamed: 0_level_0,count
hand,Unnamed: 1_level_1
R,403


In [None]:
# Summarise each column's dtype and non-null count to see which variables have gaps.
ext_vars.info()


<class 'pandas.core.frame.DataFrame'>
Index: 403 entries, 0 to 414
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   id      403 non-null    object 
 1   mf      403 non-null    object 
 2   hand    403 non-null    object 
 3   age     403 non-null    int64  
 4   educ    226 non-null    float64
 5   ses     207 non-null    float64
 6   mmse    226 non-null    float64
 7   cdr     226 non-null    float64
 8   etiv    403 non-null    int64  
 9   nwbv    403 non-null    float64
 10  asf     403 non-null    float64
 11  delay   0 non-null      float64
dtypes: float64(7), int64(2), object(3)
memory usage: 40.9+ KB


In [None]:
# Count the missing (NaN) entries in every column.
ext_vars.isna().sum()


Unnamed: 0,0
id,0
mf,0
hand,0
age,0
educ,177
ses,196
mmse,177
cdr,177
etiv,0
nwbv,0


In [None]:
# Select the 'age' column for the rows whose 'cdr' value is missing.
# A single .loc call picks rows and column together instead of chaining two
# indexing steps.
# NOTE(review): "adr" in the variable name looks like a typo for "cdr"; the
# name is kept because other cells may refer to it.
nan_adr_ages = ext_vars.loc[ext_vars['cdr'].isna(), 'age']

# Display the resulting Series (pandas abbreviates long Series when printed).
print(nan_adr_ages)

3      28
4      18
5      24
6      21
7      20
       ..
397    21
400    23
402    30
406    22
408    19
Name: age, Length: 177, dtype: int64


In [None]:
# Same selection as a plain NumPy array: ages of the rows with a missing 'cdr'.
# NOTE(review): "adr" in the variable name looks like a typo for "cdr"; the
# name is kept because other cells may refer to it.
nan_adr_ages = ext_vars.loc[ext_vars['cdr'].isna(), 'age'].values

# Printing the array lists the values without the Series index.
print(nan_adr_ages)


[28 18 24 21 20 30 19 21 24 21 27 23 38 21 29 57 20 48 24 21 20 20 20 48
 18 20 25 25 18 21 40 20 18 22 28 23 19 29 18 19 24 20 20 25 23 25 19 26
 22 21 30 18 24 22 24 23 24 22 34 25 23 20 25 23 23 20 40 57 20 18 41 50
 35 23 44 48 44 22 43 21 31 23 21 23 22 20 48 18 26 22 26 19 20 22 37 20
 29 22 28 21 20 21 28 24 54 50 22 28 18 20 20 20 28 22 48 20 22 20 27 33
 31 19 27 50 19 54 26 41 19 20 23 22 43 21 22 21 20 22 23 46 31 25 20 58
 38 22 26 26 55 24 22 26 25 20 19 25 55 22 34 23 25 21 23 30 21 22 22 45
 22 50 23 22 21 23 30 22 19]


In [None]:
# Check the ages of the patients, grouping them by CDR score.
# Only the scores 0.5, 1.0 and 2.0 are listed; rows with cdr == 0.0 are not.
for score in (0.5, 1.0, 2.0):
    # Rows whose 'cdr' equals the current score, reduced to their 'age' column.
    ages = ext_vars.loc[ext_vars['cdr'] == score, 'age']
    print(f"\nCDR = {score}:")
    print(ages.values)



CDR = 0.5:
[73 76 82 80 69 82 70 62 80 64 79 75 81 66 67 72 70 83 73 70 66 71 81 84
 81 80 87 75 73 78 90 77 77 74 64 90 79 80 75 89 83 71 83 72 68 84 67 73
 77 80 80 79 81 63 73 83 69 92 76 71 74 72 86 81 92 73 70 73]

CDR = 1.0:
[86 88 84 78 83 72 71 69 80 87 65 78 84 78 96 73 72 80 67 77 78 77 75 78
 71 75]

CDR = 2.0:
[78 86]


In [None]:
# Check the shape of the image.
# `load_img` is already imported in the imports cell at the top of the
# notebook, so it is not imported again here.

# Path of the first gray matter map in the dataset
first_image_path = oasis['gray_matter_maps'][0]

# Load that path as a Nifti image object
first_image = load_img(first_image_path)

# Shape of the image data
image_shape = first_image.shape

print(f"Shape of the first MRI image: {image_shape}")

Shape of the first MRI image: (91, 109, 91)
