# OASIS-1 Metadata Inspection

Goal: inspect demographic and diagnostic metadata to identify labels for the baseline BrainMRI-Net model.


In [1]:
import sys
import pandas as pd
from pathlib import Path

print("Python executable:")
print(sys.executable)

print("\nPandas version:")
print(pd.__version__)


Python executable:
/opt/anaconda3/envs/brainnetmri-env/bin/python

Pandas version:
2.3.3


In [2]:
from pathlib import Path
import pandas as pd

metadata_path = Path("..") / "data" / "raw" / "oasis_cross-sectional_080.xlsx"
df = pd.read_excel(metadata_path)
df.head()


FileNotFoundError: [Errno 2] No such file or directory: '../data/raw/oasis_cross-sectional_080.xlsx'

In [3]:
from pathlib import Path

data_dir = Path("..") / "data" / "raw"
print(list(data_dir.glob("*.xlsx")))


[PosixPath('../data/raw/oasis_cross-sectional-5708aa0a98d82080.xlsx')]


In [4]:
from pathlib import Path

data_dir = Path("..") / "data" / "raw"
list(data_dir.iterdir())


[PosixPath('../data/raw/.DS_Store'),
 PosixPath('../data/raw/oasis1_demographics.xlsx')]

In [5]:
import pandas as pd
from pathlib import Path

metadata_path = Path("..") / "data" / "raw" / "oasis1_demographics.xlsx"
df = pd.read_excel(metadata_path)

df.head()


Unnamed: 0,ID,M/F,Hand,Age,Educ,SES,MMSE,CDR,eTIV,nWBV,ASF,Delay
0,OAS1_0001_MR1,F,R,74,2.0,3.0,29.0,0.0,1344,0.743,1.306,
1,OAS1_0002_MR1,F,R,55,4.0,1.0,29.0,0.0,1147,0.81,1.531,
2,OAS1_0003_MR1,F,R,73,4.0,3.0,27.0,0.5,1454,0.708,1.207,
3,OAS1_0004_MR1,M,R,28,,,,,1588,0.803,1.105,
4,OAS1_0005_MR1,M,R,18,,,,,1737,0.848,1.01,


In [6]:
df.describe

<bound method NDFrame.describe of                 ID M/F Hand  Age  Educ  SES  MMSE  CDR  eTIV   nWBV    ASF  \
0    OAS1_0001_MR1   F    R   74   2.0  3.0  29.0  0.0  1344  0.743  1.306   
1    OAS1_0002_MR1   F    R   55   4.0  1.0  29.0  0.0  1147  0.810  1.531   
2    OAS1_0003_MR1   F    R   73   4.0  3.0  27.0  0.5  1454  0.708  1.207   
3    OAS1_0004_MR1   M    R   28   NaN  NaN   NaN  NaN  1588  0.803  1.105   
4    OAS1_0005_MR1   M    R   18   NaN  NaN   NaN  NaN  1737  0.848  1.010   
..             ...  ..  ...  ...   ...  ...   ...  ...   ...    ...    ...   
431  OAS1_0285_MR2   M    R   20   NaN  NaN   NaN  NaN  1469  0.847  1.195   
432  OAS1_0353_MR2   M    R   22   NaN  NaN   NaN  NaN  1684  0.790  1.042   
433  OAS1_0368_MR2   M    R   22   NaN  NaN   NaN  NaN  1580  0.856  1.111   
434  OAS1_0379_MR2   F    R   20   NaN  NaN   NaN  NaN  1262  0.861  1.390   
435  OAS1_0395_MR2   F    R   26   NaN  NaN   NaN  NaN  1283  0.834  1.368   

     Delay  
0      NaN  
1  

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 436 entries, 0 to 435
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ID      436 non-null    object 
 1   M/F     436 non-null    object 
 2   Hand    436 non-null    object 
 3   Age     436 non-null    int64  
 4   Educ    235 non-null    float64
 5   SES     216 non-null    float64
 6   MMSE    235 non-null    float64
 7   CDR     235 non-null    float64
 8   eTIV    436 non-null    int64  
 9   nWBV    436 non-null    float64
 10  ASF     436 non-null    float64
 11  Delay   20 non-null     float64
dtypes: float64(7), int64(2), object(3)
memory usage: 41.0+ KB


In [8]:
df["CDR"].value_counts(dropna=False)

CDR
NaN    201
0.0    135
0.5     70
1.0     28
2.0      2
Name: count, dtype: int64

In [9]:
df["CDR"].isna().sum()

np.int64(201)

In [11]:
# 1) keeping only labeled rows
df_labeled = df.dropna(subset=["CDR"]).copy()

# 2) binary target: 0 = normal, 1 = impaired
df_labeled["y_binary"] = (df_labeled["CDR"] > 0).astype(int)

# quick sanity checks
print("Total rows:", len(df))
print("Labeled rows:", len(df_labeled))
print("\nBinary label counts:")
print(df_labeled["y_binary"].value_counts())


Total rows: 436
Labeled rows: 235

Binary label counts:
y_binary
0    135
1    100
Name: count, dtype: int64


In [12]:
from pathlib import Path

processed_dir = Path("..") / "data" / "processed"
processed_dir.mkdir(parents=True, exist_ok=True)

out_path = processed_dir / "oasis1_metadata_labeled_binary.csv"
df_labeled.to_csv(out_path, index=False)

print("Saved:", out_path.resolve())

Saved: /Users/gurleenlakhman/Desktop/Projects/BrainMRI-Net/data/processed/oasis1_metadata_labeled_binary.csv


In [13]:
from pathlib import Path

out_path = Path("..") / "data" / "processed" / "oasis1_metadata_labeled_binary.csv"
out_path.exists(), out_path.resolve()

(True,
 PosixPath('/Users/gurleenlakhman/Desktop/Projects/BrainMRI-Net/data/processed/oasis1_metadata_labeled_binary.csv'))