In [None]:
# Select matplotlib's interactive "notebook" backend for the figures drawn below.
# NOTE(review): this backend targets the classic Notebook UI - confirm it is available in the environment used.
%matplotlib notebook

In [None]:
# Standard libraries included in Python distribution
import os
import re
import random
import pickle

# Installed libraries
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import pandas as pd

from PIL import Image
from PIL import GifImagePlugin
from numpy import asarray

### Section 1: Navigate through all images in directory iteratively and display in pandas dataframe the dimensions of the images

**Methodology** :
1. Use *PIL* package to convert the gif to numpy.
2. Obtain full path to file and then the shape of the image.

**Objectives** : 
1. Find out if all the images are of the same dimension.
2. Store information of all image files to be able to traverse easily.

In [None]:
# Restore the three base paths (home, scratch, shared) under which files are stored.
# They were serialized together into a single pickle file in Step 0.
# NOTE: pickle.load can execute arbitrary code - only load a pickle file you created yourself.

with open('pickledHomeScratchShared.pickle', "rb") as basePathsFile:
    (baseHomePath,
     baseScratchPath,
     baseSharedPath) = pickle.load(basePathsFile)

In [None]:
# Step 1: Transverse - Navigate through the whole directory and build all paths if 'anon_111_t88_gfc{}tra_90' files are found

# Raw string: '\.' must reach the regex engine untouched; in a normal string literal it is an
# invalid escape sequence that Python warns about.
transversePattern = re.compile(r"^.+anon_111_t88_gfc.+tra_90\.gif$")

fileShapeList = list()
for root, dirs, files in os.walk("{}/data".format(baseScratchPath)):
    for file in files:
        if transversePattern.match(file):
            fullFilePath = os.path.join(root, file)
            # Step 2: Convert the image to Numpy ndarray object and find the shape of the object.
            # The context manager closes the underlying file explicitly once the pixels are read.
            with Image.open(fullFilePath) as gifImage:
                img = asarray(gifImage)
            # The MRI ID is the name of the directory four levels above the image file.
            mriDir = fullFilePath
            for ancestorLevel in range(4):
                mriDir = os.path.dirname(mriDir)
            mriImgID = os.path.basename(mriDir)
            fileShapeList.append((mriImgID, file, fullFilePath, img.shape))

# Step 3: Display in form of pandas dataframe that contains all sessions from each MRI visit for all patients
imageInfoDf_t = pd.DataFrame(fileShapeList, columns=['MRI_ID', 'File name', 'Full path', 'Shape of image'])
imageInfoDf_t

In [None]:
# Step 1: Coronal - Navigate through the whole directory and build all paths if 'anon_111_t88_gfc{}cor_110' files are found

# Raw string: '\.' must reach the regex engine untouched; in a normal string literal it is an
# invalid escape sequence that Python warns about.
coronalPattern = re.compile(r"^.+anon_111_t88_gfc.+cor_110\.gif$")

fileShapeList = list()
for root, dirs, files in os.walk("{}/data".format(baseScratchPath)):
    for file in files:
        if coronalPattern.match(file):
            fullFilePath = os.path.join(root, file)
            # Step 2: Convert the image to Numpy ndarray object and find the shape of the object.
            # The context manager closes the underlying file explicitly once the pixels are read.
            with Image.open(fullFilePath) as gifImage:
                img = asarray(gifImage)
            # The MRI ID is the name of the directory four levels above the image file.
            mriDir = fullFilePath
            for ancestorLevel in range(4):
                mriDir = os.path.dirname(mriDir)
            mriImgID = os.path.basename(mriDir)
            fileShapeList.append((mriImgID, file, fullFilePath, img.shape))

# Step 3: Display in form of pandas dataframe that contains all sessions from each MRI visit for all patients
imageInfoDf_c = pd.DataFrame(fileShapeList, columns=['MRI_ID', 'File name', 'Full path', 'Shape of image'])
imageInfoDf_c

In [None]:
# Step 1: Sagittal - Navigate through the whole directory and build all paths if 'anon_111_t88_gfc{}sag_95' files are found

# Raw string: '\.' must reach the regex engine untouched; in a normal string literal it is an
# invalid escape sequence that Python warns about.
sagittalPattern = re.compile(r"^.+anon_111_t88_gfc.+sag_95\.gif$")

fileShapeList = list()
for root, dirs, files in os.walk("{}/data".format(baseScratchPath)):
    for file in files:
        if sagittalPattern.match(file):
            fullFilePath = os.path.join(root, file)
            # Step 2: Convert the image to Numpy ndarray object and find the shape of the object.
            # The context manager closes the underlying file explicitly once the pixels are read.
            with Image.open(fullFilePath) as gifImage:
                img = asarray(gifImage)
            # The MRI ID is the name of the directory four levels above the image file.
            mriDir = fullFilePath
            for ancestorLevel in range(4):
                mriDir = os.path.dirname(mriDir)
            mriImgID = os.path.basename(mriDir)
            fileShapeList.append((mriImgID, file, fullFilePath, img.shape))

# Step 3: Display in form of pandas dataframe that contains all sessions from each MRI visit for all patients
imageInfoDf_s = pd.DataFrame(fileShapeList, columns=['MRI_ID', 'File name', 'Full path', 'Shape of image'])
imageInfoDf_s

### Section 2 : Load image using PIL.Image and display image

In [None]:
# Helper function to take absolute path to produce numpy
def imgPathtoNp(path):
    """Load the image file at ``path`` with PIL and return its pixels as a NumPy ndarray.

    Parameters
    ----------
    path : str
        Path of the image file to open.

    Returns
    -------
    numpy.ndarray
        The image data exactly as PIL decodes it; no normalization is applied.
    """
    # Load using PIL.Image; the context manager closes the file explicitly once the
    # pixel data has been copied into the array.
    with Image.open(path) as pilImage:
        img_arr = asarray(pilImage)
    # Normalization to the 0-255 range is currently disabled:
    #img_arr = np.round((img_arr/img_arr.max())*255).astype(np.uint8)
    return img_arr

# Function to show image using Numpy ndarray
def showImg(ndarr):
    """Draw ``ndarr`` with ``plt.imshow`` and return whatever ``plt.imshow`` returns.

    The image is rendered with the ``gray_r`` colormap and "nearest" interpolation.
    """
    renderOptions = {
        "cmap": plt.cm.gray_r,
        "interpolation": "nearest",
    }
    return plt.imshow(ndarr, **renderOptions)

In [None]:
# Take a random file from imageInfoDf_c and load it using PIL (via imgPathtoNp)
randImgIndex = random.randint(0, len(imageInfoDf_c) - 1)

# Resolve the path once so that the image that is loaded is the same one that is reported below.
sampleImgPath = imageInfoDf_c.iloc[randImgIndex]['Full path']
sampleImgData = imgPathtoNp(sampleImgPath)
print('Sample picked out is : {}'.format(sampleImgPath))

# Review shape of the Numpy ndarray that encodes the image data
sampleImgData.shape

In [None]:
# Static image showing data
# Close the currently active figure (if any) before drawing a new one.
plt.close()

# Display the sample image; the trailing semicolon keeps the returned object out of the cell output.
showImg(sampleImgData);

### Section 3 : Pull in CDR labels, transform to binary and then join to `ImageInfoDf`

In [None]:
# Read in files containing labels: OASIS-1 is stored as CSV, OASIS-2 as an Excel workbook.
oasis1LabelPath = '{}/milestone_II_project/data/oasis_labelled_data/oasis_1_labelled_data.csv'.format(baseHomePath)
oasis2LabelPath = '{}/milestone_II_project/data/oasis_labelled_data/oasis_2_labelled_data.xlsx'.format(baseHomePath)

oasis_1_datatable = pd.read_csv(oasis1LabelPath)
oasis_2_datatable = pd.read_excel(oasis2LabelPath)

In [None]:
# Normalize content from both datatables and make it into a single style

# Build the label table used in the project from the OASIS-1 datatable
# (436 rows expected from this operation):
#   * a missing CDR is filled with 0 - big assumption that 'NA' == not demented
#   * dem_labels is the binary label: 0 when CDR == 0, otherwise 1
#   * 'ID' is renamed to 'MRI_ID' so it can be joined to the image metadata
oasis1DataSet = (
    oasis_1_datatable
    .assign(CDR=lambda labels: labels['CDR'].fillna(0))
    .assign(dem_labels=lambda labels: labels['CDR'].map(lambda cdr: int(cdr != 0)))
    .rename(columns={'ID': 'MRI_ID'})
    .loc[:, ['MRI_ID', 'dem_labels']]
)


# Join each imageInfoDf_x dataframe (image metadata) with oasis1DataSet (labels) on 'MRI_ID'
# to create the matching `oasisMasterDf_x`

oasisMasterDf_t = pd.merge(imageInfoDf_t, oasis1DataSet, on='MRI_ID')
oasisMasterDf_s = pd.merge(imageInfoDf_s, oasis1DataSet, on='MRI_ID')
oasisMasterDf_c = pd.merge(imageInfoDf_c, oasis1DataSet, on='MRI_ID')

### Section 5 : Create arrays and serialize to files using pickle

#### 5.2 - Create a numpy `ndarray` of all the labels, in the same order as the rows of the corresponding `oasisMasterDf_x`

#### Also, optionally creating a list of the `MRI_ID`, in case this is necessary for any downstream tasks.

In [None]:
# For each view (t, s, c) pull the binary labels and the MRI IDs out of the
# matching master dataframe as NumPy arrays, keeping the dataframe's row order.
all_labels_processed_t, all_mri_id_processed_t = (
    oasisMasterDf_t[column].to_numpy() for column in ('dem_labels', 'MRI_ID')
)

all_labels_processed_s, all_mri_id_processed_s = (
    oasisMasterDf_s[column].to_numpy() for column in ('dem_labels', 'MRI_ID')
)

all_labels_processed_c, all_mri_id_processed_c = (
    oasisMasterDf_c[column].to_numpy() for column in ('dem_labels', 'MRI_ID')
)

#### 5.3 - Serialize and store each of `all_labels` and `all_mri_id`

In [None]:
# Each entry pairs an output path template (filled with baseSharedPath) with the array to serialize.
labelPickleTargets = [
    ("{}/all_labels_processed_t.pickle", all_labels_processed_t),
    ("{}/all_mri_id_processed_t.pickle", all_mri_id_processed_t),
    ("{}/all_labels_processed_s.pickle", all_labels_processed_s),
    ("{}/all_mri_id_processed_s.pickle", all_mri_id_processed_s),
    ("{}/all_labels_processed_c.pickle", all_labels_processed_c),
    ("{}/all_mri_id_processed_c.pickle", all_mri_id_processed_c),
]

# Write the arrays one after another, in the order listed above.
for pathTemplate, arrayToStore in labelPickleTargets:
    with open(pathTemplate.format(baseSharedPath), "wb") as pickleOut:
        pickle.dump(arrayToStore, pickleOut)
    

### Section 6 : Create stitched `Transverse` arrays using `skipFrames=120` on each end to reduce dimensionality to `256 -120 -120 = 16` frames

#### 6.1 - Create numpy `ndarray` of all the images in the same order as it is in `oasisMasterDf`.
#### Here, we are keeping only the middle 16 slices

In [None]:
%%time

# Extract the image paths of every master dataframe as NumPy arrays; these are the inputs
# of the vectorized loader below.
ImgPathAsNumpy_t, ImgPathAsNumpy_s, ImgPathAsNumpy_c = (
    masterDf['Full path'].to_numpy()
    for masterDf in (oasisMasterDf_t, oasisMasterDf_s, oasisMasterDf_c)
)

# Step 1: Create vectorized function (object output, so every element holds one image array)
vec_func_gif = np.vectorize(imgPathtoNp, otypes=[np.ndarray])

# Step 2: Apply to each of the datasets, in the order t, s, c
processed_img_t, processed_img_s, processed_img_c = map(
    vec_func_gif,
    (ImgPathAsNumpy_t, ImgPathAsNumpy_s, ImgPathAsNumpy_c),
)

In [None]:
# Each entry pairs an output path template (filled with baseSharedPath) with the image array to serialize.
imagePickleTargets = [
    ("{}/processed_img_t.pickle", processed_img_t),
    ("{}/processed_img_s.pickle", processed_img_s),
    ("{}/processed_img_c.pickle", processed_img_c),
]

# Write the arrays one after another, in the order listed above.
for pathTemplate, imagesToStore in imagePickleTargets:
    with open(pathTemplate.format(baseSharedPath), "wb") as pickleOut:
        pickle.dump(imagesToStore, pickleOut)