<a href="https://colab.research.google.com/github/eliza-giane/super-resolution-dl-project/blob/main/Superresolution_Deep_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Introduction**

**This is the ACDC Superresolution Project for Deep Learning at AIT by Eliza Giane, Shirui Li, and Lydia Yang.**


This project was prompted by the Automated Cardiac Diagnosis Challenge (ACDC), which can be found [here](https://www.creatis.insa-lyon.fr/Challenge/acdc/index.html). The data consists of real, anonymized, and regulated clinical exams from the University Hospital of Dijon, and is described as follows on the challenge's website:

\\

---
"Our dataset covers several well-defined pathologies with enough cases to (1) properly train machine learning methods and (2) clearly assess the variations of the main physiological parameters obtained from cine-MRI (in particular diastolic volume and ejection fraction)."

"The dataset is composed of 150 exams (all from different patients) divided into 5 evenly distributed subgroups (4 pathological plus 1 healthy subject groups) as described below. Furthermore, each patient comes with the following additional information : weight, height, as well as the diastolic and systolic phase instants."

---
\\
The dataset is available [here](https://humanheart-project.creatis.insa-lyon.fr/database/#collection/637218c173e9f0047faa00fb/folder/637218e573e9f0047faa00fc), and the code provided by the challenge for handling .nii files is available [here](https://www.creatis.insa-lyon.fr/Challenge/acdc/code/metrics_acdc.py).

\\
**Any use of the ACDC database requires the following citation:**

O. Bernard, A. Lalande, C. Zotti, F. Cervenansky, et al.
"Deep Learning Techniques for Automatic MRI Cardiac Multi-structures Segmentation and Diagnosis: Is the Problem Solved ?" in IEEE Transactions on Medical Imaging, vol. 37, no. 11, pp. 2514-2525, Nov. 2018, doi: 10.1109/TMI.2018.2837502

In [14]:
import nibabel as nib
import numpy as np
import tensorflow as tf
import os
import configparser
from configparser import ConfigParser
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Acquiring Data**

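The data is stored in Google Drive. The helper functions below locate each patient's image files and `Info.cfg` file, load the images into arrays, and read each patient's group label, so that the data is in a convenient form for pre-processing.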

In [2]:
def getImageFiles(patient, trainingOrTesting,
                  base_dir='/content/drive/MyDrive/Superresolution/database/'):
  """
  Gets the paths of all relevant image files of one patient.

  Only the requested patient's folder is listed, so a call costs a single
  directory listing and is not affected by other entries of the split folder.

  patient = name of the patient folder, e.g. "patient001"
  trainingOrTesting = "training" or "testing"
  base_dir = folder that contains the "training" and "testing" folders

  Returns a list of paths sorted by file name. 'MANDATORY_CITATION.md',
  'Info.cfg' and the patient's "<patient>_4d.nii.gz" file are left out.
  Raises FileNotFoundError (from os.listdir) if the patient folder is missing.
  """
  nonDataList = {'MANDATORY_CITATION.md', 'Info.cfg', patient + "_4d.nii.gz"}
  directoryPatient = os.path.join(base_dir, trainingOrTesting, patient)
  # os.listdir returns names in arbitrary order; sort so that the position of
  # each image in the returned list is the same on every run
  return [os.path.join(directoryPatient, imageFile)
          for imageFile in sorted(os.listdir(directoryPatient))
          if imageFile not in nonDataList]  #filter out the non-image files

def getInfoFiles(patient, trainingOrTesting,
                 base_dir='/content/drive/MyDrive/Superresolution/database/'):
  """
  Gets the path of the 'Info.cfg' file of one patient.

  The path is built directly from the patient name, so no other patient
  folder is listed.

  patient = name of the patient folder, e.g. "patient001"
  trainingOrTesting = "training" or "testing"
  base_dir = folder that contains the "training" and "testing" folders

  Returns the path as a string.
  Raises FileNotFoundError if the patient has no 'Info.cfg' file.
  """
  infoPath = os.path.join(base_dir, trainingOrTesting, patient, 'Info.cfg')
  # Fail here with the offending path instead of later when the file is opened
  if not os.path.isfile(infoPath):
    raise FileNotFoundError("No Info.cfg found for " + str(patient) + ": " + infoPath)
  return infoPath

def patientNames(trainingOrTesting):
  """
  Returns a list of patient file names based on specified 'training' or 'testing' data

  'training' gives "patient001" ... "patient100",
  'testing' gives "patient101" ... "patient150".
  Raises ValueError for any other value, so that a misspelled split name
  fails here instead of returning None to the caller.
  """
  patientRanges = {'training': range(1, 101), 'testing': range(101, 151)}
  if trainingOrTesting not in patientRanges:
    raise ValueError("trainingOrTesting must be 'training' or 'testing', got " + repr(trainingOrTesting))
  # Patient numbers are zero-padded to three digits in the folder names
  return ["patient" + str(i).zfill(3) for i in patientRanges[trainingOrTesting]]

def getData(trainingOrTesting):
  """
  Loads the images of every patient of the specified 'testing' or 'training' data

  Returns a dictionary that maps each patient name to a list holding one
  array (the result of get_fdata()) per path returned by getImageFiles.
  """
  return {
      patientName: [nib.load(imagePath).get_fdata()
                    for imagePath in getImageFiles(patientName, trainingOrTesting)]
      for patientName in patientNames(trainingOrTesting)
  }

def label(trainingOrTesting):
    """
    Reads the group of every patient of the specified 'training' or 'testing' data

    Returns a dictionary that maps each patient name to the value stored
    under 'group' in that patient's info file.
    """
    groupByPatient = {}
    for patientName in patientNames(trainingOrTesting):
        with open(getInfoFiles(patientName, trainingOrTesting)) as cfgFile:
            # A "[top]" section header is prepended before parsing so that
            # the file content can be read as one configparser section
            cfgText = "[top]\n" + cfgFile.read()
        cfg = configparser.ConfigParser()
        cfg.read_string(cfgText)
        groupByPatient[patientName] = dict(cfg.items("top"))['group']
    return groupByPatient

# **Pre-Processing Data**

In [10]:
# Image data of both splits: each dictionary maps a patient name to the list
# of image arrays loaded by getData
x_train, x_test = getData('training'), getData('testing')
# Group labels of both splits: each dictionary maps a patient name to its group
y_train, y_test = label('training'), label('testing')

KeyboardInterrupt: 

In [13]:
# Splitting training into training and validation data, separately for each group.
# Cutting off the last 20% of the patients only gives a representative
# validation set if the groups are mixed along the patient order.
# NOTE(review): the ACDC patient numbers appear to be ordered by group, in which
# case the tail of the list would contain a single group - confirm.
train_ratio  = 0.8

# Patients of each group, in their original order
patients_by_group = {}
for patient, group in y_train.items():
  patients_by_group.setdefault(group, []).append(patient)

# In every group the first train_ratio share stays in training, the rest is validation
valid_patients = set()
for members in patients_by_group.values():
  valid_patients.update(members[int(len(members) * train_ratio):])

train_split  = len(x_train) - len(valid_patients) # number of patients kept for training
x_valid = {patient: data for patient, data in x_train.items() if patient in valid_patients}
y_valid = {patient: group for patient, group in y_train.items() if patient in valid_patients}
x_train = {patient: data for patient, data in x_train.items() if patient not in valid_patients}
y_train = {patient: group for patient, group in y_train.items() if patient not in valid_patients}

100 100
20 20
80 80


In [9]:
def _minDims(volumesByPatient):
  """
  Returns the smallest size along each of the three axes, taken over the
  first image of every patient in the dictionary.

  No particular patient has to be present in the dictionary.
  NOTE(review): only the first image of each patient is inspected, as before;
  confirm that the other images of a patient have the same shape.
  """
  shapes = [volumes[0].shape for volumes in volumesByPatient.values()]
  # zip(*shapes) groups the sizes axis by axis, min() picks the smallest of each
  dim1, dim2, dim3 = (min(axisSizes) for axisSizes in zip(*shapes))
  return dim1, dim2, dim3

# Smallest image dimensions found in the training and in the testing data
dim1_train, dim2_train, dim3_train = _minDims(x_train)
dim1_test, dim2_test, dim3_test = _minDims(x_test)

print(dim1_train, dim2_train, dim3_train)
print(dim1_test, dim2_test, dim3_test)

154 154 6
154 162 6


In [5]:
# for patient in x_test.keys():
#   for i in range(4):
#     print(x_test[patient])
#     x_test[patient][i] = x_test[patient][i][0:dim1_test]

#     for j in range(len(x_test[patient][i])):
#       x_test[patient][i][j] = x_test[patient][i][j][0:dim2_test]

#       for k in range(len(x_test[patient][i][j])):
#         x_test[patient][i][j][k] = x_test[patient][i][j][k][0:dim3_test]


#     # x_test[patient][i][1] = x_test[patient][i][1][0:dim2_test]
#     # x_test[patient][i][0] = x_test[patient][i][0][0:dim3_test]



# for patient in x_train.keys():
#   for i in range(4):
#     x_train[patient][i] = x_train[patient][i][0:dim1_train, 0:dim2_train, 0:dim3_train]



In [6]:
# print(x_train[patient][0])

In [7]:
# for patient in x_test.keys():
#   for i in range(4):
#     print(x_test[patient][i].shape)

# for patient in x_train.keys():
#   for i in range(4):
#     print(x_train[patient][i].shape)

- Import new data
- Reshape images
- Split the training data into training and validation data
- Standardize data
- Convert the dense representation of the classes to one-hot encoding